########################################################################### # # Hierarchy.pm -- # A component of the Greenstone digital library software # from the New Zealand Digital Library Project at the # University of Waikato, New Zealand. # # Copyright (C) 1999 New Zealand Digital Library Project # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. # ########################################################################### # classifier plugin for generating hierarchical classifications # options for this classifier are: # -hfile file.txt - classification file # -metadata Metaname - metadata field to test against file.txt # -sort Meta - this option is optional (genious;-). by default this # classifier will sort documents within each section # alphabetically by Title. sort=nosort prevents sorting # (i.e. documents will end up in build order), sort=Meta # will sort each field alphabetically by Meta (Meta may # also be 'Filename' to sort by the original filename). # -buttonname Title - another optional field. this is what will end up in the # Title field for this classification. if not present it # defaults to Metaname # -hlist_at_top - use a horizontal list for the top level (i.e. display it # like an AZList classification) # 12/05/02 Added usage datastructure - John Thompson # 12/05/02 Modified new() so as not to die on error, only on init() - John Thompson package Hierarchy; use BasClas; use util; use cfgread; use sorttools; sub BEGIN { @ISA = ('BasClas'); } my $arguments = [ { 'name' => "metadata", 'desc' => "{Hierarchy.metadata}", 'type' => "metadata", 'reqd' => "yes" }, { 'name' => "hfile", 'desc' => "{Hierarchy.hfile}", 'type' => "string", 'deft' => "", 'reqd' => "yes" }, { 'name' => "buttonname", 'desc' => "{Hierarchy.buttonname}", 'type' => "string", 'deft' => "{BasClas.metadata.deft}", 'reqd' => "no" }, { 'name' => "sort", 'desc' => "{Hierarchy.sort}", 'type' => "string", 'deft' => "{BasClas.metadata.deft}", 'reqd' => "no" }, { 'name' => "hlist_at_top", 'desc' => "{Hierarchy.hlist_at_top}", 'type' => "flag", 'reqd' => "no" } ]; my $options = { 'name' => "Hierarchy", 'desc' => "{Hierarchy.desc}", 'abstract' => "no", 'inherits' => "yes", 'args' => $arguments }; # sub print_usage { # print STDERR " # usage: classify Hierarchy [options] # options: # -buttonname X Title field for this classification. # Defaults to metadata name. # -metadata X Metadata field used for classification, # list will be sorted by this element, # unless -sort is used. # -hfile X The classification structure file # -sort X Metadata field to sort by (defaults to -metadata) # use '-sort nosort' for no sorting. # -hlist_at_top Display the first level of the classification # horizontally. # "; # } sub new { my $class = shift (@_); my $self = new BasClas($class, @_); # 14-05-02 To allow for proper inheritance of arguments - John Thompson my $option_list = $self->{'option_list'}; push( @{$option_list}, $options ); my ($hfile, $metadata, $sortname, $title, $hlist_at_top); if (!parsargv::parse(\@_, q^buttonname/.*/^, \$title, q^sort/.*/^, \$sortname, q^hfile/.*/^, \$hfile, q^metadata/.*/^, \$metadata, q^hlist_at_top^, \$hlist_at_top, "allow_extra_options")) { $self->{'construction_error'} = "Incorrect options passed to $class, check your collect.cfg file."; } if (!$metadata) { $self->{'construction_error'} = "Hierarchy error: no metadata supplied."; } $title = $metadata unless ($title); # if no sortname specified, it defaults to metadata $sortname = $metadata unless ($sortname); $sortname = undef if $sortname =~ /^nosort$/; my $subjectfile; if (!$hfile) { $self->{'construction_error'} = "Hierarchy error: No -hfile supplied."; } else { $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile); if (!-e $subjectfile) { my $collfile = $subjectfile; $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile); if (!-e $subjectfile) { my $outhandle = $self->{'outhandle'}; $self->print_txt_usage(""); # Use default resource bundle print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n"; print STDERR "This file should be in $collfile or $subjectfile\n"; die "\n"; } } } $self->{'descriptorlist'} = {}; # first field in subject file $self->{'locatorlist'} = {}; # second field in subject file $self->{'subjectfile'} = $subjectfile; $self->{'metaname'} = $metadata; $self->{'sortname'} = $sortname; $self->{'title'} = $title; $self->{'hlist_at_top'} = $hlist_at_top; return bless $self, $class; } sub init { my $self = shift (@_); if(defined $self->{'construction_error'} || !defined $self->{'metaname'} || !defined $self->{'subjectfile'}) { print STDERR "Error: " , $self->{'construction_error'} , "\n"; $self->print_txt_usage(""); # Use default resource bundle die "\n"; } # read in the subject file my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\w'); # $list is a hash that is indexed by the descriptor. The contents of this # hash is a list of two items. The first item is the OID and the second item # is the title foreach $descriptor (keys (%$list)) { $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0]; unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) { $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1]; $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = []; } } } sub classify { my $self = shift (@_); my ($doc_obj) = @_; my $doc_OID = $doc_obj->get_OID(); my $metadata = $doc_obj->get_metadata ($doc_obj->get_top_section(), $self->{'metaname'}); my $lang = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), 'Language'); $lang = 'en' unless defined $lang; my $sortmeta = ""; if (defined $self->{'sortname'}) { if ($self->{'sortname'} =~ /^filename$/i) { $sortmeta = $doc_obj->get_source_filename(); } else { $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'sortname'}); if (defined $sortmeta) { if ($self->{'sortname'} eq "Creator") { if ($lang eq 'en') { &sorttools::format_string_name_english (\$sortmeta); } } else { if ($lang eq 'en') { &sorttools::format_string_english (\$sortmeta); } } } } $sortmeta = "" unless defined $sortmeta; } foreach $metaelement (@$metadata) { if ((defined $self->{'descriptorlist'}->{$metaelement}) && (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) { push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}}, [$doc_OID, $sortmeta]); } } } sub get_classify_info { my $self = shift (@_); my $list = $self->{'locatorlist'}; my ($classifyinfo); if ($self->{'hlist_at_top'}) { $classifyinfo = $self->get_entry ($self->{'title'}, "HList", "Invisible"); } else { $classifyinfo = $self->get_entry ($self->{'title'}, "VList", "Invisible"); } # sorted the keys - otherwise funny things happen - kjdon 03/01/03 foreach $OID (sort keys (%$list)) { my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "VList"); if (defined $self->{'sortname'}) { foreach $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) { push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]}); } } else { foreach $subOID (@{$list->{$OID}->{'contents'}}) { push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]}); } } } return $classifyinfo; } sub get_OID_entry { my $self = shift (@_); my ($OID, $classifyinfo, $title, $classifytype) = @_; $OID = "" unless defined $OID; $OID =~ s/^\.+//; my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/; $tailOID = "" unless defined $tailOID; if (!defined $headOID) { $classifyinfo->{'Title'} = $title; $classifyinfo->{'classifytype'} = $classifytype; return $classifyinfo; } $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'}; my $offset = 0; foreach $thing (@{$classifyinfo->{'contains'}}) { $offset ++ if defined $thing->{'OID'}; } while (scalar(@{$classifyinfo->{'contains'}}) < ($headOID+$offset)) { push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype)); } return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID+$offset-1)], $title, $classifytype); } sub get_entry { my $self = shift (@_); my ($title, $childtype, $thistype) = @_; # organise into classification structure my %classifyinfo = ('childtype'=>$childtype, 'Title'=>$title, 'contains'=>[]); $classifyinfo{'thistype'} = $thistype if defined $thistype && $thistype =~ /\w/; return \%classifyinfo; } 1;