Changeset 9206
- Timestamp:
- 2005-02-28T12:09:17+13:00 (19 years ago)
- Location:
- trunk/gsdl/perllib/classify
- Files:
-
- 1 added
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/classify/Hierarchy.pm
r8852 r9206 1 1 ########################################################################### 2 2 # 3 # Hierarchy.pm -- 3 # Hierarchy.pm -- classifier that enables a Hierarchy to beformed without 4 # the need for a hierarchy file (like HFileHierarchy). Used 5 # to be called AutoHierarchy. Inherits from HFileHierarchy 6 # so can also do everything that does as well. 7 # Created by Imene, modified by Katherine and David. 8 # 4 9 # A component of the Greenstone digital library software 5 10 # from the New Zealand Digital Library Project at the … … 24 29 ########################################################################### 25 30 26 # classifier plugin for generating hierarchical classifications27 31 # An advanced Hierarchical classifier 32 # to see the options, run "perl -S classinfo.pl Hierarchy" 28 33 29 34 package Hierarchy; 30 35 31 use BasClas; 32 use util; 33 use cfgread; 36 use HFileHierarchy; 34 37 use sorttools; 35 38 36 39 sub BEGIN { 37 @Hierarchy::ISA = ('BasClas'); 38 } 39 40 my $arguments = 41 [ { 'name' => "metadata", 42 'desc' => "{Hierarchy.metadata}", 43 'type' => "metadata", 44 'reqd' => "yes" }, 45 { 'name' => "hfile", 46 'desc' => "{Hierarchy.hfile}", 47 'type' => "string", 48 'deft' => "", 49 'reqd' => "yes" }, 50 { 'name' => "buttonname", 51 'desc' => "{BasClas.buttonname}", 52 'type' => "string", 53 'deft' => "", 40 @ISA = ('HFileHierarchy'); 41 } 42 43 my $arguments = 44 [ { 'name' => "separator", 45 'desc' => "{AutoHierarchy.separator}", 46 'type' => "regexp", 47 'deft' => "[\\\\\\\/|\\\\\\\|]", 54 48 'reqd' => "no" }, 55 { 'name' => "sort", 56 'desc' => "{Hierarchy.sort}", 57 'type' => "string", 58 'deft' => "{BasClas.metadata.deft}", 59 'reqd' => "no" }, 60 { 'name' => "reverse_sort", 61 'desc' => "{Hierarchy.reverse_sort}", 62 'type' => "flag", 63 'reqd' => "no" }, 64 { 'name' => "hlist_at_top", 65 'desc' => "{Hierarchy.hlist_at_top}", 49 { 'name' => "suppresslastlevel", 50 'desc' => "{AutoHierarchy.suppresslastlevel}", 66 51 'type' => "flag", 67 52 'reqd' => "no" } ]; 68 53 69 my $options = 70 { 'name' => "Hierarchy", 71 'desc' => "{Hierarchy.desc}", 72 'abstract' => "no", 73 'inherits' => "yes", 74 'args' => $arguments }; 54 my $options = { 'name' => "Hierarchy", 55 'desc' => "{AutoHierarchy.desc}", 56 'abstract' => "no", 57 'inherits' => "yes", 58 'args' => $arguments }; 75 59 76 60 77 61 sub new { 78 62 my $class = shift (@_); 79 my $self = new BasClas($class, @_); 80 63 my $self = new HFileHierarchy($class, @_); 64 65 # 14-05-02 To allow for proper inheritance of arguments - John Thompson 81 66 my $option_list = $self->{'option_list'}; 82 67 push( @{$option_list}, $options ); 83 68 84 69 if ($self->{'info_only'}) { 85 70 # created from classinfo.pl - don't need to parse the arguments 86 71 return bless $self, $class; 87 72 } 88 89 my ($hfile, $metadata, $sortname, $reverse_sort, $title, $hlist_at_top); 90 73 74 my $separator; 75 my $suppresslastlevel = 0; 76 91 77 if (!parsargv::parse(\@_, 92 q^buttonname/.*/^, \$title, 93 q^sort/.*/^, \$sortname, 94 q^reverse_sort^, \$reverse_sort, 95 q^hfile/.*/^, \$hfile, 96 q^metadata/.*/^, \$metadata, 97 q^hlist_at_top^, \$hlist_at_top, 78 q^separator/.*/^, \$separator, 79 q^suppresslastlevel^, \$suppresslastlevel, 98 80 "allow_extra_options")) { 99 81 … … 101 83 $self->print_txt_usage(""); # Use default resource bundle 102 84 die "\n"; 103 104 } 105 106 if (!$metadata) { 107 print STDERR "$class Error: required option -metadata not supplied\n"; 108 $self->print_txt_usage(""); # Use default resource bundle 85 } 86 87 88 if (!$separator) { 89 $separator = "[\\\/|\\\|]"; 90 } 91 $self->{'separator'} = $separator; 92 $self->{'suppresslastlevel'} = $suppresslastlevel; 93 94 # the hash that we use to build up the hierarchy 95 $self->{'path_hash'}= {}; 96 97 return bless $self, $class; 98 } 99 100 101 sub auto_classify { 102 my $self = shift (@_); 103 my ($doc_obj,$nosort,$sortmeta,$metavalues) = @_; 104 105 my $doc_OID = $doc_obj->get_OID(); 106 107 #Add all the metadata values to the hash 108 my $path_hash; 109 my $current_pos; 110 111 foreach my $metavalue (@$metavalues) { 112 $path_hash = $self->{'path_hash'}; 113 my @chunks = split (/$self->{'separator'}/, $metavalue); 114 if ($self->{'suppresslastlevel'}) { 115 pop(@chunks); # remove the last element from the end 116 } 109 117 110 die "$class Error: required option -metadata not supplied\n"; 111 } 112 113 if (!$hfile) { 114 print STDERR "$class Error: required option -hfile not supplied\n"; 115 $self->print_txt_usage(""); # Use default resource bundle 116 117 die "$class Error: required option -hfile not supplied\n"; 118 } 119 120 $title = $metadata unless ($title); 121 # if no sortname specified, it defaults to metadata 122 $sortname = $metadata unless ($sortname); 123 $sortname = undef if $sortname =~ /^nosort$/; 124 if (defined $sortname && $reverse_sort) { 125 $self->{'reverse_sort'} = 1; 126 } 127 128 my $subjectfile; 129 $subjectfile = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"etc", $hfile); 130 if (!-e $subjectfile) { 131 my $collfile = $subjectfile; 132 $subjectfile = &util::filename_cat($ENV{'GSDLHOME'},"etc", $hfile); 133 if (!-e $subjectfile) { 134 my $outhandle = $self->{'outhandle'}; 135 print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n"; 136 print STDERR "This file should be in $collfile or $subjectfile\n"; 137 $self->print_txt_usage(""); # Use default resource bundle 138 print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n"; 139 print STDERR "This file should be in $collfile or $subjectfile\n"; 140 die "\n"; 141 } 142 } 143 144 $self->{'descriptorlist'} = {}; # first field in subject file 145 $self->{'locatorlist'} = {}; # second field in subject file 146 $self->{'subjectfile'} = $subjectfile; 147 $self->{'metaname'} = $metadata; 148 $self->{'sortname'} = $sortname; 149 $self->{'title'} = $title; 150 $self->{'hlist_at_top'} = $hlist_at_top; 151 152 return bless $self, $class; 153 } 154 155 sub init { 156 my $self = shift (@_); 157 158 # read in the subject file 159 my $list = &cfgread::read_cfg_file ($self->{'subjectfile'}, undef, '^[^#]?\S'); 160 # $list is a hash that is indexed by the descriptor. The contents of this 161 # hash is a list of two items. The first item is the OID and the second item 162 # is the title 163 foreach $descriptor (keys (%$list)) { 164 $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0]; 165 unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) { 166 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'title'} = $list->{$descriptor}->[1]; 167 $self->{'locatorlist'}->{$list->{$descriptor}->[0]}->{'contents'} = []; 168 } 169 } 118 foreach my $folderName (@chunks) 119 { 120 if ($folderName ne ""){ #sometimes the tokens are empty 121 $current_pos = $self->add_To_Hash($path_hash, $folderName, $nosort); 122 $path_hash = $current_pos->{'nodes'}; 123 } 124 } 125 # now add the document, with sort meta if needed 126 if ($nosort) { 127 push(@{$current_pos->{'docs'}}, $doc_OID); 128 } else { 129 if (defined $sortmeta) { 130 # can you ever get the same doc twice in one classification?? 131 $current_pos->{'docs'}->{$doc_OID} = $sortmeta; 132 } else { 133 $current_pos->{'docs'}->{$doc_OID} = $metavalue; 134 } 135 } 136 } # foreach metadata 137 170 138 } 171 139 … … 173 141 my $self = shift (@_); 174 142 my ($doc_obj) = @_; 175 143 176 144 my $doc_OID = $doc_obj->get_OID(); 177 178 my $metadata = $doc_obj->get_metadata ($doc_obj->get_top_section(), 179 $self->{'metaname'}); 180 181 my $lang = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), 'Language'); 182 $lang = 'en' unless defined $lang; 183 184 my $sortmeta = ""; 185 if (defined $self->{'sortname'}) { 145 146 # are we sorting the list?? 147 my $nosort = 0; 148 if (defined $self->{'sortname'} && $self->{'sortname'} eq "nosort") { 149 $nosort = 1; 150 } 151 152 my $metavalues = []; 153 # find all the metadata values 154 foreach $m (@{$self->{'meta_list'}}) { 155 my $mvalues = $doc_obj->get_metadata($doc_obj->get_top_section(), $m); 156 next unless (@{$mvalues}); 157 if ($self->{'onlyfirst'}) { 158 # we only want the first metadata value 159 push (@$metavalues, $mvalues[0]); 160 last; 161 } 162 push (@$metavalues, @$mvalues); 163 last if (!$self->{'allvalues'}); # we don't want to try other elements 164 # cos we have already found some 165 } 166 167 return unless (@$metavalues); 168 169 #check for a sort element other than our metadata 170 my $sortmeta = undef; 171 if (!$nosort && defined $self->{'sortname'}) { 172 186 173 if ($self->{'sortname'} =~ /^filename$/i) { 187 174 $sortmeta = $doc_obj->get_source_filename(); 188 175 } else { 189 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), 190 $self->{'sortname'}); 176 $sortmeta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $self->{'sortname'}); 191 177 if (defined $sortmeta) { 192 178 $sortmeta = &sorttools::format_metadata_for_sorting($self->{'sortname'}, $sortmeta, $doc_obj); 193 #if ($self->{'sortname'} eq "Creator") {194 # if ($lang eq 'en') {195 # &sorttools::format_string_name_english (\$sortmeta);196 # }197 #} else {198 # if ($lang eq 'en') {199 # &sorttools::format_string_english (\$sortmeta);200 # }201 #}202 179 } 203 180 } 204 181 $sortmeta = "" unless defined $sortmeta; 205 182 } 206 207 foreach $metaelement (@$metadata) { 208 if ((defined $self->{'descriptorlist'}->{$metaelement}) && 209 (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) { 210 push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}}, 211 [$doc_OID, $sortmeta]); 212 my $localid = $self->{'descriptorlist'}->{$metaelement}; 213 my $classid = $self->get_number(); 214 215 $doc_obj->add_metadata($doc_obj->get_top_section(), "memberof", "CL$classid.$localid"); 216 } 217 } 218 } 219 220 sub get_classify_info { 221 my $self = shift (@_); 222 223 my $list = $self->{'locatorlist'}; 224 225 my ($classifyinfo); 226 if ($self->{'hlist_at_top'}) { 227 $classifyinfo = $self->get_entry ($self->{'title'}, "HList", "Invisible"); 228 } else { 229 $classifyinfo = $self->get_entry ($self->{'title'}, "VList", "Invisible"); 230 } 231 # sorted the keys - otherwise funny things happen - kjdon 03/01/03 232 foreach $OID (sort keys (%$list)) { 233 my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "VList"); 234 235 if (defined $self->{'sortname'}) { 236 if ($self->{'reverse_sort'}) { 237 foreach $subOID (sort {$b->[1] cmp $a->[1];} @{$list->{$OID}->{'contents'}}) { 238 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]}); 239 } 240 } 241 else { 242 foreach $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) { 243 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]}); 244 } 245 } 246 } 247 else { 248 foreach $subOID (@{$list->{$OID}->{'contents'}}) { 249 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]}); 250 } 251 } 252 } 253 254 return $classifyinfo; 255 } 256 257 sub supports_memberof { 258 my $self = shift(@_); 259 260 return "true"; 261 } 262 263 sub get_OID_entry { 264 my $self = shift (@_); 265 my ($OID, $classifyinfo, $title, $classifytype) = @_; 266 267 $OID = "" unless defined $OID; 268 $OID =~ s/^\.+//; 269 270 my ($headOID, $tailOID) = $OID =~ /^(\d+)(.*)$/; 271 $tailOID = "" unless defined $tailOID; 272 273 if (!defined $headOID) { 274 $classifyinfo->{'Title'} = $title; 275 $classifyinfo->{'classifytype'} = $classifytype; 276 return $classifyinfo; 277 } 278 279 $classifyinfo->{'contains'} = [] unless defined $classifyinfo->{'contains'}; 280 my $offset = 0; 281 foreach my $thing (@{$classifyinfo->{'contains'}}) { 282 $offset ++ if defined $thing->{'OID'}; 283 } 284 285 while (scalar(@{$classifyinfo->{'contains'}}) < ($headOID+$offset)) { 286 push (@{$classifyinfo->{'contains'}}, $self->get_entry("", $classifytype)); 287 } 288 289 return $self->get_OID_entry ($tailOID, $classifyinfo->{'contains'}->[($headOID+$offset-1)], $title, $classifytype); 183 184 if (defined $self->{'subjectfile'}) { 185 $self->hfile_classify($doc_obj,$sortmeta,$metavalues); 186 } 187 else { 188 $self->auto_classify($doc_obj,$nosort,$sortmeta,$metavalues); 189 } 190 } 191 192 sub add_To_Hash { 193 my $self = shift (@_); 194 my ($myhash, $k, $nosort) = @_; 195 196 if (!defined $myhash->{$k}){ 197 $myhash->{$k}={}; 198 $myhash->{$k}->{'nodes'}={}; 199 if ($nosort) { 200 $myhash->{$k}->{'docs'}=[]; 201 } else { 202 $myhash->{$k}->{'docs'} = {}; 203 } 204 } 205 return $myhash->{$k}; 206 } 207 208 sub print_Hash{ 209 my $self = shift (@_); 210 my ($myHash, $num_spaces) = @_; 211 212 foreach my $key (keys %{$myHash}){ 213 print "\n"; 214 $self->print_spaces($num_spaces); 215 print STDERR "$key*"; 216 $self->print_Hash($myHash->{$key}, $num_spaces + 2); 217 } 218 } 219 220 sub print_spaces{ 221 my $self = shift (@_); 222 my ($num_spaces) = @_; 223 224 for ($i = 0; $i < $num_spaces; $i++){ 225 print STDERR " "; 226 } 290 227 } 291 228 … … 293 230 my $self = shift (@_); 294 231 my ($title, $childtype, $thistype) = @_; 295 my $memberof = &supports_memberof();296 232 297 233 # organise into classification structure 298 234 my %classifyinfo = ('childtype'=>$childtype, 299 235 'Title'=>$title, 300 'supportsmemberof'=>$memberof, 301 'contains'=>[]); 236 'contains'=>[],); 302 237 $classifyinfo{'thistype'} = $thistype 303 238 if defined $thistype && $thistype =~ /\w/; 304 239 305 240 return \%classifyinfo; 306 241 } 307 242 243 sub process_hash { 244 my $self = shift (@_); 245 my ($top_hash, $top_entry) = @_; 246 my ($entry); 247 248 my $hash = {}; 249 foreach my $key (sort keys %{$top_hash}) { 250 $entry = $self->get_entry($key,"VList","VList"); 251 my $has_content = 0; 252 my @doc_list; 253 # generate a sorted list of doc ids 254 if ($nosort && scalar(@{$top_hash->{$key}->{'docs'}})) { 255 @doc_list = @{$top_hash->{$key}->{'docs'}}; 256 } elsif (!$nosort && (keys %{$top_hash->{$key}->{'docs'}})) { 257 @doc_list = sort {$top_hash->{$key}->{'docs'}->{$a} 258 cmp $top_hash->{$key}->{'docs'}->{$b};} keys %{$top_hash->{$key}->{'docs'}}; 259 260 } 261 # if this key has documents, add them 262 if (@doc_list) { 263 $has_content = 1; 264 foreach $d(@doc_list) { 265 push (@{$entry->{'contains'}}, {'OID'=>$d}); 266 } 267 } 268 # if this key has nodes, add them 269 if (scalar(keys %{$top_hash->{$key}->{'nodes'}})) { 270 $has_content = 1; 271 $self->process_hash($top_hash->{$key}->{'nodes'}, $entry); 272 } 273 # if we have found some content, add the new entry for this key into the parent node 274 if ($has_content) { 275 push (@{$top_entry->{'contains'}}, $entry); 276 } 277 278 } 279 } 280 281 sub auto_get_classify_info { 282 my $self = shift (@_); 283 my ($no_thistype) = @_; 284 $no_thistype = 0 unless defined $no_thistype; 285 286 my ($classification); 287 my $top_h = $self->{'path_hash'}; 288 289 if ($self->{'path_hash'}) { 290 if ($self->{'hlist_at_top'}) { 291 $classification = $self->get_entry ($self->{'title'}, "HList", "Invisible"); 292 } 293 else { 294 $classification = $self->get_entry ($self->{'title'}, "VList", "Invisible"); 295 } 296 } 297 298 $self->process_hash($top_h, $classification); 299 300 return $classification; 301 302 } 303 304 sub auto_get_classify_info 305 { 306 my $self = shift (@_); 307 my ($classifyinfo) = @_; 308 309 $self->process_hash($self->{'path_hash'}, $classifyinfo); 310 311 return $classifyinfo; 312 } 313 314 315 sub get_classify_info { 316 my $self = shift (@_); 317 318 my ($classifyinfo); 319 320 if ($self->{'hlist_at_top'}) { 321 $classifyinfo = $self->get_entry ($self->{'title'}, "HList", "Invisible"); 322 } 323 else { 324 $classifyinfo = $self->get_entry ($self->{'title'}, "VList", "Invisible"); 325 } 326 327 if (defined $self->{'subjectfile'}) { 328 return $self->hfile_get_classify_info($classifyinfo); 329 } 330 else { 331 return $self->auto_get_classify_info($classifyinfo); 332 } 333 } 334 308 335 309 336 1; 337 338 339
Note:
See TracChangeset
for help on using the changeset viewer.