Changeset 10253 for trunk/gsdl
- Timestamp:
- 2005-07-19T16:18:25+12:00 (19 years ago)
- Location:
- trunk/gsdl/perllib/classify
- Files:
-
- 15 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/classify/AZCompactList.pm
r10218 r10253 31 31 use sorttools; 32 32 33 use strict; 34 no strict 'refs'; # allow filehandles to be variables and viceversa 35 33 36 sub BEGIN { 34 @ ISA = ('BasClas');37 @AZCompactList::ISA = ('BasClas'); 35 38 } 36 39 … … 130 133 my $self = (defined $hashArgOptLists)? new BasClas($pluginlist,$inputargs,$hashArgOptLists): new BasClas($pluginlist,$inputargs); 131 134 135 if ($self->{'info_only'}) { 136 # don't worry about any options etc 137 return bless $self, $class; 138 } 139 132 140 if (!$self->{"metadata"}) { 133 141 my $outhandle = $self->{'outhandle'}; … … 173 181 } 174 182 175 $tmp = 0;183 my $tmp = 0; 176 184 177 185 sub classify … … 460 468 # should be recursively classified. 461 469 #-- 462 foreach $dm_pair (@$multiple_cl_ref)470 foreach my $dm_pair (@$multiple_cl_ref) 463 471 { 464 472 my ($doc_OID,$mdoffset,$metavalue,$cs_metavalue) = @$dm_pair; … … 651 659 # don't need to do any splitting if there are less than 'minnesting' classifications 652 660 if ((scalar @$classlistref) <= $self->{'minnesting'}) { 653 foreach $subOID (@$classlistref) {661 foreach my $subOID (@$classlistref) { 654 662 if ($subOID =~ /^CLASSIFY\.(.*)$/ 655 663 && defined $self->{'classifiers'}->{$1}) … … 670 678 671 679 # first split up the list into separate A-Z and 0-9 classifications 672 foreach $classification (@$classlistref) {680 foreach my $classification (@$classlistref) { 673 681 my $title = $self->{'reclassifylist'}->{$classification}; 674 682 $title =~ s/&(.){2,4};//g; # remove any HTML special chars … … 696 704 697 705 my @tmparr = (); 698 foreach $subsection (sort keys (%$classhash)) {706 foreach my $subsection (sort keys (%$classhash)) { 699 707 push (@tmparr, $subsection); 700 708 } … … 706 714 push (@tmparr, '0-9'); 707 715 } 708 foreach $subclass (@tmparr)716 foreach my $subclass (@tmparr) 709 717 { 710 718 my $tempclassify … … 714 722 715 723 716 foreach $subsubOID (@{$classhash->{$subclass}})724 foreach my $subsubOID (@{$classhash->{$subclass}}) 717 725 { 718 726 if ($subsubOID =~ /^CLASSIFY\.(.*)$/ … … 758 766 my $max = $self->{'maxcompact'}; 759 767 760 foreach $subsection (sort keys %$classhashref) {768 foreach my $subsection (sort keys %$classhashref) { 761 769 if ($subsection eq '0-9') { 762 770 @{$compactedhash->{$subsection}} = @{$classhashref->{$subsection}}; -
trunk/gsdl/perllib/classify/AZCompactSectionList.pm
r10218 r10253 10 10 11 11 use AZCompactList; 12 use strict; 13 no strict 'refs'; # allow filehandles to be variables and viceversa 12 14 13 15 sub BEGIN { 14 @ISA = ('AZCompactList'); 15 } 16 16 @AZCompactSectionList::ISA = ('AZCompactList'); 17 } 18 19 my $arguments = [ 20 ]; 17 21 my $options = 18 22 { 'name' => "AZCompactSectionList", … … 183 187 # should be recursively classified. 184 188 #-- 185 foreach $dm_pair (@$multiple_cl_ref)189 foreach my $dm_pair (@$multiple_cl_ref) 186 190 { 187 191 my ($doc_OID,$mdoffset,$metavalue) = @$dm_pair; … … 193 197 foreach $node_name (keys %{$self->{'classifiers'}}) 194 198 { 195 $resafe_node_name = $node_name;199 my $resafe_node_name = $node_name; 196 200 $resafe_node_name =~ s/(\(|\)|\[|\]|\{|\}|\^|\$|\.|\+|\*|\?|\|)/\\$1/g; 197 201 if ($metavalue =~ m/^$resafe_node_name$/i) -
trunk/gsdl/perllib/classify/AZList.pm
r10218 r10253 32 32 use iso639; 33 33 34 use strict; 35 no strict 'refs'; # allow filehandles to be variables and viceversa 36 34 37 sub BEGIN { 35 @ ISA = ('BasClas');38 @AZList::ISA = ('BasClas'); 36 39 } 37 40 … … 76 79 my $self = (defined $hashArgOptLists)? new BasClas($classifierslist,$inputargs,$hashArgOptLists): new BasClas($classifierslist,$inputargs); 77 80 81 if ($self->{'info_only'}) { 82 # don't worry about any options etc 83 return bless $self, $class; 84 } 85 78 86 if (!$self->{"metadata"}) { 79 87 print STDERR "AZList Error: required option -metadata not supplied \n"; … … 131 139 132 140 # find the first available metadata 133 foreach $m (@{$self->{'meta_list'}}) {141 foreach my $m (@{$self->{'meta_list'}}) { 134 142 $metavalue = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $m); 135 143 $metaname = $m; -
trunk/gsdl/perllib/classify/AZSectionList.pm
r10218 r10253 41 41 use sorttools; 42 42 43 use strict; 44 no strict 'refs'; # allow filehandles to be variables and viceversa 45 43 46 sub BEGIN { 44 @ ISA = ('AZList');47 @AZSectionList::ISA = ('AZList'); 45 48 } 46 49 50 my $arguments = [ 51 ]; 47 52 my $options = { 'name' => "AZSectionList", 48 53 'desc' => "{AZSectionList.desc}", … … 92 97 93 98 # find the first available metadata 94 foreach $m (@{$self->{'meta_list'}}) {99 foreach my $m (@{$self->{'meta_list'}}) { 95 100 $metavalue = $doc_obj->get_metadata_element($section, $m); 96 101 $metaname = $m; -
trunk/gsdl/perllib/classify/AllList.pm
r10218 r10253 1 package AllList; 2 1 3 use BasClas; 2 package AllList; 4 5 use strict; 6 no strict 'refs'; # allow filehandles to be variables and viceversa 3 7 4 8 sub BEGIN { … … 23 27 24 28 my $self = (defined $hashArgOptLists)? new BasClas($classifierslist,$inputargs,$hashArgOptLists): new BasClas($classifierslist,$inputargs); 29 30 if ($self->{'info_only'}) { 31 # don't worry about any options etc 32 return bless $self, $class; 33 } 25 34 26 35 # Manually set $self parameters. … … 55 64 'classifyOID' =>"oai"); 56 65 $classifyinfo{'thistype'} = 'Invisible'; 57 @list = @{$self->{'list'}};66 my @list = @{$self->{'list'}}; 58 67 59 68 my $seqNo = 0; 60 foreach $OID (@list) {69 foreach my $OID (@list) { 61 70 my $hashref={}; 62 71 $hashref->{'OID'}=$OID; -
trunk/gsdl/perllib/classify/BasClas.pm
r10229 r10253 64 64 use strict; 65 65 no strict 'subs'; # allow barewords (eg STDERR) as function arguments 66 no strict 'refs'; # allow filehandles to be variables and viceversa 66 67 67 68 my $arguments = … … 310 311 my $self = shift (@_); 311 312 my $metadata = shift (@_); 313 314 return "" unless defined $metadata && $metadata =~ /\S/; 312 315 313 316 my @metalist = split(/,/, $metadata); -
trunk/gsdl/perllib/classify/Browse.pm
r10218 r10253 26 26 # 12/05/02 Added usage datastructure - John Thompson 27 27 28 use BasClas;29 28 package Browse; 30 29 30 use BasClas; 31 31 use sorttools; 32 32 33 use strict; 34 no strict 'refs'; # allow filehandles to be variables and viceversa 35 33 36 sub BEGIN { 34 @ ISA = ('BasClas');37 @Browse::ISA = ('BasClas'); 35 38 } 36 39 40 my $arguments = [ 41 ]; 37 42 my $options = { 'name' => "Browse", 38 43 'desc' => "{Browse.desc}", … … 50 55 51 56 my $self = (defined $hashArgOptLists)? new BasClas($classifierslist,$inputargs,$hashArgOptLists): new BasClas($classifierslist,$inputargs); 57 58 if ($self->{'info_only'}) { 59 # don't worry about any options etc 60 return bless $self, $class; 61 } 52 62 53 63 # Manually set $self parameters. -
trunk/gsdl/perllib/classify/Collage.pm
r10223 r10253 29 29 use sorttools; 30 30 31 use strict; 32 no strict 'refs'; # allow filehandles to be variables and viceversa 33 31 34 sub BEGIN { 32 @ ISA = ('BasClas');35 @Collage::ISA = ('BasClas'); 33 36 } 34 37 … … 114 117 115 118 my $self = (defined $hashArgOptLists)? new BasClas($classifierslist,$inputargs,$hashArgOptLists): new BasClas($classifierslist,$inputargs); 119 120 if ($self->{'info_only'}) { 121 # don't worry about any options etc 122 return bless $self, $class; 123 } 116 124 117 125 # Manually set $self parameters. … … 179 187 my $within_page_c=1; 180 188 181 foreach $oid (@list) {189 foreach my $oid (@list) { 182 190 if ($within_page_c>$items_per_page) { 183 191 my $title = "Items $global_c+"; … … 207 215 my %classifyinfo = ('childtype'=>$childtype, 208 216 'Title'=>$title, 209 'parameters'=> $parameters,210 217 'contains'=>[]); 211 218 … … 226 233 my $caption = $self->{'caption'}; 227 234 228 if (!defined($maxDownloads)) {229 $maxDownloads="";230 }235 #if (!defined($maxDownloads)) { 236 # $maxDownloads=""; 237 #} 231 238 232 239 my $parameters; -
trunk/gsdl/perllib/classify/DateList.pm
r10218 r10253 43 43 use sorttools; 44 44 45 use strict; 46 no strict 'refs'; # allow filehandles to be variables and viceversa 47 45 48 sub BEGIN { 46 @ ISA = ('BasClas');49 @DateList::ISA = ('BasClas'); 47 50 } 48 51 … … 88 91 my $self = (defined $hashArgOptLists)? new BasClas($classifierslist,$inputargs,$hashArgOptLists): new BasClas($classifierslist,$inputargs); 89 92 93 if ($self->{'info_only'}) { 94 # don't worry about any options etc 95 return bless $self, $class; 96 } 97 90 98 # Manually set $self parameters. 91 99 $self->{'list'} = {}; … … 172 180 # classifications, unless nogroup is specified 173 181 if ((scalar @$classlistref) <= 39 && !$self->{'nogroup'}) { 174 foreach $subOID (@$classlistref) {182 foreach my $subOID (@$classlistref) { 175 183 push (@{$classifyinfo->{'contains'}}, {'OID'=>$subOID}); 176 184 } … … 184 192 if (!$self->{'nogroup'}) { # hlist of year+month pairs 185 193 # single level of classifications 186 foreach $classification (@$classlistref) {194 foreach my $classification (@$classlistref) { 187 195 my $date = $self->{'list'}->{$classification}; 188 196 $date =~ s/^(\d\d\d\d)(\d\d).*$/$1 _textmonth$2_/; … … 196 204 197 205 } else { # don't group - individual years and months 198 foreach $classification (@$classlistref) {206 foreach my $classification (@$classlistref) { 199 207 my $date = $self->{'list'}->{$classification}; 200 208 $date =~ s/^(\d\d\d\d)(\d\d).*$/$1 _textmonth$2_/; … … 223 231 push (@{$yearclassify->{'contains'}}, $monthclassify); 224 232 225 foreach $subsubOID233 foreach my $subsubOID 226 234 (@{$classhash->{$subclass}->{$subsubclass}}) { 227 235 push (@{$monthclassify->{'contains'}}, … … 236 244 # not by month 237 245 # first split up the list into separate year classifications 238 foreach $classification (@$classlistref) {246 foreach my $classification (@$classlistref) { 239 247 my $date = $self->{'list'}->{$classification}; 240 248 $date =~ s/^(\d\d\d\d).*$/$1/; … … 248 256 $classhash = $self->compactlist ($classhash); 249 257 } 250 foreach $subclass (sort keys %$classhash) {258 foreach my $subclass (sort keys %$classhash) { 251 259 my $tempclassify = $self->get_entry($subclass, "DateList"); 252 foreach $subsubOID (@{$classhash->{$subclass}}) {260 foreach my $subsubOID (@{$classhash->{$subclass}}) { 253 261 push (@{$tempclassify->{'contains'}}, {'OID'=>$subsubOID}); 254 262 } -
trunk/gsdl/perllib/classify/HFileHierarchy.pm
r10223 r10253 33 33 use cfgread; 34 34 use sorttools; 35 36 use strict; 37 no strict 'refs'; # allow filehandles to be variables and viceversa 35 38 36 39 sub BEGIN { … … 92 95 93 96 my $self = (defined $hashArgOptLists)? new BasClas($classifierslist,$inputargs,$hashArgOptLists): new BasClas($classifierslist,$inputargs); 97 98 if ($self->{'info_only'}) { 99 # don't worry about any options etc 100 return bless $self, $class; 101 } 94 102 95 103 my $metadata = $self->{'metadata'}; … … 177 185 # hash is a list of two items. The first item is the OID and the second item 178 186 # is the title 179 foreach $descriptor (keys (%$list)) {187 foreach my $descriptor (keys (%$list)) { 180 188 $self->{'descriptorlist'}->{$descriptor} = $list->{$descriptor}->[0]; 181 189 unless (defined $self->{'locatorlist'}->{$list->{$descriptor}->[0]}) { … … 218 226 219 227 # sorted the keys - otherwise funny things happen - kjdon 03/01/03 220 foreach $OID (sort keys (%$list)) {228 foreach my $OID (sort keys (%$list)) { 221 229 my $tempinfo = $self->get_OID_entry ($OID, $classifyinfo, $list->{$OID}->{'title'}, "VList"); 222 230 223 231 if (defined $self->{'sort'}) { 224 232 if ($self->{'reverse_sort'}) { 225 foreach $subOID (sort {$b->[1] cmp $a->[1];} @{$list->{$OID}->{'contents'}}) {233 foreach my $subOID (sort {$b->[1] cmp $a->[1];} @{$list->{$OID}->{'contents'}}) { 226 234 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]}); 227 235 } 228 236 } 229 237 else { 230 foreach $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) {238 foreach my $subOID (sort {$a->[1] cmp $b->[1];} @{$list->{$OID}->{'contents'}}) { 231 239 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]}); 232 240 } … … 234 242 } 235 243 else { 236 foreach $subOID (@{$list->{$OID}->{'contents'}}) {244 foreach my $subOID (@{$list->{$OID}->{'contents'}}) { 237 245 push (@{$tempinfo->{'contains'}}, {'OID'=>$subOID->[0]}); 238 246 } -
trunk/gsdl/perllib/classify/HTML.pm
r10218 r10253 31 31 use BasClas; 32 32 33 use strict; 34 no strict 'refs'; # allow filehandles to be variables and viceversa 35 33 36 sub BEGIN { 34 @ ISA = ('BasClas');37 @HTML::ISA = ('BasClas'); 35 38 } 36 39 … … 62 65 63 66 my $self = (defined $hashArgOptLists)? new BasClas($classifierslist,$inputargs,$hashArgOptLists): new BasClas($classifierslist,$inputargs); 67 68 if ($self->{'info_only'}) { 69 # don't worry about any options etc 70 return bless $self, $class; 71 } 64 72 65 73 if (!$self->{'url'}) { -
trunk/gsdl/perllib/classify/Hierarchy.pm
r10218 r10253 37 37 use sorttools; 38 38 39 use strict; 40 no strict 'refs'; # allow filehandles to be variables and viceversa 41 39 42 sub BEGIN { 40 @ ISA = ('HFileHierarchy');43 @Hierarchy::ISA = ('HFileHierarchy'); 41 44 } 42 45 … … 86 89 my $current_pos; 87 90 91 88 92 foreach my $metavalue (@$metavalues) { 89 93 $path_hash = $self->{'path_hash'}; … … 92 96 pop(@chunks); # remove the last element from the end 93 97 } 94 95 98 foreach my $folderName (@chunks) 96 99 { … … 131 134 my $metavalues = []; 132 135 # find all the metadata values 133 foreach $m (@{$self->{'meta_list'}}) {136 foreach my $m (@{$self->{'meta_list'}}) { 134 137 my $mvalues = $doc_obj->get_metadata($doc_obj->get_top_section(), $m); 135 138 next unless (@{$mvalues}); 136 139 if ($self->{'firstvalueonly'}) { 137 140 # we only want the first metadata value 138 push (@$metavalues, $mvalues [0]);141 push (@$metavalues, $mvalues->[0]); 139 142 last; 140 143 } … … 200 203 my ($num_spaces) = @_; 201 204 202 for ( $i = 0; $i < $num_spaces; $i++){205 for (my $i = 0; $i < $num_spaces; $i++){ 203 206 print STDERR " "; 204 207 } … … 240 243 if (@doc_list) { 241 244 $has_content = 1; 242 foreach $d(@doc_list) {245 foreach my $d (@doc_list) { 243 246 push (@{$entry->{'contains'}}, {'OID'=>$d}); 244 247 } -
trunk/gsdl/perllib/classify/List.pm
r10223 r10253 30 30 package List; 31 31 32 use strict; 33 no strict 'refs'; # allow filehandles to be variables and viceversa 34 32 35 use sorttools; 33 36 34 37 sub BEGIN { 35 @ ISA = ('BasClas');38 @List::ISA = ('BasClas'); 36 39 } 37 40 … … 144 147 my $values_listref= 145 148 $doc_obj->get_metadata($topsection, $m); 146 $array_size = scalar(@{$values_listref});149 my $array_size = scalar(@{$values_listref}); 147 150 if ($array_size==0 || $array_size < $mdoffset+1) { 148 151 $mdoffset = $mdoffset - $array_size; … … 222 225 $classifyinfo{'supportsmemberof'} = $memberof; 223 226 224 foreach $OID (@list) {227 foreach my $OID (@list) { 225 228 my $hashref={}; 226 229 # special oid format, if using offsets (from AZCompactList) -
trunk/gsdl/perllib/classify/Phind.pm
r10218 r10253 39 39 use unicode; 40 40 41 use strict; 42 no strict 'refs'; # allow filehandles to be variables and viceversa 43 41 44 my @removedirs = (); 42 45 … … 56 59 57 60 sub BEGIN { 58 @ ISA = ('BasClas');61 @Phind::ISA = ('BasClas'); 59 62 } 60 63 … … 71 74 closedir DIR; 72 75 73 foreach $file (@files) {76 foreach my $file (@files) { 74 77 next if $file =~ /^\.\.?$/; 75 78 my ($suffix) = $file =~ /\.([^\.]+)$/; … … 159 162 my $self = (defined $hashArgOptLists)? new BasClas($classifierslist,$inputargs,$hashArgOptLists): new BasClas($classifierslist,$inputargs); 160 163 164 if ($self->{'info_only'}) { 165 # don't worry about any options etc 166 return bless $self, $class; 167 } 168 161 169 # Ensure the Phind generate scripts are in place 162 170 my $file1 = &util::filename_cat($ENV{'GSDLHOME'}, "bin", $ENV{'GSDLOS'}, "suffix"); … … 255 263 # record this file 256 264 $self->{'total'} ++; 257 print "file $self->{'total'}: $file\n" if ($self->{'$verbosity'}); 265 # what is $file ??? 266 # print "file $self->{'total'}: $file\n" if ($self->{'$verbosity'}); 258 267 259 268 # Store document details … … 1058 1067 1059 1068 my %suffixtophind; 1060 my @ phindfrequency;1069 my @totalfrequency; 1061 1070 my (@fields, $suffixid); 1062 1071 my $nextphind = 1; … … 1110 1119 if ($linenumber % 1000 == 0) { 1111 1120 print $out "line $linenumber:\t$phindid\t$suffixid\t($text)\n"; 1112 } 1113 print $out "$num: $key\t($text)\n" if ($verbosity > 3); 1121 } 1122 # what are $num and $key?? 1123 #print $out "$num: $key\t($text)\n" if ($verbosity > 3); 1114 1124 } 1115 1125 … … 1136 1146 $documents = shift @fields; 1137 1147 $documents =~ s/d//g; 1138 @documents = split(/;/, $documents);1148 my @documents = split(/;/, $documents); 1139 1149 @documents = sort by_doc_frequency @documents; 1140 1150 … … 1413 1423 # 2. 1414 1424 # Translate phrases.3 to MGPP input files 1415 my ($key, $text, $word, $isThesaurus );1425 my ($key, $text, $word, $isThesaurus, $line); 1416 1426 my @fields; 1417 1427 my $linenumber = 0; -
trunk/gsdl/perllib/classify/SectionList.pm
r10218 r10253 35 35 use sorttools; 36 36 37 use strict; 38 no strict 'refs'; # allow filehandles to be variables and viceversa 39 37 40 sub BEGIN { 38 @ ISA = ('List');41 @SectionList::ISA = ('List'); 39 42 } 40 43 44 my $arguments = []; 41 45 my $options = { 'name' => "SectionList", 42 46 'desc' => "{SectionList.desc}", … … 71 75 my $thissection = undef; 72 76 73 my $option; 74 foreach $option (@options) 77 foreach my $option (@options) 75 78 { 76 79 if ($option =~ m/^section=(\d+)$/i) … … 118 121 if (defined $self->{'meta_list'}) { 119 122 # find the first available metadata 120 foreach $m (@{$self->{'meta_list'}}) {123 foreach my $m (@{$self->{'meta_list'}}) { 121 124 $metavalue = $doc_obj->get_metadata_element($section, $m); 122 125 $metaname = $m;
Note:
See TracChangeset
for help on using the changeset viewer.