Changeset 36670
- Timestamp: 2022-10-03T11:14:38+13:00 (18 months ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/greenstone2/perllib/classify/DateList.pm
r34109 r36670   (first column: line in r34109, second column: line in r36670; "-" removed, "+" added)
   34     34    # -metadata, use a different metadata for the date (instead of Date), still expects yyyymmdd format. this affects display cos greenstone displays Date metadata as dd month yyyy, whereas any other date metadata is displayed as yyyymmdd - this needs fixing
   35     35    # -sort specifies an additional metadata to use in sorting, will take affect when two docs have the same date.
   36         -
          36  + # -reverse_sort - sort in reverse order
          37  + # -no_special_formatting - makes the list a VList instead of a DateList - don't display Months down the side of the list
          38  + # -valid_date_regex - what constitutes a valid date? deft is \d\d\d\d. eg for Heritage, want to customise this to allow dates like 198?
          39  + # -allow_invalid_dates - do we include in the classifier documents with invalid dates? deft = no.
          40  + # -invalid_date_partition_name - if docs with invalid dates are included, they get put into one bucket. this gives the name of that bucket. eg "no date"
   37     41    package DateList;
   38     42
    …      …
   72     76    'desc' => "{DateList.no_special_formatting}",
   73     77    'type' => "flag",
   74         - 'reqd' => "no" }
   75         -
          78  + 'reqd' => "no" },
          79  + { 'name' => "valid_date_regex",
          80  + 'desc' => "{DateList.valid_date_regex}",
          81  + 'type' => "regexp",
          82  + 'deft' => "\\d\\d\\d\\d",
          83  + 'reqd' => "no" },
          84  + { 'name' => "allow_invalid_dates",
          85  + 'desc' => "{DateList.allow_invalid_dates}",
          86  + 'type' => "flag",
          87  + 'reqd' => "no" },
          88  + { 'name' => "invalid_date_partition_name",
          89  + 'desc' => "{DateList.invalid_date_partition_name}",
          90  + 'type' => "string",
          91  + 'deft' => "No Date" }
   76     92    ];
   77     93
    …      …
  137    153    # find the first available metadata
  138    154    my $date;
         155  + my $invalid = 0;
         156  + my $validre = $self->{'valid_date_regex'};
         157  +
  139    158    foreach my $m (@{$self->{'meta_list'}}) {
  140    159    $date = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $m);
  141    160    last if defined $date;
  142         - }
  143         -
         161  + }
         162  +
  144    163    if (!defined $date || $date eq "") {
  145         - # if this document doesn't contain Date element we won't
  146         - # include it in this classification
  147         - print $outhandle "DateList: $doc_OID has no date, not including it\n" if $verbosity >=2;
  148         - return;
         164  + if (!$self->{'allow_invalid_dates'}) {
         165  + # if this document doesn't contain Date element we won't
         166  + # include it in this classification
         167  + print $outhandle "DateList: $doc_OID has no date, not including it\n" if $verbosity >=2;
         168  + return;
         169  + } else {
         170  + $invalid = 1;
         171  + $date = "INVALID";
         172  + }
  149    173    }
  150    174
  151    175    # sanity check date
  152         - if ($date !~ /^\d\d\d\d.*/) {
  153         - print $outhandle "DateList: $doc_OID date: '$date' malformed: expected it to start with yyyy; not classifying\n" if $verbosity >=2;
  154         - return;
  155         - }
  156         - if ($self->{'bymonth'}) {
         176  + #if ($date !~ /^\d\d\d\d.*/) {
         177  + if ($date !~ /^$validre.*/) {
         178  + if (!$self->{'allow_invalid_dates'}) {
         179  + print $outhandle "DateList: $doc_OID date: '$date' malformed: expected it to start with $validre; not classifying\n" if $verbosity >=2;
         180  + return;
         181  + }
         182  + else {
         183  + $invalid = 1;
         184  + $date = "INVALID";
         185  + }
         186  + }
         187  + if ($self->{'bymonth'} && !$invalid) {
  157    188    # check that we have valid month - if not, set it to 00 == undefined
  158         - if ($date !~ /^\d\d\d\d-?\d\d/) {
         189  + if ($date !~ /^$validre-?\d\d/) {
  159    190    print $outhandle "DateList $doc_OID date: '$date' has no month (expecting yyyymm... or yyyy-mm...), setting date to yyyy-00\n" if $verbosity >=2;
  160         - $date =~ s/^(\d\d\d\d).*$/$1-00/;
         191  + $date =~ s/^($validre).*$/$1-00/;
  161    192    } else {
  162         - my ($year, $month) = $date =~ /^(\d\d\d\d)-?(\d\d)/;
         193  + my ($year, $month) = $date =~ /^($validre)-?(\d\d)/;
  163    194    if ($month > 12) {
  164    195    print $outhandle "DateList $doc_OID date: '$date' has invalid month, setting date to $year-00\n" if $verbosity >=2;
    …      …
  236    267    }
  237    268
  238         -
         269  + my $validre = $self->{'valid_date_regex'};
         270  + my $invalid_bucket = $self->{'invalid_date_partition_name'};
  239    271
  240    272    if ($self->{'bymonth'}) {
    …      …
  299    331    foreach my $classification (@$classlistref) {
  300    332    my $date = $self->{'list'}->{$classification};
  301         - $date =~ s/^(\d\d\d\d).*$/$1/;
         333  + if ($date =~ /^INVALID/) {
         334  + $date = $invalid_bucket;
         335  + } else {
         336  + $date =~ s/^($validre).*$/$1/;
         337  + }
  302    338    $classhash->{$date} = [] unless defined $classhash->{$date};
  303    339    push (@{$classhash->{$date}}, $classification);
    …      …
  305    341
  306    342    }
  307         -
         343  +
  308    344    # only compact the list if nogroup not specified
  309    345    if (!$self->{'nogroup'}) {
         346  + #print STDERR "compacting list\n";
  310    347    $classhash = $self->compactlist ($classhash);
  311    348    }
    …      …
  346    383    }
  347    384    foreach my $subsection (@subsectionlist) {
         385  + if ($subsection eq $self->{'invalid_date_partition_name'}) {
         386  + # leave this one as is
         387  + $compactedhash->{$subsection} = $classhashref->{$subsection};
         388  + next;
         389  + }
         390  + # print STDERR "in sub $subsection\n";
  348    391    $currentfirstdate = $subsection if $currentfirstdate eq "";
  349    392    if ((scalar (@currentOIDs) < $min) ||
Note: See TracChangeset for help on using the changeset viewer.