Changeset 34109 for main


Ignore:
Timestamp:
2020-05-04T08:39:22+12:00 (4 years ago)
Author:
kjdon
Message:

Tidied this up a bit. We now leave _textmonth00_ in place if the month is undefined: in gs2 this resolves to the empty string, and in gs3 I have added an 'undefined' string. The date is now sanity-checked once, when the doc is classified, rather than later on in multiple places in the code. If we are classifying bymonth and the month is absent or invalid, it is set to 00. Dates that don't at least start with yyyy are no longer allowed.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/classify/DateList.pm

    r33902 r34109  
    2828# date is assumed to be in the form yyyymmdd
    2929
    30 # at present dates are split by year - this should change
    31 # jrm21 - added option "bymonth", which splits by year and month.
    32 
    33 # 23/09/03 Added some more options -kjdon.
    34 # these include:
     30# options
     31# -bymonth: splits by month as well as year
    3532# -nogroup, which makes each year (or year+month) an individual entry in
    3633# the horizontal list and prevents compaction
     
    111108    $self->{'sort'} = $self->strip_ex_from_metadata($self->{'sort'});
    112109 
    113     # now can have comma separated list of Dates - we just use the first one (for now)
     110    # now can have comma separated list of Dates - we just use the first value found (for now)
    114111    my @meta_list = split(/,/, $self->{"metadata"});
    115112    $self->{'meta_list'} = \@meta_list;
     
    136133
    137134    my $doc_OID = $doc_obj->get_OID();
    138 
     135    my $outhandle = $self->{'outhandle'};
     136    my $verbosity = $self->{'verbosity'};
    139137    # find the first available metadata
    140138    my $date;
     
    147145    # if this document doesn't contain Date element we won't
    148146    # include it in this classification
     147    print $outhandle "DateList: $doc_OID has no date, not including it\n" if $verbosity >=2;
    149148    return;
    150149    }
    151150
     151    # sanity check date
     152    if ($date !~ /^\d\d\d\d.*/) {
     153    print $outhandle "DateList: $doc_OID date: '$date' malformed: expected it to start with yyyy; not classifying\n" if $verbosity >=2;
     154    return;
     155    }
     156    if ($self->{'bymonth'}) {
     157    # check that we have valid month - if not, set it to 00 == undefined
     158    if ($date !~ /^\d\d\d\d-?\d\d/) {
     159    print $outhandle "DateList $doc_OID date: '$date' has no month (expecting yyyymm... or yyyy-mm...), setting date to yyyy-00\n" if $verbosity >=2;
     160    $date =~ s/^(\d\d\d\d).*$/$1-00/;
     161    } else {
     162        my ($year, $month) = $date =~ /^(\d\d\d\d)-?(\d\d)/;
     163        if ($month > 12) {
     164        print $outhandle "DateList $doc_OID date: '$date' has invalid month, setting date to $year-00\n" if $verbosity >=2;
     165        $date = "$year-00";
     166        }
     167    }
     168    }
     169   
    152170    my $sort_other = "";
    153171    if (defined $self->{'sort'} && $self->{'sort'} ne "") {
     
    157175   
    158176    if (defined $self->{'list'}->{$doc_OID}) {
    159     my $outhandle = $self->{'outhandle'};
    160     print $outhandle "WARNING: DateList::classify called multiple times for $doc_OID\n";
     177   
     178    print $outhandle "WARNING: DateList::classify called multiple times for $doc_OID, overwriting previous stored date value ($self->{'list'}->{$doc_OID}) with $date$sort_other \n";
    161179    }
    162        
     180   
    163181    $self->{'list'}->{$doc_OID} = "$date$sort_other";
    164182
     
    218236    }
    219237
     238   
    220239
    221240    if ($self->{'bymonth'}) {
     
    227246        my $date = $self->{'list'}->{$classification};
    228247        $date =~ s/^(\d\d\d\d)-?(\d\d).*$/$1 _textmonth$2_/;
    229         # sanity check if month is zero
    230         if ($date =~ /00_$/) {
    231             $date =~ s/^(\d\d\d\d).*$/$1/g;
    232         }
    233248        $classhash->{$date} = [] unless defined $classhash->{$date};
    234249        push (@{$classhash->{$date}}, $classification);
     
    240255        $date =~ s/^(\d\d\d\d)-?(\d\d).*$/$1 _textmonth$2_/;
    241256        my ($year, $month)=($1,$2);
    242         # sanity check if month is zero
    243         if ($date =~ /00_$/) {
    244             $date =~ s/^(\d\d\d\d).*$/$1/g;
    245         }
    246257        # create subclass if it doesn't already exist
    247258        $classhash->{$year} = () unless defined $classhash->{$year};
     
    268279          foreach my $subsubclass (@subsubclasslist) {
    269280              my $monthname=$subsubclass;
    270               if ($monthname >= 1 && $monthname <= 12) {
    271               $monthname="_textmonth" . $monthname . "_";
    272               }
     281              $monthname="_textmonth" . $monthname . "_";
    273282              my $monthclassify=$self->get_entry($monthname, $self->{'childtype'});
    274283              push (@{$yearclassify->{'contains'}}, $monthclassify);
Note: See TracChangeset for help on using the changeset viewer.