Changeset 6482


Ignore:
Timestamp:
2004-01-14T12:14:32+13:00 (20 years ago)
Author:
jrm21
Message:

use the lowercase name of the metadata as the hash key, and then do a
hash lookup for each item rather than a search of the keys.
(This is MUCH faster, especially for large collections).

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/classify/AZCompactList.pm

    r6408 r6482  
    485485        }
    486486
    487         $self->{'classifiers'}->{$metavalue}
     487        # use the lower case, for speed of lookup.
     488        my $meta_lc=lc($metavalue);
     489        $self->{'classifiers'}->{$meta_lc}
    488490        = { 'classifyobj'   => $listclassobj,
    489491            'formattednode' => $formatted_node };
     
    510512
    511513    # find metavalue in list of sub-classifiers
    512     my $found = 0;
    513     my $node_name;
    514     foreach $node_name (keys %{$self->{'classifiers'}})
     514    # check if we have a key (lower case) for this metadata value
     515    my $node_name=lc($metavalue);
     516    if (exists $self->{'classifiers'}->{$node_name})
    515517    {
    516         $resafe_node_name = $node_name;
    517         # escape chars that mean something to perl...
    518         $resafe_node_name =~ s/([\\\(\)\[\]\{\}\^\$\.\+\*\?\|])/\\$1/g;
    519         if ($metavalue =~ m/^$resafe_node_name$/i)
    520         {
    521         my ($doc_obj,$date) = @{$self->{'reclassify'}->{$doc_OID}};
    522 
    523         ## date appears to not be used in classifier call ####
    524 
    525         if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
    526         {
    527             my $section=$1;
    528             if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
    529             $self->{'classifiers'}->{$node_name}->{'classifyobj'}
    530             ->classify($doc_obj, "Section=$section");
    531             } else { # section level
    532             # Thanks to Don Gourley for this...
    533             # classify can't handle multi-level section
    534             $self->{'classifiers'}->{$node_name}->{'classifyobj'}
    535             ->classify_section($section, $doc_obj, $date);
    536             }
    537         }
    538         else
    539         {
     518        my ($doc_obj,$date) = @{$self->{'reclassify'}->{$doc_OID}};
     519
     520        ## date appears to not be used in classifier call ####
     521
     522        if ($doc_OID =~ m/^[^\.]*\.([\d\.]+)$/)
     523        {
     524        my $section=$1;
     525        if ($self->{'doclevel'} =~ m/^top/i) { # toplevel
    540526            $self->{'classifiers'}->{$node_name}->{'classifyobj'}
    541             ->classify($doc_obj);
    542         }
    543        
    544         $found = 1;
    545         last;
    546         }
    547     }
    548    
    549     if (!$found)
    550     {
     527            ->classify($doc_obj, "Section=$section");
     528        } else { # section level
     529            # Thanks to Don Gourley for this...
     530            # classify can't handle multi-level section
     531            $self->{'classifiers'}->{$node_name}->{'classifyobj'}
     532            ->classify_section($section, $doc_obj, $date);
     533        }
     534        }
     535        else
     536        {
     537        $self->{'classifiers'}->{$node_name}->{'classifyobj'}
     538        ->classify($doc_obj);
     539        }
     540    } else { # this key is not in the hash
    551541        my $outhandle=$self->{outhandle};
    552542        print $outhandle "Warning: AZCompactList::reclassify ";
     
    730720    my $title = $self->{'reclassifylist'}->{$classification};
    731721    $title =~ s/&(.){2,4};//g; # remove any HTML special chars
    732 ### $title =~ s/^\s+//g; # remove a leading spaces
    733 ### $title =~ s/^_+//g; # remove a leading underscores
    734     $title =~ s/^\W+//g;
    735 ### $title =~ s/^(\'|\`|\"|\:|\()//g; # remove any opening punctutation
     722    $title =~ s/^\W+//g; # remove leading non-word chars
    736723
    737724# only want first character for classification
    738     $title =~ m/^(.)/; $title=$1;
     725    $title =~ m/^(.)/;
     726    if ($1) {
     727        $title=$1;
     728    } else {
     729        print STDERR "no first character found for \"$title\" - \"" .
     730        $self->{'reclassifylist'}->{$classification} . "\"\n";
     731    }
    739732    $title =~ tr/[a-z]/[A-Z]/;
    740733
Note: See TracChangeset for help on using the changeset viewer.