Changeset 23116

Show
Ignore:
Timestamp:
12.10.2010 13:55:07 (9 years ago)
Author:
kjdon
Message:

For incremental builds, classifiers are not really built incrementally. Previously, we reconstructed all the docs from the database and classified them, then processed any new/edited/deleted docs, updating the classifier as necessary. Now, we process all new/updated docs first, then reconstruct the docs from the database, but only classify those that were not changed or deleted. This means that we only ever add docs to a classifier, and never update or delete them. I have removed edit_mode and all code that handled deleting docs from the classifiers.

Location:
main/trunk/greenstone2/perllib/classify
Files:
15 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/classify/AZCompactList.pm

    r22038 r23116  
    184184{ 
    185185    my $self = shift (@_); 
    186     my ($doc_obj,$edit_mode) = @_; 
     186    my ($doc_obj) = @_; 
    187187 
    188188    my $doc_OID = $doc_obj->get_OID(); 
     
    215215    my $full_doc_OID  
    216216        = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID; 
    217  
    218     if (($edit_mode eq "delete") || ($edit_mode eq "update")) { 
    219         print $outhandle "  Deleting $full_doc_OID for AZCompactList\n"; 
    220         delete $self->{'list'}->{$full_doc_OID}; 
    221         delete $self->{'listmetavalue'}->{$full_doc_OID}; 
    222         delete $self->{'list_mvpair'}->{$full_doc_OID}; 
    223         delete $self->{'reclassify'}->{$full_doc_OID}; 
    224  
    225         next if ($edit_mode eq "delete"); 
    226     } 
    227  
    228217 
    229218    if (defined $self->{'list_mvpair'}->{$full_doc_OID})  
  • main/trunk/greenstone2/perllib/classify/AZList.pm

    r20428 r23116  
    121121sub classify { 
    122122    my $self = shift (@_); 
    123     my ($doc_obj,$edit_mode) = @_; 
     123    my ($doc_obj) = @_; 
    124124 
    125125    my $doc_OID = $doc_obj->get_OID(); 
    126126    my $outhandle = $self->{'outhandle'}; 
    127  
    128     if (($edit_mode eq "delete") || ($edit_mode eq "update")) { 
    129     $self->oid_hash_delete($doc_OID,'list'); 
    130     return if ($edit_mode eq "delete");      
    131     } 
    132127 
    133128    my $metavalue; 
  • main/trunk/greenstone2/perllib/classify/AZSectionList.pm

    r18455 r23116  
    7171sub classify { 
    7272    my $self = shift (@_); 
    73     my ($doc_obj,$edit_mode) = @_; 
     73    my ($doc_obj) = @_; 
    7474 
    7575    my $doc_OID = $doc_obj->get_OID(); 
     
    7777 
    7878    while (defined $thissection) { 
    79     $self->classify_section ($thissection, $doc_obj, $edit_mode); 
     79    $self->classify_section ($thissection, $doc_obj); 
    8080    $thissection = $doc_obj->get_next_section ($thissection); 
    8181    } 
     
    8484sub classify_section { 
    8585    my $self = shift (@_); 
    86     my ($section, $doc_obj, $edit_mode) = @_; 
     86    my ($section, $doc_obj) = @_; 
    8787 
    8888    my $doc_OID = $doc_obj->get_OID(); 
    89  
    90     if ($edit_mode eq "delete") { 
    91     $self->oid_hash_delete("$doc_OID$section",'list'); 
    92     return; 
    93     } 
    9489 
    9590    my $metavalue; 
  • main/trunk/greenstone2/perllib/classify/AllList.pm

    r22220 r23116  
    7272sub classify { 
    7373    my $self = shift (@_); 
    74     my ($doc_obj, $edit_mode) = @_; 
     74    my ($doc_obj) = @_; 
    7575     
    7676    my $doc_OID = $doc_obj->get_OID(); 
    7777    
    78     if ($edit_mode eq "delete") { 
    79     $self->oid_array_delete($doc_OID,'list'); 
    80      
    81     } 
    82     elsif ($edit_mode eq "update") { 
    83     # do nothing. Doc is already in the list. Is the order important?? 
    84     } 
    85     else { 
    86     push (@{$self->{'list'}}, $doc_OID); 
    87     } 
    88  
     78    push (@{$self->{'list'}}, $doc_OID); 
     79     
    8980    return; 
    9081} 
  • main/trunk/greenstone2/perllib/classify/BaseClassifier.pm

    r20453 r23116  
    330330sub classify { 
    331331    my $self = shift (@_); 
    332     my ($doc_obj, $edit_mode) = @_; 
     332    my ($doc_obj) = @_; 
    333333 
    334334    my $outhandle = $self->{'outhandle'}; 
  • main/trunk/greenstone2/perllib/classify/Collage.pm

    r18455 r23116  
    139139    
    140140    my $self = shift (@_); 
    141     my ($doc_obj,$edit_mode) = @_; 
     141    my ($doc_obj) = @_; 
    142142 
    143143    my $has_image_type = 0; 
     
    162162    my $doc_OID = $doc_obj->get_OID(); 
    163163 
    164     if ($edit_mode eq "delete") { 
    165         $self->oid_array_delete($doc_OID,'list'); 
    166     } 
    167     else { 
    168         push (@{$self->{'list'}}, $doc_OID); 
    169     } 
     164    push (@{$self->{'list'}}, $doc_OID); 
     165     
    170166    } 
    171167 
  • main/trunk/greenstone2/perllib/classify/DateList.pm

    r20828 r23116  
    133133sub classify { 
    134134    my $self = shift (@_); 
    135     my ($doc_obj, $edit_mode) = @_; 
     135    my ($doc_obj) = @_; 
    136136 
    137137    my $doc_OID = $doc_obj->get_OID(); 
     
    151151    } 
    152152     
    153     if (($edit_mode eq "delete") || ($edit_mode eq "update")) { 
    154     $self->oid_hash_delete($doc_OID,'list'); 
    155     return if ($edit_mode eq "delete"); 
    156     } 
    157  
    158153    my $sort_other = ""; 
    159154    if (defined $self->{'sort'} && $self->{'sort'} ne "") { 
  • main/trunk/greenstone2/perllib/classify/HFileHierarchy.pm

    r23081 r23116  
    181181{ 
    182182    my $self = shift (@_); 
    183     my ($doc_obj,$edit_mode,$sortmeta,$metavalues) = @_; 
     183    my ($doc_obj,$sortmeta,$metavalues) = @_; 
    184184 
    185185    my $outhandle = $self->{'outhandle'}; 
     
    191191        (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) { 
    192192 
    193         if ($edit_mode eq "delete") { 
    194         # find it, and remove it 
    195         my $existing_list = $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}; 
    196          
    197         my $filtered_list = []; 
    198         foreach my $existing_oid_pair (@$existing_list) { 
    199             if ($existing_oid_pair->[0] eq $doc_OID) { 
    200             print $outhandle "  Deleting $doc_OID for $metaelement in hierarchy\n"; 
    201             } 
    202             else { 
    203             push(@$filtered_list,$existing_oid_pair); 
    204             } 
    205         } 
    206         $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'} = $filtered_list; 
    207         } 
    208         else { 
    209         push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}},  
    210               [$doc_OID, $sortmeta]); 
    211         my $localid = $self->{'descriptorlist'}->{$metaelement}; 
    212         my $classid = $self->get_number(); 
    213  
    214         $doc_obj->add_metadata($doc_obj->get_top_section(), "memberof", "CL$classid.$localid"); 
    215         } 
     193        push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}},  
     194          [$doc_OID, $sortmeta]); 
     195        my $localid = $self->{'descriptorlist'}->{$metaelement}; 
     196        my $classid = $self->get_number(); 
     197         
     198        $doc_obj->add_metadata($doc_obj->get_top_section(), "memberof", "CL$classid.$localid"); 
     199         
    216200    } 
    217201    } 
  • main/trunk/greenstone2/perllib/classify/HTML.pm

    r18455 r23116  
    8686sub classify { 
    8787    my $self = shift (@_); 
    88     my ($doc_obj,$edit_mode) = @_; 
     88    my ($doc_obj) = @_; 
    8989 
    9090    # we don't do anything for individual documents 
  • main/trunk/greenstone2/perllib/classify/Hierarchy.pm

    r18522 r23116  
    8686sub auto_classify { 
    8787    my $self = shift (@_); 
    88     my ($doc_obj,$edit_mode,$nosort,$sortmeta,$metavalues) = @_; 
     88    my ($doc_obj,$nosort,$sortmeta,$metavalues) = @_; 
    8989 
    9090    my $doc_OID = $doc_obj->get_OID(); 
    91  
    92     if (($edit_mode eq "delete") || ($edit_mode eq "update")) { 
    93     if ($nosort) { 
    94         $self->oid_array_delete($doc_OID,'docs'); 
    95     } 
    96     else { 
    97         $self->oid_hash_delete($doc_OID,'docs'); 
    98     } 
    99     return if ($edit_mode eq "delete"); 
    100     } 
    10191 
    10292    #Add all the metadata values to the hash 
     
    143133sub classify { 
    144134    my $self = shift (@_); 
    145     my ($doc_obj,$edit_mode) = @_; 
     135    my ($doc_obj) = @_; 
    146136 
    147137    my $doc_OID = $doc_obj->get_OID(); 
     
    185175 
    186176    if (defined $self->{'subjectfile'}) { 
    187     $self->hfile_classify($doc_obj,$edit_mode,$sortmeta,$metavalues); 
     177    $self->hfile_classify($doc_obj,$sortmeta,$metavalues); 
    188178    } 
    189179    else { 
    190     $self->auto_classify($doc_obj,$edit_mode,$nosort,$sortmeta,$metavalues); 
     180    $self->auto_classify($doc_obj,$nosort,$sortmeta,$metavalues); 
    191181    } 
    192182} 
  • main/trunk/greenstone2/perllib/classify/List.pm

    r22667 r23116  
    290290{ 
    291291    my $self = shift(@_); 
    292     my ($doc_obj,$edit_mode) = @_; 
     292    my ($doc_obj) = @_; 
    293293 
    294294    # If "-classify_sections" is set, classify every section of the document 
     
    296296    my $section = $doc_obj->get_next_section($doc_obj->get_top_section()); 
    297297    while (defined $section) { 
    298         $self->classify_section($doc_obj, $doc_obj->get_OID() . ".$section", $section, $edit_mode); 
     298        $self->classify_section($doc_obj, $doc_obj->get_OID() . ".$section", $section); 
    299299        $section = $doc_obj->get_next_section($section); 
    300300    } 
     
    302302    # Otherwise just classify the top document section 
    303303    else { 
    304     $self->classify_section($doc_obj, $doc_obj->get_OID(), $doc_obj->get_top_section(), $edit_mode); 
     304    $self->classify_section($doc_obj, $doc_obj->get_OID(), $doc_obj->get_top_section()); 
    305305    } 
    306306} 
     
    310310{ 
    311311    my $self = shift(@_); 
    312     my ($doc_obj,$section_OID,$section,$edit_mode) = @_; 
     312    my ($doc_obj,$section_OID,$section) = @_; 
    313313 
    314314    my @metadata_groups = @{$self->{'metadata_groups'}}; 
     
    342342    # We're not classifying this section because it doesn't have the required metadata 
    343343    return if (!$classify_section); 
    344  
    345     if (($edit_mode eq "delete") || ($edit_mode eq "update")) { 
    346     $self->oid_array_delete($section_OID,'OIDs'); 
    347     if ($edit_mode eq "delete") { 
    348         return; 
    349     } 
    350     } 
    351344    
    352345    # Otherwise, include this section in the classifier 
     
    358351    # Take care not to do a metadata group more than once 
    359352    unless ($metadata_groups_done{$metadata_group}) { 
    360         if ($edit_mode eq "update") { 
    361         # if we are updating, we delete all the old values before  
    362         # adding the new ones, otherwise, the section will end up in  
    363         # the classifier twice. 
    364         delete $self->{$metadata_group . ".list"}->{$section_OID}; 
    365         } 
    366  
    367353        my $remove_prefix_expr = $self->{$metadata_group . ".remove_prefix_expr"}; 
    368354        my $remove_suffix_expr = $self->{$metadata_group . ".remove_suffix_expr"}; 
  • main/trunk/greenstone2/perllib/classify/Phind.pm

    r22382 r23116  
    249249sub classify { 
    250250    my $self = shift (@_); 
    251     my ($doc_obj,$edit_mode) = @_; 
     251    my ($doc_obj) = @_; 
    252252 
    253253    my $verbosity = $self->{'verbosity'}; 
     
    267267    my $phrlanguage = $self->{'language'}; 
    268268    return if ($doclanguage && ($doclanguage !~ /$phrlanguage/i)); 
    269  
    270     if ($edit_mode eq "delete") { 
    271     # This classifier works quite differently to the others 
    272     # Probably doesn't support incremental building anyway 
    273     return; 
    274     } 
    275269 
    276270    # record this file 
  • main/trunk/greenstone2/perllib/classify/RecentDocumentsList.pm

    r20454 r23116  
    126126sub classify { 
    127127    my $self = shift (@_); 
    128     my ($doc_obj,$edit_mode) = @_; 
     128    my ($doc_obj) = @_; 
    129129 
    130130    my $doc_OID = $doc_obj->get_OID(); 
     
    132132    if (!defined $lastmodified || $lastmodified eq "") { 
    133133    print $self->{'outhandle'}, "RecentDocumentsList: $doc_OID has no lastmodified metadata, not classifying\n"; 
    134     return; 
    135     } 
    136  
    137     if ($edit_mode eq "delete") { 
    138     $self->oid_hash_delete($doc_OID,'list'); 
    139134    return; 
    140135    } 
  • main/trunk/greenstone2/perllib/classify/SectionList.pm

    r18566 r23116  
    6363sub classify { 
    6464    my $self = shift (@_); 
    65     my ($doc_obj, $edit_mode, @options) = @_; 
     65    my ($doc_obj, @options) = @_; 
    6666     
    6767    # @options used by AZCompactList when is uses SectionList internally 
     
    9797    if (defined $thissection) { 
    9898    # just classify the one section 
    99     $self->classify_section($thissection, $doc_obj, $edit_mode, $sortmeta, $nosort); 
     99    $self->classify_section($thissection, $doc_obj, $sortmeta, $nosort); 
    100100    } else     
    101101    { 
    102102    $thissection = $doc_obj->get_next_section ($doc_obj->get_top_section()); 
    103103    while (defined $thissection) { 
    104         $self->classify_section($thissection, $doc_obj, $edit_mode, $sortmeta, $nosort); 
     104        $self->classify_section($thissection, $doc_obj, $sortmeta, $nosort); 
    105105        $thissection = $doc_obj->get_next_section ($thissection); 
    106106    } 
     
    110110sub classify_section { 
    111111    my $self = shift (@_); 
    112     my ($section, $doc_obj, $edit_mode, $sortmeta, $nosort) = @_; 
     112    my ($section, $doc_obj, $sortmeta, $nosort) = @_; 
    113113 
    114114    my $doc_OID = $doc_obj->get_OID(); 
  • main/trunk/greenstone2/perllib/classify/SimpleList.pm

    r20654 r23116  
    128128sub classify { 
    129129    my $self = shift (@_); 
    130     my ($doc_obj, $edit_mode) = @_; 
     130    my ($doc_obj) = @_; 
    131131 
    132132    my $doc_OID = $doc_obj->get_OID(); 
     
    139139    } 
    140140     
    141     if ($edit_mode eq "delete") { 
    142     if ($nosort) { 
    143         $self->oid_array_delete($doc_OID,'list'); 
    144     } 
    145     else { 
    146         $self->oid_hash_delete($doc_OID,'list'); 
    147     } 
    148     return; 
    149     } 
    150  
    151141    my $metavalue; 
    152142    my $metaname;