Changeset 23116 for main/trunk


Ignore:
Timestamp:
2010-10-12T13:55:07+13:00 (14 years ago)
Author:
kjdon
Message:

For incremental builds, classifiers are not really built incrementally. Previously, we reconstructed all the docs from the database and classified them, then processed any new/edited/deleted docs, updating the classifier as necessary. Now, we process all new/updated docs first, then reconstruct the docs from the database, but only classify those that have not been changed or deleted. This means that we only ever add docs to a classifier, never update or delete them. I have removed edit_mode and all the code that handled deleting entries from the classifiers.

Location:
main/trunk/greenstone2/perllib/classify
Files:
15 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/classify/AZCompactList.pm

    r22038 r23116  
    184184{
    185185    my $self = shift (@_);
    186     my ($doc_obj,$edit_mode) = @_;
     186    my ($doc_obj) = @_;
    187187
    188188    my $doc_OID = $doc_obj->get_OID();
     
    215215    my $full_doc_OID
    216216        = ($thissection ne "") ? "$doc_OID.$thissection" : $doc_OID;
    217 
    218     if (($edit_mode eq "delete") || ($edit_mode eq "update")) {
    219         print $outhandle "  Deleting $full_doc_OID for AZCompactList\n";
    220         delete $self->{'list'}->{$full_doc_OID};
    221         delete $self->{'listmetavalue'}->{$full_doc_OID};
    222         delete $self->{'list_mvpair'}->{$full_doc_OID};
    223         delete $self->{'reclassify'}->{$full_doc_OID};
    224 
    225         next if ($edit_mode eq "delete");
    226     }
    227 
    228217
    229218    if (defined $self->{'list_mvpair'}->{$full_doc_OID})
  • main/trunk/greenstone2/perllib/classify/AZList.pm

    r20428 r23116  
    121121sub classify {
    122122    my $self = shift (@_);
    123     my ($doc_obj,$edit_mode) = @_;
     123    my ($doc_obj) = @_;
    124124
    125125    my $doc_OID = $doc_obj->get_OID();
    126126    my $outhandle = $self->{'outhandle'};
    127 
    128     if (($edit_mode eq "delete") || ($edit_mode eq "update")) {
    129     $self->oid_hash_delete($doc_OID,'list');
    130     return if ($edit_mode eq "delete");     
    131     }
    132127
    133128    my $metavalue;
  • main/trunk/greenstone2/perllib/classify/AZSectionList.pm

    r18455 r23116  
    7171sub classify {
    7272    my $self = shift (@_);
    73     my ($doc_obj,$edit_mode) = @_;
     73    my ($doc_obj) = @_;
    7474
    7575    my $doc_OID = $doc_obj->get_OID();
     
    7777
    7878    while (defined $thissection) {
    79     $self->classify_section ($thissection, $doc_obj, $edit_mode);
     79    $self->classify_section ($thissection, $doc_obj);
    8080    $thissection = $doc_obj->get_next_section ($thissection);
    8181    }
     
    8484sub classify_section {
    8585    my $self = shift (@_);
    86     my ($section, $doc_obj, $edit_mode) = @_;
     86    my ($section, $doc_obj) = @_;
    8787
    8888    my $doc_OID = $doc_obj->get_OID();
    89 
    90     if ($edit_mode eq "delete") {
    91     $self->oid_hash_delete("$doc_OID$section",'list');
    92     return;
    93     }
    9489
    9590    my $metavalue;
  • main/trunk/greenstone2/perllib/classify/AllList.pm

    r22220 r23116  
    7272sub classify {
    7373    my $self = shift (@_);
    74     my ($doc_obj, $edit_mode) = @_;
     74    my ($doc_obj) = @_;
    7575   
    7676    my $doc_OID = $doc_obj->get_OID();
    7777   
    78     if ($edit_mode eq "delete") {
    79     $self->oid_array_delete($doc_OID,'list');
    80    
    81     }
    82     elsif ($edit_mode eq "update") {
    83     # do nothing. Doc is already in the list. Is the order important??
    84     }
    85     else {
    86     push (@{$self->{'list'}}, $doc_OID);
    87     }
    88 
     78    push (@{$self->{'list'}}, $doc_OID);
     79   
    8980    return;
    9081}
  • main/trunk/greenstone2/perllib/classify/BaseClassifier.pm

    r20453 r23116  
    330330sub classify {
    331331    my $self = shift (@_);
    332     my ($doc_obj, $edit_mode) = @_;
     332    my ($doc_obj) = @_;
    333333
    334334    my $outhandle = $self->{'outhandle'};
  • main/trunk/greenstone2/perllib/classify/Collage.pm

    r18455 r23116  
    139139   
    140140    my $self = shift (@_);
    141     my ($doc_obj,$edit_mode) = @_;
     141    my ($doc_obj) = @_;
    142142
    143143    my $has_image_type = 0;
     
    162162    my $doc_OID = $doc_obj->get_OID();
    163163
    164     if ($edit_mode eq "delete") {
    165         $self->oid_array_delete($doc_OID,'list');
    166     }
    167     else {
    168         push (@{$self->{'list'}}, $doc_OID);
    169     }
     164    push (@{$self->{'list'}}, $doc_OID);
     165   
    170166    }
    171167
  • main/trunk/greenstone2/perllib/classify/DateList.pm

    r20828 r23116  
    133133sub classify {
    134134    my $self = shift (@_);
    135     my ($doc_obj, $edit_mode) = @_;
     135    my ($doc_obj) = @_;
    136136
    137137    my $doc_OID = $doc_obj->get_OID();
     
    151151    }
    152152   
    153     if (($edit_mode eq "delete") || ($edit_mode eq "update")) {
    154     $self->oid_hash_delete($doc_OID,'list');
    155     return if ($edit_mode eq "delete");
    156     }
    157 
    158153    my $sort_other = "";
    159154    if (defined $self->{'sort'} && $self->{'sort'} ne "") {
  • main/trunk/greenstone2/perllib/classify/HFileHierarchy.pm

    r23081 r23116  
    181181{
    182182    my $self = shift (@_);
    183     my ($doc_obj,$edit_mode,$sortmeta,$metavalues) = @_;
     183    my ($doc_obj,$sortmeta,$metavalues) = @_;
    184184
    185185    my $outhandle = $self->{'outhandle'};
     
    191191        (defined $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}})) {
    192192
    193         if ($edit_mode eq "delete") {
    194         # find it, and remove it
    195         my $existing_list = $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'};
    196        
    197         my $filtered_list = [];
    198         foreach my $existing_oid_pair (@$existing_list) {
    199             if ($existing_oid_pair->[0] eq $doc_OID) {
    200             print $outhandle "  Deleting $doc_OID for $metaelement in hierarchy\n";
    201             }
    202             else {
    203             push(@$filtered_list,$existing_oid_pair);
    204             }
    205         }
    206         $self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'} = $filtered_list;
    207         }
    208         else {
    209         push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}},
    210               [$doc_OID, $sortmeta]);
    211         my $localid = $self->{'descriptorlist'}->{$metaelement};
    212         my $classid = $self->get_number();
    213 
    214         $doc_obj->add_metadata($doc_obj->get_top_section(), "memberof", "CL$classid.$localid");
    215         }
     193        push (@{$self->{'locatorlist'}->{$self->{'descriptorlist'}->{$metaelement}}->{'contents'}},
     194          [$doc_OID, $sortmeta]);
     195        my $localid = $self->{'descriptorlist'}->{$metaelement};
     196        my $classid = $self->get_number();
     197       
     198        $doc_obj->add_metadata($doc_obj->get_top_section(), "memberof", "CL$classid.$localid");
     199       
    216200    }
    217201    }
  • main/trunk/greenstone2/perllib/classify/HTML.pm

    r18455 r23116  
    8686sub classify {
    8787    my $self = shift (@_);
    88     my ($doc_obj,$edit_mode) = @_;
     88    my ($doc_obj) = @_;
    8989
    9090    # we don't do anything for individual documents
  • main/trunk/greenstone2/perllib/classify/Hierarchy.pm

    r18522 r23116  
    8686sub auto_classify {
    8787    my $self = shift (@_);
    88     my ($doc_obj,$edit_mode,$nosort,$sortmeta,$metavalues) = @_;
     88    my ($doc_obj,$nosort,$sortmeta,$metavalues) = @_;
    8989
    9090    my $doc_OID = $doc_obj->get_OID();
    91 
    92     if (($edit_mode eq "delete") || ($edit_mode eq "update")) {
    93     if ($nosort) {
    94         $self->oid_array_delete($doc_OID,'docs');
    95     }
    96     else {
    97         $self->oid_hash_delete($doc_OID,'docs');
    98     }
    99     return if ($edit_mode eq "delete");
    100     }
    10191
    10292    #Add all the metadata values to the hash
     
    143133sub classify {
    144134    my $self = shift (@_);
    145     my ($doc_obj,$edit_mode) = @_;
     135    my ($doc_obj) = @_;
    146136
    147137    my $doc_OID = $doc_obj->get_OID();
     
    185175
    186176    if (defined $self->{'subjectfile'}) {
    187     $self->hfile_classify($doc_obj,$edit_mode,$sortmeta,$metavalues);
     177    $self->hfile_classify($doc_obj,$sortmeta,$metavalues);
    188178    }
    189179    else {
    190     $self->auto_classify($doc_obj,$edit_mode,$nosort,$sortmeta,$metavalues);
     180    $self->auto_classify($doc_obj,$nosort,$sortmeta,$metavalues);
    191181    }
    192182}
  • main/trunk/greenstone2/perllib/classify/List.pm

    r22667 r23116  
    290290{
    291291    my $self = shift(@_);
    292     my ($doc_obj,$edit_mode) = @_;
     292    my ($doc_obj) = @_;
    293293
    294294    # If "-classify_sections" is set, classify every section of the document
     
    296296    my $section = $doc_obj->get_next_section($doc_obj->get_top_section());
    297297    while (defined $section) {
    298         $self->classify_section($doc_obj, $doc_obj->get_OID() . ".$section", $section, $edit_mode);
     298        $self->classify_section($doc_obj, $doc_obj->get_OID() . ".$section", $section);
    299299        $section = $doc_obj->get_next_section($section);
    300300    }
     
    302302    # Otherwise just classify the top document section
    303303    else {
    304     $self->classify_section($doc_obj, $doc_obj->get_OID(), $doc_obj->get_top_section(), $edit_mode);
     304    $self->classify_section($doc_obj, $doc_obj->get_OID(), $doc_obj->get_top_section());
    305305    }
    306306}
     
    310310{
    311311    my $self = shift(@_);
    312     my ($doc_obj,$section_OID,$section,$edit_mode) = @_;
     312    my ($doc_obj,$section_OID,$section) = @_;
    313313
    314314    my @metadata_groups = @{$self->{'metadata_groups'}};
     
    342342    # We're not classifying this section because it doesn't have the required metadata
    343343    return if (!$classify_section);
    344 
    345     if (($edit_mode eq "delete") || ($edit_mode eq "update")) {
    346     $self->oid_array_delete($section_OID,'OIDs');
    347     if ($edit_mode eq "delete") {
    348         return;
    349     }
    350     }
    351344   
    352345    # Otherwise, include this section in the classifier
     
    358351    # Take care not to do a metadata group more than once
    359352    unless ($metadata_groups_done{$metadata_group}) {
    360         if ($edit_mode eq "update") {
    361         # if we are updating, we delete all the old values before
    362         # adding the new ones, otherwise, the section will end up in
    363         # the classifier twice.
    364         delete $self->{$metadata_group . ".list"}->{$section_OID};
    365         }
    366 
    367353        my $remove_prefix_expr = $self->{$metadata_group . ".remove_prefix_expr"};
    368354        my $remove_suffix_expr = $self->{$metadata_group . ".remove_suffix_expr"};
  • main/trunk/greenstone2/perllib/classify/Phind.pm

    r22382 r23116  
    249249sub classify {
    250250    my $self = shift (@_);
    251     my ($doc_obj,$edit_mode) = @_;
     251    my ($doc_obj) = @_;
    252252
    253253    my $verbosity = $self->{'verbosity'};
     
    267267    my $phrlanguage = $self->{'language'};
    268268    return if ($doclanguage && ($doclanguage !~ /$phrlanguage/i));
    269 
    270     if ($edit_mode eq "delete") {
    271     # This classifier works quite differently to the others
    272     # Probably doesn't support incremental building anyway
    273     return;
    274     }
    275269
    276270    # record this file
  • main/trunk/greenstone2/perllib/classify/RecentDocumentsList.pm

    r20454 r23116  
    126126sub classify {
    127127    my $self = shift (@_);
    128     my ($doc_obj,$edit_mode) = @_;
     128    my ($doc_obj) = @_;
    129129
    130130    my $doc_OID = $doc_obj->get_OID();
     
    132132    if (!defined $lastmodified || $lastmodified eq "") {
    133133    print $self->{'outhandle'}, "RecentDocumentsList: $doc_OID has no lastmodified metadata, not classifying\n";
    134     return;
    135     }
    136 
    137     if ($edit_mode eq "delete") {
    138     $self->oid_hash_delete($doc_OID,'list');
    139134    return;
    140135    }
  • main/trunk/greenstone2/perllib/classify/SectionList.pm

    r18566 r23116  
    6363sub classify {
    6464    my $self = shift (@_);
    65     my ($doc_obj, $edit_mode, @options) = @_;
     65    my ($doc_obj, @options) = @_;
    6666   
    6767    # @options used by AZCompactList when is uses SectionList internally
     
    9797    if (defined $thissection) {
    9898    # just classify the one section
    99     $self->classify_section($thissection, $doc_obj, $edit_mode, $sortmeta, $nosort);
     99    $self->classify_section($thissection, $doc_obj, $sortmeta, $nosort);
    100100    } else   
    101101    {
    102102    $thissection = $doc_obj->get_next_section ($doc_obj->get_top_section());
    103103    while (defined $thissection) {
    104         $self->classify_section($thissection, $doc_obj, $edit_mode, $sortmeta, $nosort);
     104        $self->classify_section($thissection, $doc_obj, $sortmeta, $nosort);
    105105        $thissection = $doc_obj->get_next_section ($thissection);
    106106    }
     
    110110sub classify_section {
    111111    my $self = shift (@_);
    112     my ($section, $doc_obj, $edit_mode, $sortmeta, $nosort) = @_;
     112    my ($section, $doc_obj, $sortmeta, $nosort) = @_;
    113113
    114114    my $doc_OID = $doc_obj->get_OID();
  • main/trunk/greenstone2/perllib/classify/SimpleList.pm

    r20654 r23116  
    128128sub classify {
    129129    my $self = shift (@_);
    130     my ($doc_obj, $edit_mode) = @_;
     130    my ($doc_obj) = @_;
    131131
    132132    my $doc_OID = $doc_obj->get_OID();
     
    139139    }
    140140   
    141     if ($edit_mode eq "delete") {
    142     if ($nosort) {
    143         $self->oid_array_delete($doc_OID,'list');
    144     }
    145     else {
    146         $self->oid_hash_delete($doc_OID,'list');
    147     }
    148     return;
    149     }
    150 
    151141    my $metavalue;
    152142    my $metaname;
Note: See TracChangeset for help on using the changeset viewer.