Changeset 28637

Show
Ignore:
Timestamp:
19.11.2013 11:10:11 (6 years ago)
Author:
kjdon
Message:

Added an extra field to the database: group-position. When documents are processed into grouped doc.xml files, this field gives each document's position within its doc.xml file, starting at 1 for the first document. When reading the database to find the list of files to process for indexing, items whose group-position is > 1 must be skipped: their doc.xml file has already been seen once, so it must not be processed again.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/arcinfo.pm

    r28211 r28637  
    4040use constant INFO_STATUS_INDEX  => 1; 
    4141 
     42use constant INFO_GROUPPOS_INDEX  => 3; 
    4243use strict; 
    4344 
     
    112113    my ($index_status) = ($vals=~/^<index-status>(.*)$/m); 
    113114    my ($sortmeta) = ($vals=~/^<sort-meta>(.*)$/m); 
    114     $self->add_info ($oid,$doc_file,$index_status,$sortmeta); 
     115    my ($group_position) = ($vals=~/^<group-position>(.*)$/m); 
     116    $self->add_info ($oid,$doc_file,$index_status,$sortmeta, $group_position); 
    115117    } 
    116118} 
     
    268270    my $self = shift (@_); 
    269271    my ($filename) = @_; 
    270  
    271272    if ($filename =~ m/(contents)|(\.inf)$/) { 
    272273    $self->_save_info_txt($filename); 
     
    298299sub add_info { 
    299300    my $self = shift (@_); 
    300     my ($OID, $doc_file, $index_status, $sortmeta) = @_; 
     301    my ($OID, $doc_file, $index_status, $sortmeta, $group_position) = @_; 
    301302    $sortmeta = "" unless defined $sortmeta; 
    302303    $index_status = "I" unless defined $index_status; # I = needs indexing 
    303  
    304304    if (! defined($OID)) { 
    305305    # only happens when no files can be processed? 
     
    331331    } 
    332332 
    333     $self->{'info'}->{$OID} = [$doc_file,$index_status,$sortmeta]; 
     333    $self->{'info'}->{$OID} = [$doc_file,$index_status,$sortmeta, $group_position]; 
    334334    push (@{$self->{'order'}}, [$OID, $sortmeta]); # ORDER_OID_INDEX and ORDER_SORT_INDEX 
    335335 
     
    364364} 
    365365 
    366  
     366sub get_group_position { 
     367    my $self = shift (@_); 
     368    my ($OID) = @_; 
     369 
     370    my $group_position = undef; 
     371    my $OID_info = $self->{'info'}->{$OID}; 
     372    if (defined $OID_info) { 
     373    $group_position = $OID_info->[INFO_GROUPPOS_INDEX]; 
     374    } 
     375    else { 
     376    die "Unable to find document id $OID\n"; 
     377    } 
     378    return $group_position; 
     379     
     380} 
    367381sub add_reverseinfo { 
    368382    my $self = shift (@_); 
     
    466480 
    467481 
    468 # returns a list of the form [doc_file,index_status,$sort_meta] 
     482# returns a list of the form [doc_file,index_status,$sort_meta, $group_position] 
    469483sub get_info { 
    470484    my $self = shift (@_);