Changeset 28637


Ignore:
Timestamp:
2013-11-19T11:10:11+13:00 (10 years ago)
Author:
kjdon
Message:

Added an extra field to the database: group-position. When documents are processed into grouped doc.xml files, this field gives each document's position within its doc.xml file, with the first document at position 1. When reading the database to find the list of files to process for indexing, items whose group-position is greater than 1 must not be processed: their doc.xml file has already been seen once, so it should not be processed again.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/perllib/arcinfo.pm

    r28211 r28637  
    4040use constant INFO_STATUS_INDEX  => 1;
    4141
     42use constant INFO_GROUPPOS_INDEX  => 3;
    4243use strict;
    4344
     
    112113    my ($index_status) = ($vals=~/^<index-status>(.*)$/m);
    113114    my ($sortmeta) = ($vals=~/^<sort-meta>(.*)$/m);
    114     $self->add_info ($oid,$doc_file,$index_status,$sortmeta);
     115    my ($group_position) = ($vals=~/^<group-position>(.*)$/m);
     116    $self->add_info ($oid,$doc_file,$index_status,$sortmeta, $group_position);
    115117    }
    116118}
     
    268270    my $self = shift (@_);
    269271    my ($filename) = @_;
    270 
    271272    if ($filename =~ m/(contents)|(\.inf)$/) {
    272273    $self->_save_info_txt($filename);
     
    298299sub add_info {
    299300    my $self = shift (@_);
    300     my ($OID, $doc_file, $index_status, $sortmeta) = @_;
     301    my ($OID, $doc_file, $index_status, $sortmeta, $group_position) = @_;
    301302    $sortmeta = "" unless defined $sortmeta;
    302303    $index_status = "I" unless defined $index_status; # I = needs indexing
    303 
    304304    if (! defined($OID)) {
    305305    # only happens when no files can be processed?
     
    331331    }
    332332
    333     $self->{'info'}->{$OID} = [$doc_file,$index_status,$sortmeta];
     333    $self->{'info'}->{$OID} = [$doc_file,$index_status,$sortmeta, $group_position];
    334334    push (@{$self->{'order'}}, [$OID, $sortmeta]); # ORDER_OID_INDEX and ORDER_SORT_INDEX
    335335
     
    364364}
    365365
    366 
     366sub get_group_position {
     367    my $self = shift (@_);
     368    my ($OID) = @_;
     369
     370    my $group_position = undef;
     371    my $OID_info = $self->{'info'}->{$OID};
     372    if (defined $OID_info) {
     373    $group_position = $OID_info->[INFO_GROUPPOS_INDEX];
     374    }
     375    else {
     376    die "Unable to find document id $OID\n";
     377    }
     378    return $group_position;
     379   
     380}
    367381sub add_reverseinfo {
    368382    my $self = shift (@_);
     
    466480
    467481
    468 # returns a list of the form [doc_file,index_status,$sort_meta]
     482# returns a list of the form [doc_file,index_status,$sort_meta, $group_position]
    469483sub get_info {
    470484    my $self = shift (@_);
Note: See TracChangeset for help on using the changeset viewer.