Ignore:
Timestamp:
2009-02-03T09:48:19+13:00 (15 years ago)
Author:
davidb
Message:

Additions to support deleting documents from the index. This only works for indexers that support incremental building, e.g. Lucene.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/basebuildproc.pm

    r17579 r18456  
    3535use doc;
    3636use docproc;
    37 use strict; no strict 'subs';
     37use strict;
     38no strict 'subs';
     39no strict 'refs';
    3840use util;
    3941
     
    366368
    367369
    368 sub infodb {
    369     my $self = shift (@_);
    370     my ($doc_obj, $filename) = @_;
     370sub infodbedit {
     371    my $self = shift (@_);
     372    my ($doc_obj, $filename, $edit_mode) = @_;
    371373
    372374    # only output this document if it is a "indexed_doc" or "info_doc" (database only) document
     
    396398    }
    397399
    398     #add this document to the browse structure
    399     push(@{$self->{'doclist'}},$doc_obj->get_OID())
    400     unless ($doctype eq "classification");
     400    if (($edit_mode eq "add") || ($edit_mode eq "reindex")) {
     401    #add this document to the browse structure
     402    push(@{$self->{'doclist'}},$doc_obj->get_OID())
     403        unless ($doctype eq "classification");
     404    }
     405    else {
     406    # delete => remove this doc from browse structure
     407    my $del_doc_oid = $doc_obj->get_OID();
     408
     409    my @filtered_doc_list = ();
     410    foreach my $oid (@{$self->{'doclist'}}) {
     411        push(@filtered_doc_list,$oid) if ($oid ne $del_doc_oid);
     412    }
     413    $self->{'doclist'} = \@filtered_doc_list;
     414    }
     415
    401416
    402417    # classify this document
    403     &classify::classify_doc ($self->{'classifiers'}, $doc_obj);
    404 
    405     # this is another document
    406     $self->{'num_docs'} += 1 unless ($doctype eq "classification");
     418    &classify::classify_doc ($self->{'classifiers'}, $doc_obj, $edit_mode);
     419
     420    if (($edit_mode eq "add") || ($edit_mode eq "reindex")) {
     421    # this is another document
     422    $self->{'num_docs'} += 1 unless ($doctype eq "classification");
     423    }
     424    else {
     425    # delete
     426    $self->{'num_docs'} -= 1 unless ($doctype eq "classification");
     427    return;
     428    }
    407429
    408430    # is this a paged or a hierarchical document
     
    563585
    564586
     587
     588
     589sub infodb {
     590    my $self = shift (@_);
     591    my ($doc_obj, $filename) = @_;
     592
     593    $self->infodbedit($doc_obj,$filename,"add");
     594}
     595
     596sub infodbreindex {
     597    my $self = shift (@_);
     598    my ($doc_obj, $filename) = @_;
     599
     600    $self->infodbedit($doc_obj,$filename,"reindex");
     601}
     602
     603sub infodbdelete {
     604    my $self = shift (@_);
     605    my ($doc_obj, $filename) = @_;
     606
     607    $self->infodbedit($doc_obj,$filename,"delete");
     608}
     609
     610
    565611sub text {
    566612    my $self = shift (@_);
     
    571617    die "\n";
    572618}
     619
     620sub textreindex
     621{
     622    my $self = shift @_;
     623
     624    my $outhandle = $self->{'outhandle'};
     625    print $outhandle "basebuildproc::textreindex function must be implemented in sub classes\n";
     626    if (!$self->is_incremental_capable()) {
     627
     628    print $outhandle "  This operation is only possible with indexing tools with that support\n";
     629    print $outhandle "  incremental building\n";
     630    }
     631    die "\n";
     632}
     633
     634sub textdelete
     635{
     636    my $self = shift @_;
     637
     638    my $outhandle = $self->{'outhandle'};
     639    print $outhandle "basebuildproc::textdelete function must be implemented in sub classes\n";
     640    if (!$self->is_incremental_capable()) {
     641
     642    print $outhandle "  This operation is only possible with indexing tools with that support\n";
     643    print $outhandle "  incremental building\n";
     644    }
     645    die "\n";
     646}
     647
    573648
    574649# should the document be indexed - according to the subcollection and language
     
    692767}
    693768
    694 sub assoc_files() {
     769sub assoc_files
     770{
    695771    my $self = shift (@_);
    696772    my ($doc_obj, $archivedir) = @_;
Note: See TracChangeset for help on using the changeset viewer.