Changeset 15696


Ignore:
Timestamp:
2008-05-26T13:05:16+12:00 (16 years ago)
Author:
mdewsnip
Message:

(Adding new DB support) Moved the GDBM-specific code out of infodb() into a new write_infodb_entry_gdbm() function, in preparation for adding SQLite support.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/basebuildproc.pm

    r15695 r15696  
    3434use doc;
    3535use docproc;
     36use strict; no strict 'subs';
    3637use util;
    3738
     
    330331    my $self = shift (@_);
    331332    my ($doc_obj, $filename) = @_;
    332     my $handle = $self->{'output_handle'};
    333 
     333
     334    # only output this document if it is a "indexed_doc" or "info_doc" (database only) document
    334335    my $doctype = $doc_obj->get_doc_type();
    335 
    336     # only output this document if it is a "indexed_doc" or "info_doc" (database only) document
    337336    return if ($doctype ne "indexed_doc" && $doctype ne "info_doc");
    338337
    339338    my $archivedir = "";
    340 
    341339    if (defined $filename)
    342340    {
    343341    # doc_obj derived directly from file
    344 
    345342    my ($dir) = $filename =~ /^(.*?)(?:\/|\\)[^\/\\]*$/;
    346343    $dir = "" unless defined $dir;
     
    361358    }
    362359
    363 
    364     #GRB: moved 1/06/2004 from GRB01062004
    365360    #add this document to the browse structure
    366361    push(@{$self->{'doclist'}},$doc_obj->get_OID())
     
    369364    # classify this document
    370365    &classify::classify_doc ($self->{'classifiers'}, $doc_obj);
    371     #GRB: end of moved block
    372366
    373367    # this is another document
     
    380374    my $doc_OID = $doc_obj->get_OID();
    381375    my $first = 1;
    382     my $url = "";
    383376
    384377    $self->{'doc_mdprefix_fields'} = {};
     
    391384        $section_OID = $doc_OID . "." . $section;
    392385    }
     386    my %section_infodb = ();
    393387
    394388    # update a few statistics
    395389    $self->{'num_bytes'} += $doc_obj->get_text_length ($section);
    396390    $self->{'num_sections'} += 1 unless ($doctype eq "classification");
    397 
    398     # output the section name
    399     print $handle "[$section_OID]\n";
    400391
    401392    # output the fact that this document is a document (unless doctype
     
    403394    my $dtype = $doc_obj->get_metadata_element ($section, "doctype");
    404395    if (!defined $dtype || $dtype !~ /\w/) {
    405         print $handle "<doctype>doc\n";
     396        $section_infodb{"doctype"} = "doc";
    406397    }
    407398
     
    411402    # explicitly add <hastxt> as this is preserved as metadata when
    412403    # the database file is loaded in
    413 
    414404    if (defined $filename)
    415405    {
    416406        # doc_obj derived directly from file
    417407        if ($doc_obj->get_text_length($section) > 0) {
    418         print $handle "<hastxt>1\n";
     408        $section_infodb{"hastxt"} = "1";
    419409        } else {
    420         print $handle "<hastxt>0\n";
     410        $section_infodb{"hastxt"} = "0";
    421411        }
    422412    }
     
    443433        # special case for URL metadata
    444434        if ($field =~ /^URL$/i) {
    445                     $url .= "[$value]\n";
    446             $url .= "<section>$section_OID\n";
    447                     $url .= '-' x 70 . "\n";
     435            $self->write_infodb_entry($value, { 'section'  => $section_OID });
    448436        }
    449437
    450438        if (!defined $self->{'dontdb'}->{$field}) {
    451             print $handle "<$field>$value\n";
     439            $section_infodb{$field} = $value;
    452440
    453441            if ($section eq "")
     
    465453        foreach my $prefix (keys %$doc_mdprefix_fields)
    466454        {
    467         print $handle "<metadataset>$prefix\n";
     455        $section_infodb{"metadataset"} = $prefix;
    468456
    469457        foreach my $field (keys %{$doc_mdprefix_fields->{$prefix}})
     
    471459            my $val = $doc_mdprefix_fields->{$prefix}->{$field};
    472460
    473             print $handle "<metadatalist-$prefix>$field\n";
    474             print $handle "<metadatafreq-$prefix-$field>$val\n";
     461            $section_infodb{"metadatalist-$prefix"} = $field;
     462            $section_infodb{"metadatafreq-$prefix-$field"} = $val;
    475463        }
    476 
    477464        }
    478465    }
     
    481468    # explicitly add <archivedir> as this is preserved as metadata when
    482469    # the database file is loaded in
    483 
    484470    if (defined $filename)
    485471    {
    486472        # output archivedir if at top level
    487473        if ($section eq $doc_obj->get_top_section()) {
    488         print $handle "<archivedir>$archivedir\n";
     474        $section_infodb{"archivedir"} = $archivedir;
    489475        }
    490476    }
     
    492478    # output document display type
    493479    if ($first) {
    494         print $handle "<thistype>$thistype\n";
    495     }
    496 
     480        $section_infodb{"thistype"} = $thistype;
     481    }
    497482
    498483    if ($self->{'db_level'} eq "document") {
    499484        # doc num is num_docs not num_sections
    500485        # output the matching document number
    501         print $handle "<docnum>$self->{'num_docs'}\n";
    502            
    503     } else {
     486        $section_infodb{"docnum"} = $self->{'num_docs'};
     487    }
     488    else {
    504489        # output a list of children
    505490        my $children = $doc_obj->get_children ($section);
    506491        if (scalar(@$children) > 0) {
    507         print $handle "<childtype>$childtype\n";
    508         print $handle "<contains>";
    509         my $firstchild = 1;
    510         foreach my $child (@$children) {
    511             print $handle ";" unless $firstchild;
    512             $firstchild = 0;
    513             if ($child =~ /^.*?\.(\d+)$/) {
    514             print $handle "\".$1";
    515             } else {
    516             print $handle "\".$child";
     492        $section_infodb{"childtype"} = $childtype;
     493        my $contains = "";
     494        foreach my $child (@$children)
     495        {
     496            $contains .= ";" unless ($contains eq "");
     497            if ($child =~ /^.*?\.(\d+)$/)
     498            {
     499            $contains .= "\".$1";
     500            }
     501            else {
     502            $contains .= "\".$child";
    517503            }
    518504        }
    519         print $handle "\n";
    520         }
    521         #output the matching doc number
    522         print $handle "<docnum>$self->{'num_sections'}\n";
    523        
     505        $section_infodb{"contains"} = $contains;
     506        }
     507        # output the matching doc number
     508        $section_infodb{"docnum"} = $self->{'num_sections'};
    524509    }
    525510   
    526     print $handle '-' x 70, "\n";
    527 
     511    $self->write_infodb_entry($section_OID, \%section_infodb);
    528512   
    529513    # output a database entry for the document number
    530514    if ($self->{'db_level'} eq "document") {
    531         print $handle "[$self->{'num_docs'}]\n";
    532         print $handle "<section>$doc_OID\n";
     515        $self->write_infodb_entry($self->{'num_docs'}, { 'section'  => $doc_OID });
    533516    }
    534517    else {
    535         print $handle "[$self->{'num_sections'}]\n";
    536         print $handle "<section>$section_OID\n";
    537     }
    538     print $handle '-' x 70, "\n";
    539    
    540         # output entry for url
    541         if ($url ne "") {
    542             print $handle $url;
    543         }
     518        $self->write_infodb_entry($self->{'num_sections'}, { 'section'  => $section_OID });
     519    }
    544520
    545521    $first = 0;
     
    547523    last if ($self->{'db_level'} eq "document"); # if no sections wanted, only add the docs
    548524    }
    549 
    550     #GRB01062004: see code above moved from here
     525}
     526
     527
     528sub write_infodb_entry
     529{
     530    my $self = shift(@_);
     531
     532    $self->write_infodb_entry_gdbm(@_);
     533}
     534
     535   
     536sub write_infodb_entry_gdbm
     537{
     538    my $self = shift(@_);
     539    my $infodb_key = shift(@_);
     540    my $infodb_map = shift(@_);
     541
     542    my $handle = $self->{'output_handle'};
     543
     544    print $handle "[$infodb_key]\n";
     545    foreach my $infodb_value_key (keys(%$infodb_map))
     546    {
     547    print $handle "<$infodb_value_key>" . $infodb_map->{$infodb_value_key} . "\n";
     548    }
     549    print $handle '-' x 70, "\n";
    551550}
    552551
Note: See TracChangeset for help on using the changeset viewer.