Changeset 15696

Show
Ignore:
Timestamp:
26.05.2008 13:05:16 (11 years ago)
Author:
mdewsnip
Message:

(Adding new DB support) Moved the GDBM-specific stuff out of infodb() into a new write_infodb_entry_gdbm() function, in preparation for adding sqlite support.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • gsdl/trunk/perllib/basebuildproc.pm

    r15695 r15696  
    3434use doc; 
    3535use docproc; 
     36use strict; no strict 'subs'; 
    3637use util; 
    3738 
     
    330331    my $self = shift (@_); 
    331332    my ($doc_obj, $filename) = @_; 
    332     my $handle = $self->{'output_handle'}; 
    333  
     333 
     334    # only output this document if it is a "indexed_doc" or "info_doc" (database only) document 
    334335    my $doctype = $doc_obj->get_doc_type(); 
    335  
    336     # only output this document if it is a "indexed_doc" or "info_doc" (database only) document 
    337336    return if ($doctype ne "indexed_doc" && $doctype ne "info_doc"); 
    338337 
    339338    my $archivedir = ""; 
    340  
    341339    if (defined $filename) 
    342340    { 
    343341    # doc_obj derived directly from file 
    344  
    345342    my ($dir) = $filename =~ /^(.*?)(?:\/|\\)[^\/\\]*$/; 
    346343    $dir = "" unless defined $dir; 
     
    361358    } 
    362359 
    363  
    364     #GRB: moved 1/06/2004 from GRB01062004 
    365360    #add this document to the browse structure 
    366361    push(@{$self->{'doclist'}},$doc_obj->get_OID())  
     
    369364    # classify this document 
    370365    &classify::classify_doc ($self->{'classifiers'}, $doc_obj); 
    371     #GRB: end of moved block 
    372366 
    373367    # this is another document 
     
    380374    my $doc_OID = $doc_obj->get_OID(); 
    381375    my $first = 1; 
    382     my $url = ""; 
    383376 
    384377    $self->{'doc_mdprefix_fields'} = {}; 
     
    391384        $section_OID = $doc_OID . "." . $section; 
    392385    } 
     386    my %section_infodb = (); 
    393387 
    394388    # update a few statistics 
    395389    $self->{'num_bytes'} += $doc_obj->get_text_length ($section); 
    396390    $self->{'num_sections'} += 1 unless ($doctype eq "classification"); 
    397  
    398     # output the section name 
    399     print $handle "[$section_OID]\n"; 
    400391 
    401392    # output the fact that this document is a document (unless doctype 
     
    403394    my $dtype = $doc_obj->get_metadata_element ($section, "doctype"); 
    404395    if (!defined $dtype || $dtype !~ /\w/) { 
    405         print $handle "<doctype>doc\n"; 
     396        $section_infodb{"doctype"} = "doc"; 
    406397    } 
    407398 
     
    411402    # explicitly add <hastxt> as this is preserved as metadata when 
    412403    # the database file is loaded in 
    413  
    414404    if (defined $filename) 
    415405    { 
    416406        # doc_obj derived directly from file 
    417407        if ($doc_obj->get_text_length($section) > 0) { 
    418         print $handle "<hastxt>1\n"; 
     408        $section_infodb{"hastxt"} = "1"; 
    419409        } else { 
    420         print $handle "<hastxt>0\n"; 
     410        $section_infodb{"hastxt"} = "0"; 
    421411        } 
    422412    } 
     
    443433        # special case for URL metadata 
    444434        if ($field =~ /^URL$/i) { 
    445                     $url .= "[$value]\n"; 
    446             $url .= "<section>$section_OID\n"; 
    447                     $url .= '-' x 70 . "\n"; 
     435            $self->write_infodb_entry($value, { 'section'  => $section_OID }); 
    448436        } 
    449437 
    450438        if (!defined $self->{'dontdb'}->{$field}) { 
    451             print $handle "<$field>$value\n"; 
     439            $section_infodb{$field} = $value; 
    452440 
    453441            if ($section eq "") 
     
    465453        foreach my $prefix (keys %$doc_mdprefix_fields) 
    466454        { 
    467         print $handle "<metadataset>$prefix\n"; 
     455        $section_infodb{"metadataset"} = $prefix; 
    468456 
    469457        foreach my $field (keys %{$doc_mdprefix_fields->{$prefix}}) 
     
    471459            my $val = $doc_mdprefix_fields->{$prefix}->{$field}; 
    472460 
    473             print $handle "<metadatalist-$prefix>$field\n"; 
    474             print $handle "<metadatafreq-$prefix-$field>$val\n"; 
     461            $section_infodb{"metadatalist-$prefix"} = $field; 
     462            $section_infodb{"metadatafreq-$prefix-$field"} = $val; 
    475463        } 
    476  
    477464        } 
    478465    } 
     
    481468    # explicitly add <archivedir> as this is preserved as metadata when 
    482469    # the database file is loaded in 
    483  
    484470    if (defined $filename) 
    485471    { 
    486472        # output archivedir if at top level 
    487473        if ($section eq $doc_obj->get_top_section()) { 
    488         print $handle "<archivedir>$archivedir\n"; 
     474        $section_infodb{"archivedir"} = $archivedir; 
    489475        } 
    490476    } 
     
    492478    # output document display type 
    493479    if ($first) { 
    494         print $handle "<thistype>$thistype\n"; 
    495     } 
    496  
     480        $section_infodb{"thistype"} = $thistype; 
     481    } 
    497482 
    498483    if ($self->{'db_level'} eq "document") { 
    499484        # doc num is num_docs not num_sections 
    500485        # output the matching document number 
    501         print $handle "<docnum>$self->{'num_docs'}\n"; 
    502              
    503     } else { 
     486        $section_infodb{"docnum"} = $self->{'num_docs'}; 
     487    } 
     488    else { 
    504489        # output a list of children 
    505490        my $children = $doc_obj->get_children ($section); 
    506491        if (scalar(@$children) > 0) { 
    507         print $handle "<childtype>$childtype\n"; 
    508         print $handle "<contains>"; 
    509         my $firstchild = 1; 
    510         foreach my $child (@$children) { 
    511             print $handle ";" unless $firstchild; 
    512             $firstchild = 0; 
    513             if ($child =~ /^.*?\.(\d+)$/) { 
    514             print $handle "\".$1"; 
    515             } else { 
    516             print $handle "\".$child"; 
     492        $section_infodb{"childtype"} = $childtype; 
     493        my $contains = ""; 
     494        foreach my $child (@$children) 
     495        { 
     496            $contains .= ";" unless ($contains eq ""); 
     497            if ($child =~ /^.*?\.(\d+)$/) 
     498            { 
     499            $contains .= "\".$1"; 
     500            } 
     501            else { 
     502            $contains .= "\".$child"; 
    517503            } 
    518504        } 
    519         print $handle "\n"; 
    520         } 
    521         #output the matching doc number 
    522         print $handle "<docnum>$self->{'num_sections'}\n"; 
    523          
     505        $section_infodb{"contains"} = $contains; 
     506        } 
     507        # output the matching doc number 
     508        $section_infodb{"docnum"} = $self->{'num_sections'}; 
    524509    }  
    525510     
    526     print $handle '-' x 70, "\n"; 
    527  
     511    $self->write_infodb_entry($section_OID, \%section_infodb); 
    528512     
    529513    # output a database entry for the document number 
    530514    if ($self->{'db_level'} eq "document") { 
    531         print $handle "[$self->{'num_docs'}]\n"; 
    532         print $handle "<section>$doc_OID\n"; 
     515        $self->write_infodb_entry($self->{'num_docs'}, { 'section'  => $doc_OID }); 
    533516    } 
    534517    else { 
    535         print $handle "[$self->{'num_sections'}]\n"; 
    536         print $handle "<section>$section_OID\n"; 
    537     } 
    538     print $handle '-' x 70, "\n"; 
    539      
    540         # output entry for url 
    541         if ($url ne "") { 
    542             print $handle $url; 
    543         } 
     518        $self->write_infodb_entry($self->{'num_sections'}, { 'section'  => $section_OID }); 
     519    } 
    544520 
    545521    $first = 0; 
     
    547523    last if ($self->{'db_level'} eq "document"); # if no sections wanted, only add the docs 
    548524    } 
    549  
    550     #GRB01062004: see code above moved from here 
     525} 
     526 
     527 
     528sub write_infodb_entry 
     529{ 
     530    my $self = shift(@_); 
     531 
     532    $self->write_infodb_entry_gdbm(@_); 
     533} 
     534 
     535     
     536sub write_infodb_entry_gdbm 
     537{ 
     538    my $self = shift(@_); 
     539    my $infodb_key = shift(@_); 
     540    my $infodb_map = shift(@_); 
     541 
     542    my $handle = $self->{'output_handle'}; 
     543 
     544    print $handle "[$infodb_key]\n"; 
     545    foreach my $infodb_value_key (keys(%$infodb_map)) 
     546    { 
     547    print $handle "<$infodb_value_key>" . $infodb_map->{$infodb_value_key} . "\n"; 
     548    } 
     549    print $handle '-' x 70, "\n"; 
    551550} 
    552551