Ignore:
Timestamp:
1999-12-13T16:51:57+13:00 (24 years ago)
Author:
davidb
Message:

Object modified to have basedoc

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/doc.pm

    r537 r832  
    2828package doc;
    2929
    30 use unicode;
    31 use util;
    32 use html;
     30use basedoc;
     31
     32BEGIN {
     33    @ISA = ('basedoc');
     34}
    3335
    3436# the document type may be indexed_doc, nonindexed_doc, or
     
    3941    my ($source_filename, $doc_type) = @_;
    4042   
    41     my $self = bless {'associated_files'=>[],
    42               'subsection_order'=>[],
    43               'next_subsection'=>1,
    44               'subsections'=>{},
    45               'metadata'=>[],
    46               'text'=>""}, $class;
    47 
     43    my $self = new basedoc();
    4844
    4945#    $self->set_source_filename ($source_filename) if defined $source_filename;
     
    5450    if defined $doc_type;
    5551
     52    bless($self,$class);
    5653    return $self;
    5754}
    5855
    59 sub set_source_filename {
    60     my $self = shift (@_);
    61     my ($source_filename) = @_;
    62 
    63     $self->set_metadata_element ($self->get_top_section(),
    64                  "gsdlsourcefilename",
    65                  $source_filename);
    66 }
    67 
    68 # returns the source_filename as it was provided
    69 sub get_source_filename {
    70     my $self = shift (@_);
    71 
    72     return $self->get_metadata_element ($self->get_top_section(), "gsdlsourcefilename");
    73 }
    74 
    75 sub set_doc_type {
    76     my $self = shift (@_);
    77     my ($doc_type) = @_;
    78 
    79     $self->set_metadata_element ($self->get_top_section(),
    80                  "gsdldoctype",
    81                  $doc_type);
    82 }
    83 
    84 # returns the source_filename as it was provided
    85 # the default of "indexed_doc" is used if no document
    86 # type was provided
    87 sub get_doc_type {
    88     my $self = shift (@_);
    89 
    90     my $doc_type = $self->get_metadata_element ($self->get_top_section(), "gsdldoctype");
    91     return $doc_type if (defined $doc_type);
    92     return "indexed_doc";
    93 }
    94 
    95 sub _escape_text {
    96     my ($text) = @_;
    97 
    98     # special characters in the gml encoding
    99     $text =~ s/&/&/g; # this has to be first...
    100     $text =~ s/</&lt;/g;
    101     $text =~ s/>/&gt;/g;
    102     $text =~ s/\"/&quot;/g;
    103 
    104     return $text;
    105 }
    106 
    107 sub output_section {
    108     my $self = shift (@_);
    109     my ($handle, $section, $suppress_subject_info) = @_;
    110     $suppress_subject_info = 0 unless defined $suppress_subject_info;
    111     my ($data, $subsection);
    112    
    113     my $section_ptr = $self->_lookup_section ($section);
    114     my ($section_num) = $section =~ /(\d+)$/;
    115    
    116     return unless defined $section_ptr;
    117 
    118     # output the section header (including the section number
    119     # and metadata)
    120     print $handle "<gsdlsection";
    121     print $handle " gsdlnum=\"$section_num\"" if defined $section_num;
    122     foreach $data (@{$section_ptr->{'metadata'}}) {
    123     print $handle " $data->[0]=\"" . &_escape_text($data->[1]) . "\""
    124         unless $suppress_subject_info && $data->[0] eq "Subject";
    125     }
    126     print $handle ">";
    127 
    128     # output the text
    129     print $handle &_escape_text($section_ptr->{'text'});
    130 
    131     # output all the subsections
    132     foreach $subsection (@{$section_ptr->{'subsection_order'}}) {
    133     $self->output_section($handle, "$section.$subsection", $suppress_subject_info);
    134     }
    135    
    136     # output the closing tag
    137     print $handle "</gsdlsection>\n";
    138 }
    139 
    140 # look up the reference to the a particular section
    141 sub _lookup_section {
    142     my $self = shift (@_);
    143     my ($section) = @_;
    144 
    145     my ($num);
    146     my $sectionref = $self;
    147 
    148     while (defined $section && $section ne "") {
    149     ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
    150     $num =~ s/^0+(\d)/$1/; # remove leading 0s
    151     $section = "" unless defined $section;
    152    
    153     if (defined $num && defined $sectionref->{'subsections'}->{$num}) {
    154         $sectionref = $sectionref->{'subsections'}->{$num};
    155     } else {
    156         return undef;
    157     }
    158     }
    159    
    160     return $sectionref;
    161 }
    16256
    16357sub _calc_OID {
    164     my $self = shift (@_);
    165     my ($filename) = @_;
    166 
    167     my $osexe = &util::get_os_exe();
    168 
    169     my $hashfile_exe = &util::filename_cat($ENV{'GSDLHOME'},"bin",
    170                        $ENV{'GSDLOS'},"hashfile$osexe");
    171     my $result = "NULL";
    172 
    173     if (-e "$hashfile_exe") {
    174     $result = `$hashfile_exe \"$filename\"`;
    175     ($result) = $result =~ /:\s*([0-9a-f]+)/i;
    176 
    177     } else {
    178     print STDERR "doc::_calc_OID $hashfile_exe could not be found\n";
    179     }
    180 
    181     return "HASH$result";
    182 }
    183 
    184 # methods dealing with OID, not groups of them.
    185 
    186 # if $OID is not provided one is calculated from hashing the
    187 # current contents of the document
    188 # An OID are actually stored as metadata of the document
    189 sub set_OID {
    190     my $self = shift (@_);
    191     my ($OID) = @_;
    192 
    193     # if an OID wasn't provided then feed this document to
    194     # hashfile.exe
    195     if (!defined $OID) {
    196     $OID = "NULL";
    197     my $tmp_filename = &util::get_tmp_filename();
    198     if (!open (OUTFILE, ">$tmp_filename")) {
    199         print STDERR "doc::set_OID could not write to $tmp_filename\n";
    200     } else {
    201         $self->output_section('OUTFILE', $self->get_top_section(), 1);
    202         close (OUTFILE);
    203 
    204         $OID = $self->_calc_OID ($tmp_filename);
    205         &util::rm ($tmp_filename);
    206     }
    207     }
    208 
    209     $self->set_metadata_element ($self->get_top_section(), "Identifier", $OID);
    210 }
    211 
    212 # returns the OID for this document
    213 sub get_OID {
    214     my $self = shift (@_);
    215     my $OID = $self->get_metadata_element ($self->get_top_section(), "Identifier");
    216     return $OID if (defined $OID);
    217     return "NULL";
    218 }
    219 
    220 sub delete_OID {
    221     my $self = shift (@_);
    222    
    223     $self->set_metadata_element ($self->get_top_section(), "Identifier", "NULL");
    224 }
    225 
    226 
    227 # methods for manipulating section names
    228 
    229 # returns the name of the top-most section (the top
    230 # level of the document
    231 sub get_top_section {
    232     my $self = shift (@_);
    233    
    234     return "";
    235 }
    236 
    237 # returns a section
    238 sub get_parent_section {
    239     my $self = shift (@_);
    240     my ($section) = @_;
    241 
    242     $section =~ s/(^|\.)\d+$//;
    243 
    244     return $section;
    245 }
    246 
    247 # returns the first child section (or the end child
    248 # if there isn't any)
    249 sub get_begin_child {
    250     my $self = shift (@_);
    251     my ($section) = @_;
    252 
    253     my $section_ptr = $self->_lookup_section($section);
    254     return "" unless defined $section_ptr;
    255 
    256     if (defined $section_ptr->{'subsection_order'}->[0]) {
    257     return "$section.$section_ptr->{'subsection_order'}->[0]";
    258     }
    259 
    260     return $self->get_end_child ($section);
    261 }
    262 
    263 # returns the next child of a parent section
    264 sub get_next_child {
    265     my $self = shift (@_);
    266     my ($section) = @_;
    267    
    268     my $parent_section = $self->get_parent_section($section);
    269     my $parent_section_ptr = $self->_lookup_section($parent_section);
    270     return undef unless defined $parent_section_ptr;
    271 
    272     my ($section_num) = $section =~ /(\d+)$/;
    273     return undef unless defined $section_num;
    274 
    275     my $i = 0;
    276     my $section_order = $parent_section_ptr->{'subsection_order'};
    277     while ($i < scalar(@$section_order)) {
    278     last if $section_order->[$i] eq $section_num;
    279     $i++;
    280     }
    281 
    282     $i++; # the next child
    283     if ($i < scalar(@$section_order)) {
    284     return $section_order->[$i] if $parent_section eq "";
    285     return "$parent_section.$section_order->[$i]";
    286     }
    287 
    288     # no more sections in this level
    289     return undef;
    290 }
    291 
    292 # returns a reference to a list of children
    293 sub get_children {
    294     my $self = shift (@_);
    295     my ($section) = @_;
    296 
    297     my $section_ptr = $self->_lookup_section($section);
    298     return [] unless defined $section_ptr;
    299 
    300     my @children = @{$section_ptr->{'subsection_order'}};
    301 
    302     map {$_ = "$section.$_"; $_ =~ s/^\.+//;} @children;
    303     return \@children;
    304 }
    305 
    306 # returns the child section one past the last one (which
    307 # is coded as "0")
    308 sub get_end_child {
    309     my $self = shift (@_);
    310     my ($section) = @_;
    311 
    312     return $section . ".0" unless $section eq "";
    313     return "0";
    314 }
    315 
    316 # returns the next section in book order
    317 sub get_next_section {
    318     my $self = shift (@_);
    319     my ($section) = @_;
    320 
    321     return undef unless defined $section;
    322 
    323     my $section_ptr = $self->_lookup_section($section);
    324     return undef unless defined $section_ptr;
    325 
    326     # first try to find first child
    327     if (defined $section_ptr->{'subsection_order'}->[0]) {
    328     return $section_ptr->{'subsection_order'}->[0] if ($section eq "");
    329     return "$section.$section_ptr->{'subsection_order'}->[0]";
    330     }
    331 
    332     do {
    333     # try to find sibling
    334     my $next_child = $self->get_next_child ($section);
    335     return $next_child if (defined $next_child);
    336 
    337     # move up one level
    338     $section = $self->get_parent_section ($section);
    339     } while $section =~ /\d/;
    340 
    341     return undef;
    342 }
    343 
    344 sub is_leaf_section {
    345     my $self = shift (@_);
    346     my ($section) = @_;
    347 
    348     my $section_ptr = $self->_lookup_section($section);
    349     return 1 unless defined $section_ptr;
    350 
    351     return (scalar (@{$section_ptr->{'subsection_order'}}) == 0);
    352 }
    353 
    354 # methods for dealing with sections
    355 
    356 # returns the name of the inserted section
    357 sub insert_section {
    358     my $self = shift (@_);
    359     my ($before_section) = @_;
    360 
    361     # get the child to insert before and its parent section
    362     my $parent_section = "";
    363     my $before_child = "0";
    364     my @before_section = split (/\./, $before_section);
    365     if (scalar(@before_section) > 0) {
    366     $before_child = pop (@before_section);
    367     $parent_section = join (".", @before_section);
    368     }
    369 
    370     my $parent_section_ptr = $self->_lookup_section($parent_section);
    371     if (!defined $parent_section_ptr) {
    372     print STDERR "doc::insert_section couldn't find parent section " .
    373         "$parent_section\n";
    374     return;
    375     }
    376 
    377     # get the next section number
    378     my $section_num = $parent_section_ptr->{'next_subsection'}++;
    379 
    380     my $i = 0;
    381     while ($i < scalar(@{$parent_section_ptr->{'subsection_order'}}) &&
    382        $parent_section_ptr->{'subsection_order'}->[$i] ne $before_child) {
    383     $i++;
    384     }
    385    
    386     # insert the section number into the order list
    387     splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 0, $section_num);
    388 
    389     # add this section to the parent section
    390     my $section_ptr = {'subsection_order'=>[],
    391                'next_subsection'=>1,
    392                'subsections'=>{},
    393                'metadata'=>[],
    394                'text'=>""};
    395     $parent_section_ptr->{'subsections'}->{$section_num} = $section_ptr;
    396 
    397     # work out the full section number
    398     my $section = $parent_section;
    399     $section .= "." unless $section eq "";
    400     $section .= $section_num;
    401    
    402     return $section;
    403 }
    404 
    405 # creates a pre-named section
    406 sub create_named_section {
    407     my $self = shift (@_);
    408     my ($mastersection) = @_;
    409 
    410     my ($num);
    411     my $section = $mastersection;
    412     my $sectionref = $self;
    413 
    414     while ($section ne "") {
    415     ($num, $section) = $section =~ /^\.?(\d+)(.*)$/;
    416     $num =~ s/^0+(\d)/$1/; # remove leading 0s
    417     $section = "" unless defined $section;
    418    
    419     if (defined $num) {
    420         if (!defined $sectionref->{'subsections'}->{$num}) {
    421         push (@{$sectionref->{'subsection_order'}}, $num);
    422         $sectionref->{'subsections'}->{$num} = {'subsection_order'=>[],
    423                             'next_subsection'=>1,
    424                             'subsections'=>{},
    425                             'metadata'=>[],
    426                             'text'=>""};
    427         if ($num >= $sectionref->{'next_subsection'}) {
    428             $sectionref->{'next_subsection'} = $num + 1;
    429         }
    430         }
    431         $sectionref = $sectionref->{'subsections'}->{$num};
    432 
    433     } else {
    434         print STDERR "doc::create_named_section couldn't create section " .
    435         "$mastersection\n";
    436         last;
    437     }
    438     }
    439 }
    440 
    441 # returns a reference to a list of subsections
    442 sub list_subsections {
    443     my $self = shift (@_);
    444     my ($section) = @_;
    445 
    446     my $section_ptr = $self->_lookup_section ($section);
    447     if (!defined $section_ptr) {
    448     print STDERR "doc::list_subsections couldn't find section $section\n";
    449     return [];
    450     }
    451 
    452     return [@{$section_ptr->{'subsection_order'}}];
    453 }
    454 
    455 sub delete_section {
    456     my $self = shift (@_);
    457     my ($section) = @_;
    458 
    459 #    my $section_ptr = {'subsection_order'=>[],
    460 #              'next_subsection'=>1,
    461 #              'subsections'=>{},
    462 #              'metadata'=>[],
    463 #              'text'=>""};
    464 
    465     # if this is the top section reset everything
    466     if ($section eq "") {
    467     $self->{'subsection_order'} = [];
    468     $self->{'subsections'} = {};
    469     $self->{'metadata'} = [];
    470     $self->{'text'} = "";
    471     return;
    472     }
    473 
    474     # find the parent of the section to delete
    475     my $parent_section = "";
    476     my $child = "0";
    477     my @section = split (".", $section);
    478     if (scalar(@section) > 0) {
    479     $child = pop (@section);
    480     $parent_section = join (".", @section);
    481     }
    482 
    483     my $parent_section_ptr = $self->_lookup_section($parent_section);
    484     if (!defined $parent_section_ptr) {
    485     print STDERR "doc::delete_section couldn't find parent section " .
    486         "$parent_section\n";
    487     return;
    488     }
    489 
    490     # remove this section from the subsection_order list
    491     my $i = 0;
    492     while ($i < scalar (@{$parent_section_ptr->{'subsection_order'}})) {
    493     if ($parent_section_ptr->{'subsection_order'}->[$i] eq $child) {
    494         splice (@{$parent_section_ptr->{'subsection_order'}}, $i, 1);
    495         last;
    496     }
    497     $i++;
    498     }
    499 
    500     # remove this section from the subsection hash
    501     if (defined ($parent_section_ptr->{'subsections'}->{$child})) {
    502     undef $parent_section_ptr->{'subsections'}->{$child};
    503     }
     58    # included for backwards compatibility
     59    # (in particular precalculating OID in HTMLPlug)
     60    return basedoc::_calc_OID(@_);
    50461}
    50562
     
    53289}
    53390
    534 # returns the first metadata value which matches field
    535 sub get_metadata_element {
    536     my $self = shift (@_);
    537     my ($section, $field) = @_;
    538     my ($data);
    539 
    540     my $section_ptr = $self->_lookup_section($section);
    541     if (!defined $section_ptr) {
    542     print STDERR "doc::get_metadata_element couldn't find section " .
    543         "$section\n";
    544     return;
    545     }
    546 
    547     foreach $data (@{$section_ptr->{'metadata'}}) {
    548     return $data->[1] if (scalar(@$data) >= 2 && $data->[0] eq $field);
    549     }
    550    
    551     return undef; # was not found
    552 }
    55391
    55492# add_metadata assumes the text is in (extended) ascii form. For
     
    577115   
    578116    push (@{$section_ptr->{'metadata'}}, [$field, $value]);
    579 }
    580 
    581 # returns a list of the form [value1, value2, ...]
    582 sub get_metadata {
    583     my $self = shift (@_);
    584     my ($section, $field) = @_;
    585     my ($data);
    586 
    587     my $section_ptr = $self->_lookup_section($section);
    588     if (!defined $section_ptr) {
    589     print STDERR "doc::get_metadata couldn't find section " .
    590         "$section\n";
    591     return;
    592     }
    593 
    594     my @metadata = ();
    595     foreach $data (@{$section_ptr->{'metadata'}}) {
    596     push (@metadata, $data->[1]) if ($data->[0] eq $field);
    597     }
    598    
    599     return \@metadata;
    600 }
    601 
    602 # returns a list of the form [[field,value],[field,value],...]
    603 sub get_all_metadata {
    604     my $self = shift (@_);
    605     my ($section) = @_;
    606 
    607     my $section_ptr = $self->_lookup_section($section);
    608     if (!defined $section_ptr) {
    609     print STDERR "doc::get_all_metadata couldn't find section " .
    610         "$section\n";
    611     return;
    612     }
    613    
    614     return $section_ptr->{'metadata'};
    615 }
    616 
    617 # $value is optional
    618 sub delete_metadata {
    619     my $self = shift (@_);
    620     my ($section, $field, $value) = @_;
    621 
    622     my $section_ptr = $self->_lookup_section($section);
    623     if (!defined $section_ptr) {
    624     print STDERR "doc::delete_metadata couldn't find section " .
    625         "$section\n";
    626     return;
    627     }
    628 
    629     my $i = 0;
    630     while ($i < scalar (@{$section_ptr->{'metadata'}})) {
    631     if (($section_ptr->{'metadata'}->[$i]->[0] eq $field) &&
    632         (!defined $value || $section_ptr->{'metadata'}->[$i]->[1] eq $value)) {
    633         splice (@{$section_ptr->{'metadata'}}, $i, 1);
    634     } else {
    635         $i++;
    636     }
    637     }
    638 }
    639 
    640 sub delete_all_metadata {
    641     my $self = shift (@_);
    642     my ($section) = @_;
    643 
    644     my $section_ptr = $self->_lookup_section($section);
    645     if (!defined $section_ptr) {
    646     print STDERR "doc::delete_all_metadata couldn't find section " .
    647         "$section\n";
    648     return;
    649     }
    650    
    651     $section_ptr->{'metadata'} = [];
    652 }
    653 
    654 
    655 # methods for dealing with associated files
    656 
    657 sub associate_file {
    658     my $self = shift (@_);
    659     my ($real_filename, $assoc_filename, $mime_type) = @_;
    660     $mime_type = &html::guess_mime_type ($real_filename) unless defined $mime_type;
    661 
    662     # remove all associated files with the same name
    663     $self->delete_assoc_file ($assoc_filename);
    664 
    665     push (@{$self->{'associated_files'}},
    666       [$real_filename, $assoc_filename, $mime_type]);
    667 }
    668 
    669 # returns a list of associated files in the form
    670 #   [[real_filename, assoc_filename, mimetype], ...]
    671 sub get_assoc_files {
    672     my $self = shift (@_);
    673 
    674     return $self->{'associated_files'};
    675 }
    676 
    677 sub delete_assoc_file {
    678     my $self = shift (@_);
    679     my ($assoc_filename) = @_;
    680 
    681     my $i=0;
    682     while ($i < scalar (@{$self->{'associated_files'}})) {
    683     if ($self->{'associated_files'}->[$i]->[1] eq $assoc_filename) {
    684         splice (@{$self->{'associated_files'}}, $i, 1);
    685     } else {
    686         $i++;
    687     }
    688     }
    689117}
    690118
     
    722150}
    723151
    724 # returns the text for a section
    725 sub get_text {
    726     my $self = shift (@_);
    727     my ($section) = @_;
    728 
    729     my $section_ptr = $self->_lookup_section($section);
    730     if (!defined $section_ptr) {
    731     print STDERR "doc::get_text couldn't find section " .
    732         "$section\n";
    733     return "";
    734     }
    735 
    736     return $section_ptr->{'text'};
    737 }
    738 
    739 # returns the (utf-8 encoded) length of the text for a section
    740 sub get_text_length {
    741     my $self = shift (@_);
    742     my ($section) = @_;
    743 
    744     my $section_ptr = $self->_lookup_section($section);
    745     if (!defined $section_ptr) {
    746     print STDERR "doc::get_text_length couldn't find section " .
    747         "$section\n";
    748     return 0;
    749     }
    750 
    751     return length ($section_ptr->{'text'});
    752 }
    753 
    754 sub delete_text {
    755     my $self = shift (@_);
    756     my ($section) = @_;
    757 
    758     my $section_ptr = $self->_lookup_section($section);
    759     if (!defined $section_ptr) {
    760     print STDERR "doc::delete_text couldn't find section " .
    761         "$section\n";
    762     return;
    763     }
    764 
    765     $section_ptr->{'text'} = "";
    766 }
    767 
    768152
    7691531;
Note: See TracChangeset for help on using the changeset viewer.