Changeset 862


Ignore:
Timestamp:
2000-01-21T16:59:04+13:00 (24 years ago)
Author:
sjboddie
Message:

fixed a couple of bugs that were preventing multiple document gml files
from being processed correctly

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/GMLPlug.pm

    r849 r862  
    8686
    8787    # read in the document
    88     my $gml = "";
    89     my $line = "";
    90 
    9188    if ($gz) {
    9289    if (!open (INFILE, "zcat $fullname |")) {
     
    10299
    103100    undef $/;
    104     while (defined ($line = <INFILE>)) {
    105     $gml .= $line;
    106     }
     101    my $gml = <INFILE>;
    107102    $/ = "\n";
    108 
    109103    close (INFILE);
    110 
     104   
    111105    my @gml_sections = split("</gsdlsection>",$gml);
    112106    $gml = shift(@gml_sections);
    113107
    114108    my $no_docs = 0;
    115     my $src_filename = "";
    116 
    117     while (1)
    118     {
     109#    my $src_filename = ""; #### don't appear to use this anymore - not sure if that's right
     110
     111    while (1) {
    119112    # create a new document
    120113    my $doc_obj = new doc ();
     
    123116    # process the document
    124117    my $firstsection = 1;
    125     while (1)
    126     {
    127         my $tags = "";
    128         my $text = "";
    129 
    130         my @indenting_sections = split("<gsdlsection",$gml);
     118    while (1) {
     119        my ($tags, $text) = ("", "");
     120
     121        my @indenting_sections = split("<gsdlsection", $gml);
    131122        shift(@indenting_sections); # first entry is trivially empty
    132 ####        print STDERR "**** no indenting sections = ", scalar(@indenting_sections), "\n";
    133 
    134         foreach $gml (@indenting_sections)
    135         {
    136 ####        print STDERR "\n\n\n\n!!!!!!!!!!! gml = $gml\n\n\n";
    137 
    138         if ($gml =~ m/^\s*([^>]*)>(.*)$/so)
    139         {
    140             $tags = $1 if (defined $1);
     123
     124        foreach $gml (@indenting_sections) {
     125
     126        if ($gml =~ /^\s*([^>]*)>(.*)$/so) {
     127            $tags = $1 if defined $1;
    141128            $text = &GMLPlug::_unescape_text($2);
    142         }
    143         else
    144         {
     129
     130        } else {
    145131            print STDERR "GMLPlug::read - error in file $fullname\n";
    146132            print STDERR "text: \"$gml\"\n";
     
    149135
    150136        # create the section (unless this is the first section)
    151         if ($firstsection)
    152         {
     137        if ($firstsection) {
    153138            $firstsection = 0;
    154            
    155             print STDERR "  0 of $src_filename\n" if ($no_docs==1);
    156             $tags =~ m/gsdlsourcefilename\s*=\s*(\"([^\"]*)\")|(\w+)/o;
    157             $src_filename = $2 || $3;
    158             print STDERR "  $no_docs of $src_filename\n" if ($no_docs>=1);
    159         }
    160         else
    161         {
    162 ####            print STDERR "*** tags = $tags\n";
    163 
    164             if ($tags =~ s/gsdlnum\s*=\s*\"?(\d+)\"?//o)
    165             {
     139#           $tags =~ /gsdlsourcefilename\s*=\s*(?:\"([^\"]*)\")/o;
     140#           $src_filename = $2 || $3;
     141
     142        } else {
     143
     144            $tags =~ s/gsdlnum\s*=\s*\"?(\d+)\"?//o;
     145            if (defined $1) {
    166146            $section .= ".$1";
    167147            $doc_obj->create_named_section($section);
    168             }
    169             else
    170             {
     148            } else {
    171149            $section = $doc_obj->insert_section($doc_obj->get_end_child($section));
    172150            }
    173151        }
    174152       
    175 
    176153        # add the tags
    177         while ((defined $tags)
    178                && ($tags =~ s/^\s*(\w+)\s*=\s*\"([^\"]*)\"//o))
    179         {
    180             $doc_obj->add_utf8_metadata($section, $1,
    181                         &GMLPlug::_unescape_text($2))
     154        while ((defined $tags) && ($tags =~ s/^\s*(\w+)=\"([^\"]*)\"//o)) {
     155            $doc_obj->add_utf8_metadata($section, $1, &GMLPlug::_unescape_text($2))
    182156            if (defined $1 and defined $2);
    183157        }
     
    189163
    190164        $gml = shift(@gml_sections); # get next bit of data
    191         last if (!defined $gml);
    192 ###     print "####### before section = $section\n";
     165        last unless defined $gml;
    193166        $section = $doc_obj->get_parent_section ($section);
    194 ###     print "####### after section = $section\n";
    195         #### last if ($section eq "");
    196     }
    197 
    198     # add the associated files
    199     $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile");
    200     my ($assoc_file_info);
    201     foreach $assoc_file_info (@$assoc_files) {
    202     my ($assoc_file, $mime_type, $dir) = split (":", $assoc_file_info);
    203     $dir = "" unless defined $dir;
    204     $doc_obj->associate_file(&util::filename_cat($parent_dir, $assoc_file),
    205                  &util::filename_cat($dir, $assoc_file), $mime_type);
     167        last if $section eq ""; # back to top level again (more than one document in gml file)
     168    }
     169
     170    # add the associated files
     171    $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile");
     172    my ($assoc_file_info);
     173    foreach $assoc_file_info (@$assoc_files) {
     174        my ($assoc_file, $mime_type, $dir) = split (":", $assoc_file_info);
     175        $dir = "" unless defined $dir;
     176        $doc_obj->associate_file(&util::filename_cat($parent_dir, $assoc_file),
     177                     &util::filename_cat($dir, $assoc_file), $mime_type);
    206178    }
    207179    $doc_obj->delete_metadata($doc_obj->get_top_section(), "gsdlassocfile");
    208 
     180   
    209181    # add metadata
    210182    foreach $field (keys(%$metadata)) {
     
    218190        }
    219191    }
    220 
    221 
     192   
    222193    # assume the document has an OID
    223 
     194   
    224195    # process the document
    225196    $processor->process($doc_obj, $file);
    226 
     197   
    227198    $no_docs++;
    228 
    229     last if (defined $maxdocs && $maxdocs =~ /\d/ && $no_docs >= $maxdocs);
    230 
    231     last if (!defined $gml);
     199    last if ($maxdocs > -1 && $no_docs >= $maxdocs);
     200    last unless defined $gml;
    232201    }
    233202
Note: See TracChangeset for help on using the changeset viewer.