root/main/trunk/greenstone2/perllib/plugins/LOMPlugin.pm @ 24971

Revision 24971, 19.7 KB (checked in by ak19, 8 years ago)

1. Introduced the util::filepath_to_url_format subroutine, which converts filenames to URL-style filenames so that they match the slashes used in the filename regexes that serve as extrameta keys for indexing into the extrameta data structures. 2. Fixed a bug on Windows where metadata.xml specifies filenames as regexes with a backslash in front of the file extension's period: DirectoryPlugin needed to un-regex the filepath before calling fileparse on it, otherwise the escaping backslash would interfere with Perl's fileparse routine (only on Windows, since backslash is also the directory separator there). 3. Updated all the Perl plugins that need to call the new util::filepath_to_url_format so that they use URL-style filenames (subsequently converted to regexes) to index into the extrameta data structures.

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# LOMPlugin.pm -- plugin for importing a collection from LOM
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2005 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27### Note this plugin currently can't download source documents from outside if you are behind a firewall.
28# Unless, you set the http_proxy environment variable to be your proxy server,
29# and set proxy_user and proxy_password in .wgetrc file in home directory.
30# (does that work on windows??)
31
32package LOMPlugin;
33
34use extrametautil;
35use ReadTextFile;
36use MetadataPass;
37use MetadataRead;
38use util;
39use XMLParser;
40use Cwd;
41
# Parent classes, in method-resolution order: when two parents define a
# method with the same name, the one listed first in @ISA is the one used.
BEGIN {
    our @ISA = qw(MetadataRead ReadTextFile MetadataPass);
}
46
47use strict; # every perl program should have this!
48no strict 'refs'; # make an exception so we can use variables as filehandles
49
50
# Plugin-specific arguments. Each entry gives the argument name, the key of
# its description string, its type, and (where applicable) a default value
# and whether it is required.
my $arguments = [
    {
        'name' => "process_exp",
        'desc' => "{BasePlugin.process_exp}",
        'type' => "string",
        'deft' => &get_default_process_exp(),
        'reqd' => "no",
    },
    {
        # regular expression the XML root element name must match
        'name' => "root_tag",
        'desc' => "{LOMPlugin.root_tag}",
        'type' => "regexp",
        'deft' => q/^(?i)lom$/,
        'reqd' => "no",
    },
    {
        'name' => "check_timestamp",
        'desc' => "{LOMPlugin.check_timestamp}",
        'type' => "flag",
    },
    {
        # only identifiers matching this expression are downloaded;
        # the empty default disables downloading altogether
        'name' => "download_srcdocs",
        'desc' => "{LOMPlugin.download_srcdocs}",
        'type' => "regexp",
        'deft' => "",
        'reqd' => "no",
    },
];

# Description of the plugin as a whole; 'args' refers to the list above.
my $options = {
    'name'     => "LOMPlugin",
    'desc'     => "{LOMPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
76
77
78
# File-scoped handle on the plugin instance. The XML parser callbacks in this
# file (Doctype, StartTag, EndTag, Text) receive the expat object rather than
# the plugin as their first argument, so they reach the plugin through this
# variable instead of through a method invocant.
my $self;

# Constructor.
#
# Parameters:
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the ReadTextFile constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays receive
#                      this plugin's argument and option descriptions
#
# Returns the object created by ReadTextFile, reblessed into $class. When the
# object's 'info_only' flag is set, no XML parser is created.
sub new {
    my $class = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);

    push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    push(@{ $hashArgOptLists->{"OptList"} }, $options);

    # Direct method-call syntax instead of the ambiguous indirect-object
    # form ("new ReadTextFile(...)").
    $self = ReadTextFile->new($pluginlist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # don't worry about creating the XML parser as all we want is the
        # list of plugin options
        return bless $self, $class;
    }

    # Create the XML::Parser object used to parse LOM metadata files. The
    # 'Stream' style is requested, and the Char and Doctype handlers are
    # replaced by the subs of the same name defined in this file.
    my $parser = XML::Parser->new(
        'Style'    => 'Stream',
        'Handlers' => {
            'Char'    => \&Char,
            'Doctype' => \&Doctype,
        },
    );
    $self->{'parser'} = $parser;

    # Files recorded here by metadata_read are skipped later by read().
    $self->{'extra_blocks'} = {};

    return bless $self, $class;
}
107
# Returns the default value for the process_exp argument: a case-insensitive
# expression matching names that end in ".xml". Any arguments are ignored.
sub get_default_process_exp {
    my $default_expression = q^(?i)\.xml$^;

    return $default_expression;
}
113
114
# Decides whether $filename is one of ours: the inherited test must accept it
# first, and then check_doctype must return a true value for it.
# Returns 1 when both hold, 0 otherwise.
sub can_process_this_file {
    my ($self, $filename) = @_;

    return 0 unless $self->SUPER::can_process_this_file($filename);
    return 0 unless $self->check_doctype($filename);

    return 1; # its a file for us
}
124
# Parses a LOM record and registers the metadata gathered by the parser
# callbacks in the extrameta data structures.
#
# The metadata is keyed either by the source document downloaded while
# parsing (when 'lom_srcdoc' was set) or, failing that, by the name of the
# LOM file itself.
#
# Returns undef when the file is not accepted by
# can_process_this_file_for_metadata, 0 when the file fails to parse, and 1
# once the metadata has been registered.
sub metadata_read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    my $outhandle = $self->{'outhandle'};

    # can we process this file??
    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    return undef unless $self->can_process_this_file_for_metadata($filename_full_path);

    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up

    print $outhandle "LOMPlugin: extracting metadata from $file\n"
        if $self->{'verbosity'} > 1;

    # Split the full path into its directory part and its final component.
    my ($dir,$tail) = $filename_full_path =~ /^(.*?)([^\/\\]*)$/;

    # download_srcdoc reads 'output_dir' while the parser is running.
    $self->{'output_dir'} = $dir;

    eval {
        $self->{'parser'}->parsefile($filename_full_path);
    };
    my $parse_error = $@;

    # Only meaningful during the parse, so clear it on success and failure alike.
    $self->{'output_dir'} = undef;

    if ($parse_error) {
        # A source document may have been recorded before the parse failed;
        # drop it so it cannot be attached to the next file processed.
        $self->{'lom_srcdoc'} = undef;

        print $outhandle "LOMPlugin: skipping $filename_full_path as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1);
        print $outhandle "\n Perl Error:\n $parse_error\n" if ($self->{'verbosity'}>2);
        return 0;
    }

    my $file_re;
    my $lom_srcdoc = $self->{'lom_srcdoc'};

    if (defined $lom_srcdoc) {
        my $dirsep = &util::get_re_dirsep();
        # \Q...\E: $base_dir is a literal path, not a pattern. Unquoted,
        # backslashes and characters such as parentheses in the path would be
        # interpreted as regex syntax.
        $lom_srcdoc =~ s/^\Q$base_dir\E($dirsep)//;
        $self->{'extra_blocks'}->{$file}++;
        $file_re = $lom_srcdoc;
    }
    else {
        $file_re = $tail;
    }

    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format
    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for
    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone".
    $file_re = &util::filepath_to_url_format($file_re);
    $file_re = &util::filename_to_regex($file_re);
    $self->{'lom_srcdoc'} = undef; # reset for next file to be processed

    &extrametautil::addmetakey($extrametakeys, $file_re);
    &extrametautil::setmetadata($extrametadata, $file_re, $self->{'saved_metadata'});
    if (defined $lom_srcdoc) {
        # copied from oaiplugin
        if (!defined &extrametautil::getmetafile($extrametafile, $file_re)) {
            &extrametautil::setmetafile($extrametafile, $file_re, {});
        }
        #maps the file to full path
        &extrametautil::setmetafile_for_named_file($extrametafile, $file_re, $file, $filename_full_path);
    }

    return 1;
}
190
# Checks whether the first element-like tag found in $filename matches the
# plugin's 'root_tag' expression.
#
# Returns 1 when it matches, 0 when it does not, and undef when the file
# cannot be opened or contains no line with such a tag.
sub check_doctype {
    # NOTE(review): this assigns to the file-scoped $self rather than to a
    # new lexical. The XML parser callbacks read that same variable, so this
    # has the effect of pointing them at the instance being checked. Left as
    # is - confirm whether that is intentional before adding "my".
    $self = shift (@_);

    my ($filename) = @_;

    # Three-argument open with a lexical handle: the filename is taken
    # literally (no mode characters or surrounding whitespace interpreted)
    # and the handle cannot clash with a global XMLIN used elsewhere.
    if (open(my $xml_in, '<', $filename)) {
        my $doctype = $self->{'root_tag'};

        ## check whether the doctype has the same name as the root element tag
        while (defined (my $line = <$xml_in>)) {
            ## find the root element
            next unless $line =~ /<([\w\d:]+)[\s>]/;

            my $root = $1;
            my $is_match = ($root =~ $doctype) ? 1 : 0;

            close($xml_in);
            return $is_match;
        }
        close($xml_in);
    }

    return undef; # haven't found a valid line
}
219
# Supplies the document text from the 'rawtext' entry of the metadata table
# stored on the plugin, instead of reading $filename. The entry is removed
# from the table and its value is stored through $textref.
# $filename, $encoding and $language are accepted but not used.
sub read_file {
    my ($self, $filename, $encoding, $language, $textref) = @_;

    my $table = $self->{'metadata_table'};

    # delete hands back the value it removes
    my $raw = delete $table->{'rawtext'};

    $$textref = $raw;
}
232
# Reads one file by delegating to the inherited read().
#
# Returns 0 straight away for files recorded in 'extra_blocks', and undef
# for files rejected by can_process_this_file. Otherwise the metadata hash is
# made available as 'metadata_table' for the duration of the inherited call,
# whose return value is passed back.
#
# When the metadata carries a 'lom_language' entry, that entry is removed and
# the encoding/language settings are overridden for the inherited call
# (utf8 input, no language extraction, the LOM language as default), then
# put back afterwards.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    return 0 if (defined $self->{'extra_blocks'}->{$file});

    # can we process this file??
    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    return undef unless $self->can_process_this_file($filename_full_path);

    $self->{'metadata_table'} = $metadata;

    my $lom_language = $metadata->{'lom_language'};
    my $override     = defined $lom_language;

    # settings that are temporarily replaced while the parent class reads
    my @setting_names = ('input_encoding', 'extract_language',
                         'default_language', 'default_encoding');
    my %stored_setting;

    if ($override) {
        delete $metadata->{'lom_language'};

        @stored_setting{@setting_names} = @{$self}{@setting_names};

        $self->{'input_encoding'}   = "utf8";
        $self->{'extract_language'} = 0;
        $self->{'default_language'} = $lom_language;
        $self->{'default_encoding'} = "utf8";
    }

    # @_ no longer contains the invocant, so this forwards the original arguments
    my $rv = $self->SUPER::read(@_);

    if ($override) {
        @{$self}{@setting_names} = @stored_setting{@setting_names};
    }

    $self->{'metadata_table'} = undef;

    return $rv;
}
281
# Plugin-specific processing of doc_obj: the text referenced by $textref is
# added to the document's top section. The remaining arguments are accepted
# but not used. Always returns 1.
sub process {
    my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my $top_section = $doc_obj->get_top_section();
    $doc_obj->add_utf8_text($top_section, $$textref);

    return 1;
}
293
# Doctype handler for the XML parser. Dies when the declared document type
# name does not match the plugin's 'root_tag' expression; the plugin is
# reached through the file-scoped $self.
sub Doctype {
    my ($expat, $name, $sysid, $pubid, $internal) = @_;

    my $root_tag = $self->{'root_tag'};

    die "Root tag $name does not match regular expression $root_tag"
        unless ($name =~ /$root_tag/);
}
303
# Start-tag callback. The element's attributes are read from %_ and the
# plugin from the file-scoped $self.
#
# For an element matching 'root_tag' the per-record state is reset (raw
# text, saved metadata, element stack, datatype, language, metadata table).
# For any other element the escaped tag is appended to the raw text, indented
# by its depth, and the element is pushed on the stack; an element at the top
# of the stack also starts a new collapsible section of the metadata table.
sub StartTag {
    my ($expat, $element) = @_;

    my %attr = %_;

    # HTML-escaped rendering of the opening tag, attributes included
    my $raw_tag = "&lt;$element";
    foreach my $attr_name (keys %attr) {
        $raw_tag .= " $attr_name=\"$attr{$attr_name}\"";
    }
    $raw_tag .= "&gt;";

    if ($element =~ m/$self->{'root_tag'}/) {
        $self->{'raw_text'} = $raw_tag;

        $self->{'saved_metadata'} = {};
        $self->{'metaname_stack'} = [];
        $self->{'lom_datatype'} = "";
        $self->{'lom_language'} = undef;
        $self->{'metadatatext'} = "<table class=\"metadata\" width=\"_pagewidth_\" >\n";
    }
    else {
        # depth is measured before this element is pushed
        my $xml_depth = scalar(@{$self->{'metaname_stack'}});
        $self->{'raw_text'} .= "\n" . ("&nbsp;&nbsp;" x $xml_depth) . $raw_tag;

        my $metaname_stack = $self->{'metaname_stack'};
        push(@$metaname_stack, $element);

        if (scalar(@$metaname_stack) == 1) {
            # top level LOM category
            my $style = "class=\"metadata\"";

            my $open_close
                = "<a id=\"${element}opencloselink\" href=\"javascript:hideTBodyArea('$element')\">\n"
                . "<img id=\"${element}openclose\" border=\"0\" src=\"_httpopenmdicon_\"></a>\n";

            $self->{'mdheader'}     = "  <tr $style ><th $style colspan=\"3\">$open_close \u$element</th></tr>\n";
            $self->{'mdtbody'}      = "<tbody id=\"$element\">\n";
            $self->{'mdtbody_text'} = "";
        }
    }
}
347
# End-tag callback; the plugin is reached through the file-scoped $self.
#
# For an element matching 'root_tag' the record is finished: the metadata
# table and the raw text are stored in 'saved_metadata' (as 'MetadataTable'
# and 'rawtext'), together with 'lom_language' when one was found, and the
# accumulators are cleared.
# For any other element the element is popped off the stack and its escaped
# closing tag is appended to the raw text; when it was the only element on
# the stack, the table section built for it is appended to the metadata
# table, provided it contains something other than whitespace.
sub EndTag {
    my ($expat, $element) = @_;

    my $raw_tag = "&lt;/$element&gt;";

    if ($element =~ m/$self->{'root_tag'}/) {
        $self->{'raw_text'} .= $raw_tag;

        my $metadatatext = $self->{'metadatatext'};
        $metadatatext .= "</table>";

        my $raw_text = $self->{'raw_text'};

        $self->{'saved_metadata'}->{'MetadataTable'} =  $metadatatext;
        $self->{'metadatatext'} = "";

        $self->{'saved_metadata'}->{'rawtext'} =  $raw_text;
        $self->{'raw_text'} = "";

        if (defined $self->{'lom_language'}) {
            $self->{'saved_metadata'}->{'lom_language'} = $self->{'lom_language'};
            $self->{'lom_language'} = undef;
        }
    }
    else {
        my $metaname_stack = $self->{'metaname_stack'};

        if (scalar(@$metaname_stack)==1) {
            my $header_line = $self->{'mdheader'};
            my $tbody_start = $self->{'mdtbody'};
            my $tbody_text  = $self->{'mdtbody_text'};

            # sections without any rows are left out of the table
            if ($tbody_text !~ m/^\s*$/s) {
                my $tbody_end = "</tbody>\n";
                my $table_chunk
                    = $header_line.$tbody_start.$tbody_text.$tbody_end;

                $self->{'metadatatext'} .= $table_chunk;
            }

            # 'mdheader' is the key StartTag writes; the misspelt
            # 'mdtheader' used previously left the real header untouched.
            $self->{'mdheader'}     = "";
            $self->{'mdtbody'}      = "";
            $self->{'mdtbody_text'} = "";
        }

        pop(@$metaname_stack);

        # depth is measured after the element has been popped, so the
        # closing tag lines up with its opening tag
        my $xml_depth = scalar(@{$self->{'metaname_stack'}});
        $self->{'raw_text'} .= "\n";
        $self->{'raw_text'} .= "&nbsp;&nbsp;" x $xml_depth;
        $self->{'raw_text'} .= $raw_tag;
    }
}
399
# Separates datatype wrapper elements from the element stack and normalises
# the content accordingly.
#
# Parameters:
#   $metaname_stack - array ref of element names, outermost first
#   $md_content     - text found inside the innermost element
#
# Returns a list: (array ref to a copy of the stack with any datatype wrapper
# removed, the innermost element of that copy, the possibly rewritten content).
#
# Side effects on $self: 'lom_datatype' is set when the innermost element is
# "string"/"langstring" or a date/dateTime element; 'lom_source' or
# 'lom_value' is set when the resulting innermost element is "source" or
# "value".
sub process_datatype_info
{
    my ($self, $metaname_stack, $md_content) = @_;

    my @without_dt_stack = @$metaname_stack; # without datatype stack
    my $innermost_element = $without_dt_stack[-1];

    if ($innermost_element =~ m/^(lang)?string$/) {
        # Lose last item as it only encodes datatype information
        $self->{'lom_datatype'} = $innermost_element;

        pop @without_dt_stack;
        $innermost_element = $without_dt_stack[-1];
    }
    elsif ($innermost_element =~ m/^date(Time)?$/i) {
        if ($innermost_element =~ m/^date$/i) {
            # a bare date element stays on the stack
            $self->{'lom_datatype'} = "dateTime";
        }
        else {
            $self->{'lom_datatype'} = $innermost_element;

            pop @without_dt_stack;
            $innermost_element = $without_dt_stack[-1];
        }

        # "7 March 2005" style dates become yyyymmdd
        if (my ($day, $mon, $year)
                = $md_content =~ m/^(\d{1,2})\s*(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*\s*(\d{4})/i) {
            my %month_lookup;
            @month_lookup{qw(jan feb mar apr may jun jul aug sep oct nov dec)} = (1 .. 12);

            my $mon_num = $month_lookup{lc($mon)};

            $md_content = sprintf("%d%02d%02d", $year, $mon_num, $day);
        }

        # strip hyphens, so yyyy-mm-dd becomes yyyymmdd as well
        $md_content =~ tr/-//d;
    }

    if ($innermost_element eq "source") {
        $self->{'lom_source'} = $md_content;
    }
    elsif ($innermost_element eq "value") {
        $self->{'lom_value'} = $md_content;
    }

    return (\@without_dt_stack, $innermost_element, $md_content);
}
452
# Clears the datatype recorded by process_datatype_info by setting
# 'lom_datatype' back to the empty string.
sub reset_datatype_info
{
    my ($self) = @_;

    $self->{'lom_datatype'} = "";
}
459
460
# Prepares a metadata value for display: the text is passed through
# util::hyperlink_text, and when the current datatype is "dateTime" a result
# consisting of exactly eight digits is rewritten as yyyy-mm-dd.
# Returns the display text.
sub pretty_print_text
{
    my ($self, $text) = @_;

    $text = &util::hyperlink_text($text);

    if (($self->{'lom_datatype'} eq "dateTime")
        && ($text =~ m/^(\d{4})(\d{2})(\d{2})$/)) {
        $text = "$1-$2-$3";
    }

    return $text;
}
480
# Appends one row to the metadata table section being built in
# 'mdtbody_text'.
#
# $without_dt_stack is the element stack with datatype wrappers removed. Its
# first element is omitted from the row label because it is already named in
# the section header.
#
# "source"/"value" elements are shown together in a single row: nothing is
# output until both 'lom_source' and 'lom_value' have been seen, at which
# point both are consumed. For every other element the displayed text is
# taken from $_, i.e. the text currently being handled by the caller.
sub pretty_print_table_tr
{
    my ($self, $without_dt_stack) = @_;

    my $style = "class=\"metadata\"";

    my $innermost_element = $without_dt_stack->[-1];
    my $outermost_element = $without_dt_stack->[0];

    # wait for the other half of a source/value pair
    if ($innermost_element eq "source") {
        return if (!defined $self->{'lom_value'});
    }
    if ($innermost_element eq "value") {
        return if (!defined $self->{'lom_source'});
    }

    # Lose top level stack item (already named in pretty print table)
    my @label_parts = @$without_dt_stack;
    shift @label_parts;

    my $cells = "";

    if (($innermost_element eq "value") || ($innermost_element eq "source")) {
        my $source = $self->{'lom_source'};
        my $value  = $self->pretty_print_text($self->{'lom_value'});

        $self->{'lom_source'} = undef;
        $self->{'lom_value'} = undef;

        # the source/value element itself is not part of the label
        pop @label_parts;

        $cells = "<td $style>$source</td><td $style>$value</td>";
    }
    else {
        my $display_text = $self->pretty_print_text($_);
        $cells = "<td $style colspan=2>$display_text</td>";
    }

    my $row_label = join(' : ', map { ucfirst($_) } @label_parts);

    my $tr_attr = "$style id=\"$outermost_element\"";

    $self->{'mdtbody_text'}
        .= "  <tr $tr_attr><td $style><nobr>$row_label</nobr></td>$cells</tr>\n";
}
529
530
# Tracks the language of the record. Only "language" elements are
# considered; anything from the first hyphen onwards (endings like -US or
# -GB) is dropped from the value. The first language seen is stored in
# 'lom_language'; a later value that differs is reported on 'outhandle' and
# the stored value is kept.
sub check_for_language
{
    my ($self, $innermost_element, $md_content) = @_;

    # Look for 'language' tag
    return unless ($innermost_element eq "language");

    my $new_lom_lang = $md_content;
    $new_lom_lang =~ s/-.*//; # remove endings like -US or -GB

    my $lom_lang = $self->{'lom_language'};

    if (!defined $lom_lang) {
        $self->{'lom_language'} = $new_lom_lang;
        return;
    }

    # existing value that agrees => nothing to do
    return if ($lom_lang eq $new_lom_lang);

    my $outhandle = $self->{'outhandle'};

    print $outhandle "Warning: Conflicting general language in record\n";
    print $outhandle "         $new_lom_lang (previous value for language = $lom_lang)\n";
}
560
# Returns 1 when $full_mname is exactly $specific_id and $md_content starts
# with "http:" or "ftp:"; returns 0 otherwise.
sub found_specific_identifier
{
    my ($self, $specific_id, $full_mname, $md_content) = @_;

    return 0 unless ($full_mname eq $specific_id);

    return ($md_content =~ m/^(http|ftp):/) ? 1 : 0;
}
575
# Mirrors the document at $doc_url with wget into the "_gsdldown.all"
# directory beneath 'output_dir', and records the path of the local copy
# (relative to 'output_dir') in 'lom_srcdoc'.
#
# wget is run when 'check_timestamp' is set or when no local copy exists
# yet; otherwise the existing copy is used as it is.
#
# Returns true when a local copy is available, false when wget fails or the
# download directory cannot be entered. On failure 'lom_srcdoc' is undef.
sub download_srcdoc
{
    my $self = shift(@_);
    my ($doc_url) = @_;

    my $outhandle  = $self->{'outhandle'};
    my $output_dir = $self->{'output_dir'};

    $output_dir = &util::filename_cat($output_dir,"_gsdldown.all");

    if (! -d $output_dir) {
        mkdir $output_dir;
    }

    my $re_dirsep = &util::get_re_dirsep();
    my $os_dirsep = &util::get_dirsep();

    # Derive the local file name from the URL: drop the scheme, and use
    # index.html for URLs that end in a slash.
    my $file_url = $doc_url;
    $file_url =~ s/$re_dirsep/$os_dirsep/g;
    $file_url =~ s/^(http|ftp):\/\///;
    $file_url .= "index.html" if ($file_url =~ m/\/$/);

    my $full_file_url = &util::filename_cat($output_dir,$file_url);
    # the path to srcdoc will be used later in extrametadata to associate
    # the lom metadata with the document. Needs to be relative to current
    # directory.
    my $srcdoc_path = &util::filename_cat("_gsdldown.all", $file_url);
    my $check_timestamp = $self->{'check_timestamp'};

    if (!$check_timestamp && -e $full_file_url) {
        # not time-stamping and file already exists
        $self->{'lom_srcdoc'} = $srcdoc_path;
        return 1;
    }

    if (!-e $full_file_url) {
        print $outhandle "Mirroring $doc_url\n";
    }
    else {
        print $outhandle "Checking to see if update needed for $doc_url\n";
    }

    # on linux, if we pass an absolute path as -P arg to wget, then it
    # stuffs up the URL rewriting in the file. Need a relative path or none,
    # so there we change working directory first.
    my $change_dir  = (!defined $ENV{'GSDLOS'} || $ENV{'GSDLOS'} ne "windows") ? 1 : 0;
    my $current_dir = cwd();

    # The URL comes from the metadata record, so the command is kept as an
    # argument list and handed to system() in list form. No shell is
    # involved, and quotes or other shell metacharacters in the URL cannot
    # alter the command.
    my @wget_args = ("wget", "-nv");
    push(@wget_args, "-P", $output_dir) if (!$change_dir);
    push(@wget_args, "--timestamping", "-k", "-p", $doc_url);

    my $wget_cmd = join(" ", @wget_args); # for messages only

    if ($change_dir && !chdir($output_dir)) {
        # Running wget from the wrong directory would put the mirror
        # somewhere unexpected, so give up on this document instead.
        $self->{'lom_srcdoc'} = undef;
        print $outhandle "Error: failed to change directory to $output_dir: $!\n";
        return 0;
    }

    my $status = system(@wget_args);

    if ($change_dir) {
        chdir $current_dir;
    }

    if ($status==0) {
        $self->{'lom_srcdoc'} = $srcdoc_path;
    }
    else {
        $self->{'lom_srcdoc'} = undef;
        print $outhandle "Error: failed to execute $wget_cmd\n";
    }

    return $status==0;
}
653
654
# Attempts to download the source document named by a metadata value.
#
# Nothing is attempted unless the 'download_srcdocs' expression is non-empty
# and matches $md_content. The value is then tried as a
# "general^identifier^entry" and, if that gave no download, as a
# "technical^location".
#
# Returns the result of download_srcdoc when a download was attempted, and 0
# when none was.
sub check_for_identifier
{
    my ($self, $full_mname, $md_content) = @_;

    my $success = 0;

    my $download_re = $self->{'download_srcdocs'};
    return $success if ($download_re eq "");
    return $success unless ($md_content =~ m/$download_re/);

    # identifier fields that may carry the location of the source document,
    # in order of preference
    foreach my $identifier_name ("general^identifier^entry", "technical^location") {
        last if $success;

        if ($self->found_specific_identifier($identifier_name,$full_mname,$md_content)) {
            $success = $self->download_srcdoc($md_content);
        }
    }

    return $success;
}
678
679
# Text callback: handles the character data held in $_, reaching the plugin
# through the file-scoped $self. Text consisting only of whitespace is
# ignored.
#
# The text is appended to the raw XML listing, wrapped and indented. If it
# occurs inside at least one element below the root, it is additionally
# treated as a metadata value: a table row is produced, the value is stored
# under the '^'-joined, lower-cased element path, and it is checked for
# language and source-document information.
sub Text {
    return if ($_ =~ m/^\s*$/);

    #
    # Work out indentations and line wraps for raw XML
    #
    my $xml_depth = scalar(@{$self->{'metaname_stack'}})+1;
    my $indent = "&nbsp;&nbsp;" x $xml_depth;

    my $formatted_text = "\n".$_;

    # break into lines < 80 chars on space
    $formatted_text =~ s/(.{50,80})\s+/$1\n/mg;
    $formatted_text =~ s/^/$indent/mg;

    $self->{'raw_text'} .= $formatted_text;

    # text directly inside the root element carries no metadata
    my $metaname_stack = $self->{'metaname_stack'};
    return unless (scalar(@$metaname_stack)>0);

    my ($without_dt_stack,$innermost_element,$md_content)
        = $self->process_datatype_info($metaname_stack,$_);

    $self->pretty_print_table_tr($without_dt_stack);

    my $full_mname = join('^',@{$without_dt_stack});
    $self->set_filere_metadata(lc($full_mname),$md_content);

    $self->check_for_language($innermost_element,$md_content);
    $self->check_for_identifier($full_mname,$md_content); # source doc

    $self->reset_datatype_info();
}
715
# Character-data handler installed in place of the one from
# XML::Parser::Stream. It appends the incoming data to the 'Text' entry of
# the expat object and explicitly returns undef, so that the accumulated
# text is not handed back as the return value (which slowed things down
# significantly in some cases).
sub Char {
    my $expat = $_[0];

    $expat->{'Text'} .= $_[1];

    return undef;
}
723
7241;
Note: See TracBrowser for help on using the browser.