source: main/trunk/greenstone2/perllib/plugins/LOMPlugin.pm@ 36293

Last change on this file since 36293 was 36293, checked in by anupama, 21 months ago

Forgot to commit minor change for GS3 to LOMPlugin previously. If GS3, no line feed character (backslash-n) added into value of the MetadataTable metadata.

  • Property svn:keywords set to Author Date Id Revision
File size: 20.2 KB
Line 
1###########################################################################
2#
3# LOMPlugin.pm -- plugin for importing a collection from LOM
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2005 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27### Note: this plugin currently cannot download source documents from outside if you are behind a firewall,
28# unless you set the http_proxy environment variable to your proxy server
29# and set proxy_user and proxy_password in the .wgetrc file in your home directory.
30# (Does that work on Windows?)
31
32package LOMPlugin;
33
34use extrametautil;
35use ReadTextFile;
36use MetadataPass;
37use MetadataRead;
38use util;
39use FileUtils;
40use XMLParser;
41use Cwd;
42
# Perl resolves a method by searching @ISA from left to right, so when
# several parents define a method with the same name the first one listed
# here is the one that gets called.
BEGIN {
    our @ISA = qw(MetadataRead ReadTextFile MetadataPass);
}
47
48use strict; # every perl program should have this!
49no strict 'refs'; # make an exception so we can use variables as filehandles
50
51
# Option definitions contributed by this plugin.  Each entry carries the
# option name, a '{...}' description token, the option type and, where
# given, a default value and a required flag.
my $arguments = [
    {
        'name' => "process_exp",
        'desc' => "{BaseImporter.process_exp}",
        'type' => "string",
        'deft' => &get_default_process_exp(),
        'reqd' => "no",
    },
    {
        # Regular expression the XML root element name must match.
        'name' => "root_tag",
        'desc' => "{LOMPlugin.root_tag}",
        'type' => "regexp",
        'deft' => q/^(?i)lom$/,
        'reqd' => "no",
    },
    {
        # When set, already-downloaded source documents are re-checked.
        'name' => "check_timestamp",
        'desc' => "{LOMPlugin.check_timestamp}",
        'type' => "flag",
    },
    {
        # Only identifiers matching this expression are downloaded; the
        # empty default disables downloading.
        'name' => "download_srcdocs",
        'desc' => "{LOMPlugin.download_srcdocs}",
        'type' => "regexp",
        'deft' => "",
        'reqd' => "no",
    },
];

# Plugin description record registered alongside the argument list in new().
my $options = {
    'name'     => "LOMPlugin",
    'desc'     => "{LOMPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
77
78
79
# File-scoped handle on the plugin object.  The XML::Parser callbacks in
# this file (Doctype, StartTag, EndTag, Text) are not handed the plugin
# instance, so they reach it through this variable.
my ($self);

# Constructor.
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to ReadTextFile's constructor
#   $hashArgOptLists - hash ref whose "ArgList"/"OptList" arrays receive this
#                      plugin's argument and option definitions
# Returns the new object blessed into $class.
sub new {
    my $class = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;

    # Register this class and what it accepts before building the parent.
    push(@$pluginlist, $class);
    push(@{ $hashArgOptLists->{"ArgList"} }, @{$arguments});
    push(@{ $hashArgOptLists->{"OptList"} }, $options);

    $self = ReadTextFile->new($pluginlist, $inputargs, $hashArgOptLists);
    bless $self, $class;

    # Only the option list is wanted: skip building the XML parser.
    return $self if $self->{'info_only'};

    # Stream-style parser used for the LOM XML files; Char and Doctype are
    # replaced by the handlers defined in this file.
    $self->{'parser'} = XML::Parser->new(
        'Style'    => 'Stream',
        'Handlers' => {
            'Char'    => \&Char,
            'Doctype' => \&Doctype,
        },
    );

    # Files recorded here by metadata_read() are skipped by read().
    $self->{'extra_blocks'} = {};

    # No line feed is appended to generated markup when GSDL3SRCHOME is set.
    $self->{'endline'} = $ENV{'GSDL3SRCHOME'} ? "" : "\n";

    return $self;
}
110
# Returns the default value of the process_exp option: a case-insensitive
# match on file names ending in ".xml".  The (optional) invocant is ignored.
sub get_default_process_exp {
    my ($self) = @_;

    return '(?i)\.xml$';
}
116
117
# Decides whether this plugin should handle $filename.
# Returns 1 only when the parent class accepts the file AND its root element
# matches the configured root tag (see check_doctype); otherwise returns 0.
sub can_process_this_file {
    my ($self, $filename) = @_;

    return 0 unless $self->SUPER::can_process_this_file($filename);
    return 0 unless $self->check_doctype($filename);

    return 1;    # it's a file for us
}
127
# Parses a LOM record and registers the metadata it contains in the
# extrameta structures, keyed by a regular expression for the file the
# metadata belongs to.
#
# If a source document was downloaded while parsing ($self->{'lom_srcdoc'}
# was set), the metadata is attached to that document and the LOM file
# itself is recorded in 'extra_blocks' so that read() skips it.  Otherwise
# the metadata is attached to the LOM file's own name.
#
# Returns undef when the file is not one this plugin handles, 0 when the
# XML could not be parsed, and 1 on success.
sub metadata_read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash,
        $extrametakeys, $extrametadata, $extrametafile,
        $processor, $gli, $aux) = @_;

    my $outhandle = $self->{'outhandle'};

    # can we process this file??
    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    return undef unless $self->can_process_this_file_for_metadata($filename_full_path);

    $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up

    print $outhandle "LOMPlugin: extracting metadata from $file\n"
        if $self->{'verbosity'} > 1;

    # Split the full path into directory and file name; the directory is
    # where download_srcdoc() places any mirrored source document.
    my ($dir,$tail) = $filename_full_path =~ /^(.*?)([^\/\\]*)$/;
    $self->{'output_dir'} = $dir;

    eval {
        $self->{'parser'}->parsefile($filename_full_path);
    };
    my $parse_error = $@;   # copy straight away: $@ is global and easily clobbered

    $self->{'output_dir'} = undef;

    if ($parse_error) {
        # A download may have happened before the parse failed; do not let
        # its path leak into the next file that is processed.
        $self->{'lom_srcdoc'} = undef;

        print $outhandle "LOMPlugin: skipping $filename_full_path as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1);
        print $outhandle "\n Perl Error:\n $parse_error\n" if ($self->{'verbosity'}>2);
        return 0;
    }

    my $file_re;
    my $lom_srcdoc = $self->{'lom_srcdoc'};

    if (defined $lom_srcdoc) {
        my $dirsep = &util::get_re_dirsep();
        # \Q...\E: $base_dir is a literal path, not a pattern.  Unquoted, a
        # directory such as "C:\Program Files (x86)\..." would be read as
        # regex syntax and fail to match (or fail to compile).
        $lom_srcdoc =~ s/^\Q$base_dir\E($dirsep)//;
        $self->{'extra_blocks'}->{$file}++;
        $file_re = $lom_srcdoc;
    }
    else {
        $file_re = $tail;
    }

    # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format
    # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for
    # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone".
    $file_re = &util::filepath_to_url_format($file_re);
    $file_re = &util::filename_to_regex($file_re);
    $self->{'lom_srcdoc'} = undef; # reset for next file to be processed

    &extrametautil::addmetakey($extrametakeys, $file_re);
    &extrametautil::setmetadata($extrametadata, $file_re, $self->{'saved_metadata'});
    if (defined $lom_srcdoc) {
        # copied from oaiplugin
        if (!defined &extrametautil::getmetafile($extrametafile, $file_re)) {
            &extrametautil::setmetafile($extrametafile, $file_re, {});
        }
        #maps the file to full path
        &extrametautil::setmetafile_for_named_file($extrametafile, $file_re, $file, $filename_full_path);
    }

    return 1;
}
193
# Checks whether the first element found in $filename matches the
# configured 'root_tag' regular expression.
#
# Returns 1 when the first element name matches, 0 when it does not, and
# undef when the file cannot be opened or no element is found.
#
# NOTE(review): the invocant is stored in the file-scoped $self rather than
# in a new lexical.  The XML::Parser callbacks in this file read that
# variable, so this may be deliberate -- confirm before changing it.
sub check_doctype {
    $self = shift (@_);

    my ($filename) = @_;

    # Three-argument open with a lexical handle: the file name is taken
    # literally (no mode characters or surrounding whitespace interpreted)
    # and the handle cannot collide with another XMLIN elsewhere.
    open(my $xml_in, '<', $filename) or return undef;

    my $root_tag_re = $self->{'root_tag'};
    my $verdict;    # stays undef if no element is ever seen

    while (defined (my $line = <$xml_in>)) {
        # The first thing that looks like "<name " or "<name>" is taken to
        # be the root element ("<?xml" and "<!DOCTYPE" do not match).
        if ($line =~ /<([\w\d:]+)[\s>]/) {
            my $root = $1;
            $verdict = ($root =~ $root_tag_re) ? 1 : 0;
            last;
        }
    }
    close($xml_in);

    return $verdict;
}
222
# Supplies the document text.  Nothing is read from disk: the text is the
# 'rawtext' entry of the metadata table stored by read(), which is removed
# from the table as it is handed over through $textref.
# $filename, $encoding and $language are accepted but not used.
sub read_file {
    my ($self, $filename, $encoding, $language, $textref) = @_;

    my $table = $self->{'metadata_table'};

    $$textref = delete $table->{'rawtext'};
}
235
# Reads one file as a document by delegating to the parent class.
#
# Files that metadata_read() recorded in 'extra_blocks' are skipped
# (returns 0); files this plugin does not handle return undef.  When the
# incoming metadata carries a 'lom_language' entry, it is removed from the
# metadata and, for the duration of the parent's read(), the encoding is
# forced to utf8, language extraction is switched off and the default
# language is set to that value.  The previous settings are put back
# afterwards.  Returns whatever the parent's read() returns.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    return 0 if (defined $self->{'extra_blocks'}->{$file});

    # can we process this file??
    my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
    return undef unless $self->can_process_this_file($filename_full_path);

    # read_file() picks the text up from here.
    $self->{'metadata_table'} = $metadata;

    my @setting_names = qw(input_encoding extract_language default_language default_encoding);
    my %stored_setting;

    my $lom_language = $metadata->{'lom_language'};
    my $override     = defined $lom_language;

    if ($override) {
        delete $metadata->{'lom_language'};

        @stored_setting{@setting_names} = @{$self}{@setting_names};
        @{$self}{@setting_names}        = ("utf8", 0, $lom_language, "utf8");
    }

    my $rv = $self->SUPER::read(@_);

    if ($override) {
        @{$self}{@setting_names} = @stored_setting{@setting_names};
    }

    $self->{'metadata_table'} = undef;

    return $rv;
}
284
# Plugin-specific processing of the document object: the text referenced by
# $textref is added, as UTF-8 text, to the document's top section.
# Always returns 1.
sub process {
    my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my $top_section = $doc_obj->get_top_section();
    $doc_obj->add_utf8_text($top_section, $$textref);

    return 1;
}
296
# XML::Parser Doctype handler: aborts the parse (via die) when the declared
# document type name does not match the plugin's 'root_tag' expression.
# The plugin object is reached through the file-scoped $self.
sub Doctype {
    my ($expat, $name, $sysid, $pubid, $internal) = @_;

    my $root_tag = $self->{'root_tag'};

    return if $name =~ /$root_tag/;

    die "Root tag $name does not match regular expression $root_tag";
}
306
# XML::Parser 'Stream' style start-tag callback.
#
# Rebuilds an HTML-escaped copy of the opening tag and appends it to the
# raw text being accumulated on the file-scoped $self.  For the root
# element all per-record state is (re)initialised; for any other element
# its name is pushed on 'metaname_stack', and a top-level element also
# prepares the header row and <tbody> opener for its section of the
# metadata table.
#
# The element's attributes are read from %_ (populated by the Stream style
# before this callback is invoked).
sub StartTag {
    my ($expat, $element) = @_;

    my %attr = %_;

    # Attributes are emitted in sorted order so the generated raw text is
    # the same from run to run (hash key order is not stable).
    my $raw_tag = "&lt;$element";
    foreach my $attr_name (sort keys %attr) {
        $raw_tag .= " $attr_name=\"$attr{$attr_name}\"";
    }
    $raw_tag .= "&gt;";

    if ($element =~ m/$self->{'root_tag'}/) {
        # Start of a new record: reset everything collected so far.
        $self->{'raw_text'} = $raw_tag;

        $self->{'saved_metadata'} = {};
        $self->{'metaname_stack'} = [];
        $self->{'lom_datatype'} = "";
        $self->{'lom_language'} = undef;
        $self->{'metadatatext'} = "<table class=\"metadata\" width=\"_pagewidth_\" >".$self->{'endline'};
    }
    else {
        # Indent the tag by its nesting depth (measured before the push).
        my $xml_depth = scalar(@{$self->{'metaname_stack'}});
        $self->{'raw_text'} .= $self->{'endline'};
        $self->{'raw_text'} .= "&nbsp;&nbsp;" x $xml_depth;
        $self->{'raw_text'} .= $raw_tag;

        my $metaname_stack = $self->{'metaname_stack'};
        push(@$metaname_stack,$element);
        if (scalar(@$metaname_stack)==1) {
            # top level LOM category
            my $style = "class=\"metadata\"";
            my $open_close
                = "<a id=\"${element}opencloselink\" href=\"javascript:hideTBodyArea('$element')\">".$self->{'endline'};
            $open_close
                .= "<img id=\"${element}openclose\" border=\"0\" src=\"_httpopenmdicon_\"></a>".$self->{'endline'};

            my $header_line = " <tr $style ><th $style colspan=\"3\">$open_close \u$element</th></tr>".$self->{'endline'};
            my $md_tbody = "<tbody id=\"$element\">".$self->{'endline'};

            # Held back until EndTag, which only emits the section when it
            # turned out to contain some rows.
            $self->{'mdheader'} = $header_line;
            $self->{'mdtbody'} = $md_tbody;
            $self->{'mdtbody_text'} = "";
        }
    }
}
350
# XML::Parser 'Stream' style end-tag callback.
#
# For the root element the record is finalised: the metadata table is
# closed and stored as 'MetadataTable', the accumulated raw text is stored
# as 'rawtext', and any language found is stored as 'lom_language', all in
# 'saved_metadata' on the file-scoped $self.  For any other element the
# name is popped from 'metaname_stack'; when a top-level element closes,
# its table section is appended to the metadata table provided it
# collected at least one non-blank row.
sub EndTag {
    my ($expat, $element) = @_;

    my $raw_tag = "&lt;/$element&gt;";

    if ($element =~ m/$self->{'root_tag'}/) {
        $self->{'raw_text'} .= $raw_tag;

        my $metadatatext = $self->{'metadatatext'};
        $metadatatext .= "</table>";

        my $raw_text = $self->{'raw_text'};

        $self->{'saved_metadata'}->{'MetadataTable'} = $metadatatext;
        $self->{'metadatatext'} = "";

        $self->{'saved_metadata'}->{'rawtext'} = $raw_text;
        $self->{'raw_text'} = "";

        if (defined $self->{'lom_language'}) {
            $self->{'saved_metadata'}->{'lom_language'} = $self->{'lom_language'};
            $self->{'lom_language'} = undef;
        }
    }
    else {
        my $metaname_stack = $self->{'metaname_stack'};

        if (scalar(@$metaname_stack)==1) {
            # Closing a top-level LOM category.
            my $header_line = $self->{'mdheader'};
            my $tbody_start = $self->{'mdtbody'};
            my $tbody_text = $self->{'mdtbody_text'};
            if ($tbody_text !~ m/^\s*$/s) {
                my $tbody_end = "</tbody>".$self->{'endline'};
                my $table_chunk
                    = $header_line.$tbody_start.$tbody_text.$tbody_end;

                $self->{'metadatatext'} .= $table_chunk;
            }
            # Clear the per-section buffers.  The key is 'mdheader', the one
            # StartTag sets; the previous spelling 'mdtheader' left the real
            # header untouched and created a stray key.
            $self->{'mdheader'} = "";
            $self->{'mdtbody'} = "";
            $self->{'mdtbody_text'} = "";
        }

        pop(@$metaname_stack);

        # Indent the closing tag by the depth it was opened at.
        my $xml_depth = scalar(@{$self->{'metaname_stack'}});
        $self->{'raw_text'} .= $self->{'endline'};
        $self->{'raw_text'} .= "&nbsp;&nbsp;" x $xml_depth;
        $self->{'raw_text'} .= $raw_tag;
    }
}
402
# Separates datatype wrapper elements from the metadata name and
# normalises date values.
#
#   $metaname_stack - array ref of element names, outermost first
#                     (not modified; a copy is worked on)
#   $md_content     - the text content found inside the innermost element
#
# A trailing "string"/"langstring" element, or a trailing "dateTime"
# element (any case), is recorded in $self->{'lom_datatype'} and dropped
# from the copy of the stack.  A trailing "date" element (any case) is kept
# on the stack and the datatype is recorded as "dateTime".  For either date
# form, content starting with "<day> <month name> <year>" is rewritten as
# YYYYMMDD and every '-' is removed.  If the resulting innermost element is
# "source" or "value", the content is remembered in 'lom_source' or
# 'lom_value'.
#
# Returns (ref to the trimmed stack, innermost element name, content).
sub process_datatype_info
{
    my ($self, $metaname_stack, $md_content) = @_;

    my @trimmed_stack     = @$metaname_stack;
    my $innermost_element = $trimmed_stack[-1];

    my $is_string_type = ($innermost_element =~ m/^(lang)?string$/);
    my $is_date_type   = (!$is_string_type && $innermost_element =~ m/^date(Time)?$/i);

    if ($is_string_type || $is_date_type) {
        if ($is_date_type && $innermost_element =~ m/^date$/i) {
            # "date" is part of the metadata name, so it stays on the stack.
            $self->{'lom_datatype'} = "dateTime";
        }
        else {
            # Lose the last item: it only encodes datatype information.
            $self->{'lom_datatype'} = $innermost_element;

            pop @trimmed_stack;
            $innermost_element = $trimmed_stack[-1];
        }
    }

    if ($is_date_type) {
        if ($md_content =~ m/^(\d{1,2})\s*(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*\s*(\d{4})/i) {
            my ($day, $month_name, $year) = ($1, $2, $3);

            my %month_number;
            @month_number{qw(jan feb mar apr may jun jul aug sep oct nov dec)} = (1 .. 12);

            $md_content = sprintf("%d%02d%02d", $year, $month_number{lc($month_name)}, $day);
        }

        $md_content =~ s/\-//g;
    }

    # Source/value pairs are printed together, so remember each half.
    if ($innermost_element eq "source") {
        $self->{'lom_source'} = $md_content;
    }
    elsif ($innermost_element eq "value") {
        $self->{'lom_value'} = $md_content;
    }

    return (\@trimmed_stack, $innermost_element, $md_content);
}
455
# Forgets the datatype recorded by process_datatype_info() by setting
# 'lom_datatype' back to the empty string.
sub reset_datatype_info
{
    my ($self) = @_;

    $self->{'lom_datatype'} = "";
}
462
463
# Prepares a metadata value for display in the metadata table.
# The text is first passed through util::hyperlink_text; then, if the
# current datatype is "dateTime" and the result is exactly eight digits
# (YYYYMMDD), it is returned as YYYY-MM-DD.
sub pretty_print_text
{
    my ($self, $text) = @_;

    my $display_text = &util::hyperlink_text($text);

    if ($self->{'lom_datatype'} eq "dateTime") {
        if ($display_text =~ m/^(\d{4})(\d{2})(\d{2})$/) {
            return "$1-$2-$3";
        }
    }

    return $display_text;
}
483
# Appends one row to the current section of the metadata table
# ('mdtbody_text').
#
#   $without_dt_stack - array ref of element names with datatype wrappers
#                       already removed, outermost first
#
# "source" and "value" elements are shown together on a single row, so
# nothing is emitted until both halves have been seen; the row is then
# written and both remembered halves are cleared.  Any other element gets a
# row whose value is the current text in the global $_ -- this sub must
# therefore be called while $_ still holds the text being processed.
sub pretty_print_table_tr
{
    my ($self, $without_dt_stack) = @_;

    my $style = "class=\"metadata\"";

    my $innermost_element = $without_dt_stack->[-1];
    my $outermost_element = $without_dt_stack->[0];

    # Lose the top-level stack item (it is already named in the section header).
    my @label_parts = @$without_dt_stack;
    shift @label_parts;

    my $is_pair_half = ($innermost_element eq "value") || ($innermost_element eq "source");

    my $value_cells;
    if ($is_pair_half) {
        # Wait until the other half of the pair has been seen.
        my $other_half = ($innermost_element eq "source") ? 'lom_value' : 'lom_source';
        return if (!defined $self->{$other_half});

        my $source = $self->{'lom_source'};
        my $value  = $self->pretty_print_text($self->{'lom_value'});

        $self->{'lom_source'} = undef;
        $self->{'lom_value'}  = undef;

        # The label names the pair's parent, not "source"/"value" itself.
        pop @label_parts;

        $value_cells = "<td $style>$source</td><td $style>$value</td>";
    }
    else {
        my $shown_text = $self->pretty_print_text($_);
        $value_cells = "<td $style colspan=2>$shown_text</td>";
    }

    my $row_label = join(' : ', map { ucfirst($_) } @label_parts);

    my $tr_attr = "$style id=\"$outermost_element\"";

    my $mdtext_line = " <tr $tr_attr><td $style><nobr>$row_label</nobr></td>$value_cells</tr>".$self->{'endline'};
    $self->{'mdtbody_text'} .= $mdtext_line;
}
532
533
# Tracks the language declared by the record.
# Only acts when $innermost_element is "language".  The content is reduced
# to its part before the first '-' (so "en-US" becomes "en").  The first
# such value is stored in 'lom_language'; a later, different value leaves
# the stored one in place and writes a warning to the plugin's outhandle.
sub check_for_language
{
    my ($self, $innermost_element, $md_content) = @_;

    return unless $innermost_element eq "language";

    (my $found_lang = $md_content) =~ s/-.*//; # remove endings like -US or -GB

    my $known_lang = $self->{'lom_language'};

    if (!defined $known_lang) {
        $self->{'lom_language'} = $found_lang;
        return;
    }

    # Same as the value already held => nothing to do.
    return if $known_lang eq $found_lang;

    my $outhandle = $self->{'outhandle'};

    print $outhandle "Warning: Conflicting general language in record\n";
    print $outhandle " $found_lang (previous value for language = $known_lang)\n";
}
563
# Returns 1 when $full_mname is exactly $specific_id and $md_content starts
# with "http:" or "ftp:"; returns 0 otherwise.
sub found_specific_identifier
{
    my ($self, $specific_id, $full_mname, $md_content) = @_;

    return 0 unless $full_mname eq $specific_id;

    return ($md_content =~ m/^(http|ftp):/) ? 1 : 0;
}
578
# Mirrors the source document at $doc_url with wget into the
# "_gsdldown.all" directory below $self->{'output_dir'}.
#
# On success $self->{'lom_srcdoc'} is set to the document's path relative
# to the output directory ("_gsdldown.all/<host>/<path>", with "index.html"
# appended when the URL ends in '/'); on failure it is set to undef and an
# error is written to the plugin's outhandle.  Unless the 'check_timestamp'
# option is set, a document that is already present is not fetched again.
#
# Returns true on success, false otherwise.
sub download_srcdoc
{
    my $self = shift(@_);
    my ($doc_url) = @_;

    my $outhandle = $self->{'outhandle'};
    my $output_dir = $self->{'output_dir'};

    $output_dir = &FileUtils::filenameConcatenate($output_dir,"_gsdldown.all");

    if (! -d $output_dir) {
        mkdir $output_dir;
        # Re-test rather than trusting mkdir's result: another process may
        # have created the directory in the meantime.
        if (! -d $output_dir) {
            print $outhandle "Error: failed to create directory $output_dir: $!\n";
            $self->{'lom_srcdoc'} = undef;
            return 0;
        }
    }

    my $re_dirsep = &util::get_re_dirsep();
    my $os_dirsep = &util::get_dirsep();

    # Turn the URL into a local file path: drop the scheme and give
    # directory-style URLs the name wget saves them under.
    my $file_url = $doc_url;
    $file_url =~ s/$re_dirsep/$os_dirsep/g;
    $file_url =~ s/^(http|ftp):\/\///;
    $file_url .= "index.html" if ($file_url =~ m/\/$/);

    my $full_file_url = &FileUtils::filenameConcatenate($output_dir,$file_url);
    # the path to srcdoc will be used later in extrametadata to associate
    # the lom metadata with the document. Needs to be relative to current
    # directory.
    my $srcdoc_path = &FileUtils::filenameConcatenate("_gsdldown.all", $file_url);
    my $check_timestamp = $self->{'check_timestamp'};
    my $status;

    if ($check_timestamp || !-e $full_file_url) {
        if (!-e $full_file_url) {
            print $outhandle "Mirroring $doc_url\n";
        }
        else {
            print $outhandle "Checking to see if update needed for $doc_url\n";
        }

        # The command is built as an argument list and run without a shell:
        # $doc_url comes from the metadata being imported, and must not be
        # able to smuggle shell syntax (quotes, backticks, $(...)) into the
        # command line.
        my @wget_args = ('wget', '-nv');

        # on linux, if we pass an absolute path as -P arg to wget, then it
        # stuffs up the URL rewriting in the file. Need a relative path or
        # none, so now we change working directory first.
        my $changed_dir = 0;
        my $current_dir = cwd();
        if ($ENV{'GSDLOS'} ne "windows") {
            if (!chdir($output_dir)) {
                # Carrying on would make wget write into whatever directory
                # we happen to be in.
                print $outhandle "Error: failed to change directory to $output_dir: $!\n";
                $self->{'lom_srcdoc'} = undef;
                return 0;
            }
            $changed_dir = 1;
        } else {
            push(@wget_args, '-P', $output_dir);
        }
        push(@wget_args, '--timestamping', '-k', '-p', $doc_url);

        # the wget binary is dependent on the gnomelib_env (particularly lib/libiconv2.dylib) being set, particularly on Mac Lions (android too?)
        &util::set_gnomelib_env(); # this will set the gnomelib env once for each subshell launched, by first checking if GEXTGNOME is not already set

        $status = system(@wget_args);
        if ($changed_dir) {
            chdir $current_dir;
        }
        if ($status==0) {
            $self->{'lom_srcdoc'} = $srcdoc_path;
        }
        else {
            $self->{'lom_srcdoc'} = undef;
            my $wget_cmd = join(' ', @wget_args);
            print $outhandle "Error: failed to execute $wget_cmd\n";
        }
    }
    else {
        # not time-stamping and file already exists
        $status=0;
        $self->{'lom_srcdoc'} = $srcdoc_path;
    }

    return $status==0;
}
658
659
# Downloads the source document a metadata value points at, when allowed.
#
# Nothing happens unless the 'download_srcdocs' option is non-empty and
# $md_content matches it.  The value is then tried as a
# "general^identifier^entry" and, if that did not lead to a successful
# download, as a "technical^location".
#
# Returns 0 when nothing was attempted, otherwise the result of the last
# download_srcdoc() call (true on success).
sub check_for_identifier
{
    my ($self, $full_mname, $md_content) = @_;

    my $download_re = $self->{'download_srcdocs'};

    return 0 if $download_re eq "";
    return 0 unless $md_content =~ m/$download_re/;

    my $success = 0;

    foreach my $identifier_name ("general^identifier^entry", "technical^location") {
        next unless $self->found_specific_identifier($identifier_name, $full_mname, $md_content);

        $success = $self->download_srcdoc($md_content);
        last if $success;
    }

    return $success;
}
683
684
# XML::Parser 'Stream' style text callback; the text is delivered in $_.
# Whitespace-only text is ignored.  Otherwise the text is wrapped, indented
# and appended to the raw text, and -- when it sits inside at least one
# element below the root -- it is recorded as metadata, added to the
# metadata table, and checked for a language declaration and for a source
# document to download.  The plugin object is the file-scoped $self.
#
# $_ is deliberately left untouched throughout: pretty_print_table_tr()
# reads it.
sub Text {
    return if ($_ =~ m/^\s*$/);

    my $metaname_stack = $self->{'metaname_stack'};

    #
    # Work out indentations and line wraps for raw XML
    #
    my $indent = "&nbsp;&nbsp;" x (scalar(@$metaname_stack) + 1);

    my $formatted_text = "\n".$_;

    # break into lines < 80 chars on space
    $formatted_text =~ s/(.{50,80})\s+/$1\n/mg;
    $formatted_text =~ s/^/$indent/mg;

    $self->{'raw_text'} .= $formatted_text;

    # Text directly inside the root element is not metadata.
    return unless (scalar(@$metaname_stack) > 0);

    my ($without_dt_stack, $innermost_element, $md_content)
        = $self->process_datatype_info($metaname_stack, $_);

    $self->pretty_print_table_tr($without_dt_stack);

    my $full_mname = join('^', @{$without_dt_stack});
    $self->set_filere_metadata(lc($full_mname), $md_content);

    $self->check_for_language($innermost_element, $md_content);
    $self->check_for_identifier($full_mname, $md_content); # source doc

    $self->reset_datatype_info();
}
720
# Replacement for the Char handler in XML::Parser::Stream.  It appends the
# incoming characters to the parser's 'Text' buffer and explicitly returns
# undef; the stock handler ends up returning the whole buffer, which slows
# things down significantly in some cases.
sub Char {
    my ($expat, $chars) = @_;

    $expat->{'Text'} .= $chars;

    return undef;
}
728
7291;
Note: See TracBrowser for help on using the repository browser.