source: main/trunk/greenstone2/perllib/plugins/LOMPlugin.pm@ 36372

Last change on this file since 36372 was 36372, checked in by kjdon, 21 months ago

tidy up of extrametautil, renaming some methods to make them easier to understand, removing anything unused. then modifying plugins to use new methods. Also, moved some common code to MetadataRead function, can call this from several plugins instead of duplicating code. This is an interim commit, where I have left in the old code to make it easier to track changes. Next commit will have everything tidied up.

  • Property svn:keywords set to Author Date Id Revision
File size: 20.6 KB
Line 
1###########################################################################
2#
3# LOMPlugin.pm -- plugin for import the collection from LOM
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2005 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27### Note this plugin currently can't download source documents from outside if you are behind a firewall.
28# Unless, you set the http_proxy environment variable to be your proxy server,
29# and set proxy_user and proxy_password in .wgetrc file in home directory.
30# (does that work on windows??)
31
32package LOMPlugin;
33
34use extrametautil;
35use ReadTextFile;
36use MetadataPass;
37use MetadataRead;
38use util;
39use FileUtils;
40use XMLParser;
41use Cwd;
42
43# methods with identical signatures take precedence in the order given in the ISA list.
44sub BEGIN {
45 @ISA = ('MetadataRead', 'ReadTextFile', 'MetadataPass');
46}
47
48use strict; # every perl program should have this!
49no strict 'refs'; # make an exception so we can use variables as filehandles
50
51
52my $arguments =
53 [ { 'name' => "process_exp",
54 'desc' => "{BaseImporter.process_exp}",
55 'type' => "string",
56 'deft' => &get_default_process_exp(),
57 'reqd' => "no" },
58 { 'name' => "root_tag",
59 'desc' => "{LOMPlugin.root_tag}",
60 'type' => "regexp",
61 'deft' => q/^(?i)lom$/,
62 'reqd' => "no" },
63 { 'name' => "check_timestamp",
64 'desc' => "{LOMPlugin.check_timestamp}",
65 'type' => "flag" },
66 { 'name' => "download_srcdocs",
67 'desc' => "{LOMPlugin.download_srcdocs}",
68 'type' => "regexp",
69 'deft' => "",
70 'reqd' => "no" }];
71
72my $options = { 'name' => "LOMPlugin",
73 'desc' => "{LOMPlugin.desc}",
74 'abstract' => "no",
75 'inherits' => "yes",
76 'args' => $arguments };
77
78
79
80my ($self);
81sub new {
82 my $class = shift (@_);
83 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
84 push(@$pluginlist, $class);
85
86 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
87 push(@{$hashArgOptLists->{"OptList"}},$options);
88
89 $self = new ReadTextFile($pluginlist, $inputargs, $hashArgOptLists);
90
91 if ($self->{'info_only'}) {
92 # don't worry about creating the XML parser as all we want is the
93 # list of plugin options
94 return bless $self, $class;
95 }
96
97 #create XML::Parser object for parsing dublin_core.xml files
98 my $parser = new XML::Parser('Style' => 'Stream',
99 'Handlers' => {'Char' => \&Char,
100 'Doctype' => \&Doctype
101 });
102 $self->{'parser'} = $parser;
103
104 $self->{'extra_blocks'} = {};
105
106 $self->{'endline'} = ($ENV{'GSDL3SRCHOME'}) ? "" : "\n";
107
108 return bless $self, $class;
109}
110
111sub get_default_process_exp {
112 my $self = shift (@_);
113
114 return q^(?i)\.xml$^;
115}
116
117
118sub can_process_this_file {
119 my $self = shift(@_);
120 my ($filename) = @_;
121
122 if ($self->SUPER::can_process_this_file($filename) && $self->check_doctype($filename)) {
123 return 1; # its a file for us
124 }
125 return 0;
126}
127
128sub metadata_read {
129 my $self = shift (@_);
130 my ($pluginfo, $base_dir, $file, $block_hash,
131 $extrametakeys, $extrametadata, $extrametafile,
132 $processor, $gli, $aux) = @_;
133
134 my $outhandle = $self->{'outhandle'};
135
136 # can we process this file??
137 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
138 return undef unless $self->can_process_this_file_for_metadata($filename_full_path);
139
140 $file =~ s/^[\/\\]+//; # $file often begins with / so we'll tidy it up
141
142 print $outhandle "LOMPlugin: extracting metadata from $file\n"
143 if $self->{'verbosity'} > 1;
144
145 my ($dir,$tail) = $filename_full_path =~ /^(.*?)([^\/\\]*)$/;
146 $self->{'output_dir'} = $dir;
147
148 eval {
149 $self->{'parser'}->parsefile($filename_full_path);
150 };
151
152 if ($@) {
153 print $outhandle "LOMPlugin: skipping $filename_full_path as not conformant to LOM syntax\n" if ($self->{'verbosity'} > 1);
154 print $outhandle "\n Perl Error:\n $@\n" if ($self->{'verbosity'}>2);
155 return 0;
156 }
157
158 $self->{'output_dir'} = undef;
159
160 my $file_re;
161 my $lom_srcdoc = $self->{'lom_srcdoc'};
162
163 if (defined $lom_srcdoc) {
164 my $dirsep = &util::get_re_dirsep();
165 $lom_srcdoc =~ s/^$base_dir($dirsep)//;
166 $self->{'extra_blocks'}->{$file}++;
167 $file_re = $lom_srcdoc;
168 }
169 else {
170 $file_re = $tail;
171 }
172
173 if (defined $lom_srcdoc) {
174 $self->store_meta_in_extrametadata($filename_re, $self->{'saved_metadata'}, $file, $filename_full_path, $extrametakeys, $extrametadata, $extrametafile);
175 } else {
176 $self->store_meta_in_extrametadata($filename_for_metadata, $self->{'saved_metadata'}, undef, undef, $extrametakeys, $extrametadata, $extrametafile);
177 }
178 $self->{'lom_srcdoc'} = undef; # reset for next file to be processed
179 # Indexing into the extrameta data structures requires the filename's style of slashes to be in URL format
180 # Then need to convert the filename to a regex, no longer to protect windows directory chars \, but for
181 # protecting special characters like brackets in the filepath such as "C:\Program Files (x86)\Greenstone".
182 if (0) {
183 $file_re = &util::filepath_to_url_format($file_re);
184 $file_re = &util::filename_to_regex($file_re);
185 $self->{'lom_srcdoc'} = undef; # reset for next file to be processed
186
187 &extrametautil::addmetakey($extrametakeys, $file_re);
188 &extrametautil::setmetadata($extrametadata, $file_re, $self->{'saved_metadata'});
189 if (defined $lom_srcdoc) {
190 # copied from oaiplugin
191 if (!defined &extrametautil::getmetafile($extrametafile, $file_re)) {
192 &extrametautil::setmetafile($extrametafile, $file_re, {});
193 }
194 #maps the file to full path
195 &extrametautil::setmetafile_for_named_file($extrametafile, $file_re, $file, $filename_full_path);
196 }
197 }
198 return 1;
199}
200
201sub check_doctype {
202 $self = shift (@_);
203
204 my ($filename) = @_;
205
206 if (open(XMLIN,"<$filename")) {
207 my $doctype = $self->{'root_tag'};
208 ## check whether the doctype has the same name as the root element tag
209 while (defined (my $line = <XMLIN>)) {
210 ## find the root element
211 if ($line =~ /<([\w\d:]+)[\s>]/){
212 my $root = $1;
213 if ($root !~ $doctype){
214 close(XMLIN);
215 return 0;
216 }
217 else {
218 close(XMLIN);
219 return 1;
220 }
221 }
222 }
223 close(XMLIN);
224 }
225
226 return undef; # haven't found a valid line
227
228}
229
230sub read_file {
231 my $self = shift (@_);
232 my ($filename, $encoding, $language, $textref) = @_;
233
234 my $metadata_table = $self->{'metadata_table'};
235
236 my $rawtext = $metadata_table->{'rawtext'};
237
238 delete $metadata_table->{'rawtext'};
239
240 $$textref = $rawtext;
241}
242
243sub read {
244 my $self = shift (@_);
245 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
246
247 my $outhandle = $self->{'outhandle'};
248
249 return 0 if (defined $self->{'extra_blocks'}->{$file});
250
251 # can we process this file??
252 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
253 return undef unless $self->can_process_this_file($filename_full_path);
254
255 $self->{'metadata_table'} = $metadata;
256
257 my $lom_language = $metadata->{'lom_language'};
258
259 my $store_input_encoding;
260 my $store_extract_language;
261 my $store_default_language;
262 my $store_default_encoding;
263
264 if (defined $lom_language) {
265 delete $metadata->{'lom_language'};
266
267 $store_input_encoding = $self->{'input_encoding'};
268 $store_extract_language = $self->{'extract_language'};
269 $store_default_language = $self->{'default_language'};
270 $store_default_encoding = $self->{'default_encoding'};
271
272 $self->{'input_encoding'} = "utf8";
273 $self->{'extract_language'} = 0;
274 $self->{'default_language'} = $lom_language;
275 $self->{'default_encoding'} = "utf8";
276 }
277
278 my $rv = $self->SUPER::read(@_);
279
280 if (defined $lom_language) {
281 $self->{'input_encoding'} = $store_input_encoding;
282 $self->{'extract_language'} = $store_extract_language;
283 $self->{'default_language'} = $store_default_language;
284 $self->{'default_encoding'} = $store_default_encoding;
285 }
286
287 $self->{'metadata_table'} = undef;
288
289 return $rv;
290}
291
292# do plugin specific processing of doc_obj
293sub process {
294 my $self = shift (@_);
295 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
296 my $outhandle = $self->{'outhandle'};
297
298 my $cursection = $doc_obj->get_top_section();
299 $doc_obj->add_utf8_text($cursection, $$textref);
300
301 return 1;
302}
303
304sub Doctype {
305 my ($expat, $name, $sysid, $pubid, $internal) = @_;
306
307 my $root_tag = $self->{'root_tag'};
308
309 if ($name !~ /$root_tag/) {
310 die "Root tag $name does not match regular expression $root_tag";
311 }
312}
313
314sub StartTag {
315 my ($expat, $element) = @_;
316
317 my %attr = %_;
318
319 my $raw_tag = "&lt;$element";
320 map { $raw_tag .= " $_=\"$attr{$_}\""; } keys %attr;
321 $raw_tag .= "&gt;";
322
323 if ($element =~ m/$self->{'root_tag'}/) {
324 $self->{'raw_text'} = $raw_tag;
325
326 $self->{'saved_metadata'} = {};
327 $self->{'metaname_stack'} = [];
328 $self->{'lom_datatype'} = "";
329 $self->{'lom_language'} = undef;
330 $self->{'metadatatext'} = "<table class=\"metadata\" width=\"_pagewidth_\" >".$self->{'endline'};
331 }
332 else {
333 my $xml_depth = scalar(@{$self->{'metaname_stack'}});
334 $self->{'raw_text'} .= "\n";
335 $self->{'raw_text'} .= "&nbsp;&nbsp;" x $xml_depth;
336 $self->{'raw_text'} .= $raw_tag;
337
338 my $metaname_stack = $self->{'metaname_stack'};
339 push(@$metaname_stack,$element);
340 if (scalar(@$metaname_stack)==1) {
341 # top level LOM category
342 my $style = "class=\"metadata\"";
343 my $open_close
344 = "<a id=\"${element}opencloselink\" href=\"javascript:hideTBodyArea('$element')\">".$self->{'endline'};
345 $open_close
346 .= "<img id=\"${element}openclose\" border=\"0\" src=\"_httpopenmdicon_\"></a>".$self->{'endline'};
347
348 my $header_line = " <tr $style ><th $style colspan=\"3\">$open_close \u$element</th></tr>".$self->{'endline'};
349 my $md_tbody = "<tbody id=\"$element\">".$self->{'endline'};
350
351 $self->{'mdheader'} = $header_line;
352 $self->{'mdtbody'} = $md_tbody;
353 $self->{'mdtbody_text'} = "";
354 }
355 }
356}
357
358sub EndTag {
359 my ($expat, $element) = @_;
360
361 my $raw_tag = "&lt;/$element&gt;";
362
363 if ($element =~ m/$self->{'root_tag'}/) {
364 $self->{'raw_text'} .= $raw_tag;
365
366 my $metadatatext = $self->{'metadatatext'};
367 $metadatatext .= "</table>";
368
369 my $raw_text = $self->{'raw_text'};
370
371 $self->{'saved_metadata'}->{'MetadataTable'} = $metadatatext;
372 $self->{'metadatatext'} = "";
373
374 $self->{'saved_metadata'}->{'rawtext'} = $raw_text;
375 $self->{'raw_text'} = "";
376
377 if (defined $self->{'lom_language'}) {
378 $self->{'saved_metadata'}->{'lom_language'} = $self->{'lom_language'};
379 $self->{'lom_language'} = undef;
380 }
381 }
382 else {
383 my $metaname_stack = $self->{'metaname_stack'};
384
385 if (scalar(@$metaname_stack)==1) {
386 my $header_line = $self->{'mdheader'};
387 my $tbody_start = $self->{'mdtbody'};
388 my $tbody_text = $self->{'mdtbody_text'};
389 if ($tbody_text !~ m/^\s*$/s) {
390 my $tbody_end = "</tbody>".$self->{'endline'};
391 my $table_chunk
392 = $header_line.$tbody_start.$tbody_text.$tbody_end;
393
394 $self->{'metadatatext'} .= $table_chunk;
395 }
396 $self->{'mdtheader'} = "";
397 $self->{'mdtbody'} = "";
398 $self->{'mdtbody_text'} = "";
399 }
400
401 pop(@$metaname_stack);
402
403 my $xml_depth = scalar(@{$self->{'metaname_stack'}});
404 $self->{'raw_text'} .= "\n";
405 $self->{'raw_text'} .= "&nbsp;&nbsp;" x $xml_depth;
406 $self->{'raw_text'} .= $raw_tag;
407 }
408}
409
410sub process_datatype_info
411{
412 my $self = shift(@_);
413 my ($metaname_stack,$md_content) = @_;
414
415 my @without_dt_stack = @$metaname_stack; # without datatype stack
416
417 my $innermost_element = $without_dt_stack[$#without_dt_stack];
418
419 # Loose last item if encoding datatype information
420 if ($innermost_element =~ m/^(lang)?string$/) {
421 $self->{'lom_datatype'} = $innermost_element;
422
423 pop @without_dt_stack;
424 $innermost_element = $without_dt_stack[$#without_dt_stack];
425 }
426 elsif ($innermost_element =~ m/^date(Time)?$/i) {
427 if ($innermost_element =~ m/^date$/i) {
428 $self->{'lom_datatype'} = "dateTime";
429 }
430 else {
431 $self->{'lom_datatype'} = $innermost_element;
432
433 pop @without_dt_stack;
434 $innermost_element = $without_dt_stack[$#without_dt_stack];
435 }
436
437 if ($md_content =~ m/^(\d{1,2})\s*(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\w*\s*(\d{4})/i) {
438 my ($day,$mon,$year) = ($1,$2,$3);
439
440 my %month_lookup = ( 'jan' => 1, 'feb' => 2, 'mar' => 3,
441 'apr' => 4, 'may' => 5, 'jun' => 6,
442 'jul' => 7, 'aug' => 8, 'sep' => 9,
443 'oct' => 10, 'nov' => 11, 'dec' => 12 );
444
445 my $mon_num = $month_lookup{lc($mon)};
446
447 $md_content = sprintf("%d%02d%02d",$year,$mon_num,$day);
448 }
449
450 $md_content =~ s/\-//g;
451 }
452
453 if ($innermost_element eq "source") {
454 $self->{'lom_source'} = $md_content;
455 }
456 elsif ($innermost_element eq "value") {
457 $self->{'lom_value'} = $md_content;
458 }
459
460 return (\@without_dt_stack,$innermost_element,$md_content);
461}
462
463sub reset_datatype_info
464{
465 my $self = shift(@_);
466
467 $self->{'lom_datatype'} = "";
468}
469
470
471sub pretty_print_text
472{
473 my $self = shift(@_);
474
475 my ($pretty_print_text) = @_;
476
477## $metavalue_utf8 = &util::hyperlink_text($metavalue_utf8);
478 $pretty_print_text = &util::hyperlink_text($pretty_print_text);
479
480#### $pretty_print_text =~ s/(BEGIN:vCard.*END:vCard)/<pre>$1<\/pre>/sg;
481
482 if ($self->{'lom_datatype'} eq "dateTime") {
483 if ($pretty_print_text =~ m/^(\d{4})(\d{2})(\d{2})$/) {
484 $pretty_print_text = "$1-$2-$3";
485 }
486 }
487
488 return $pretty_print_text;
489}
490
491sub pretty_print_table_tr
492{
493 my $self = shift (@_);
494 my ($without_dt_stack) = @_;
495
496 my $style = "class=\"metadata\"";
497
498 my $innermost_element = $without_dt_stack->[scalar(@$without_dt_stack)-1];
499 my $outermost_element = $without_dt_stack->[0];
500
501 # Loose top level stack item (already named in pretty print table)
502 my @pretty_print_stack = @$without_dt_stack;
503 shift @pretty_print_stack;
504
505 if ($innermost_element eq "source") {
506 return if (!defined $self->{'lom_value'});
507 }
508
509 if ($innermost_element eq "value") {
510 return if (!defined $self->{'lom_source'});
511 }
512
513 my $pretty_print_text = "";
514
515 if (($innermost_element eq "value") || ($innermost_element eq "source")) {
516 my $source = $self->{'lom_source'};
517 my $value = $self->pretty_print_text($self->{'lom_value'});
518
519 $self->{'lom_source'} = undef;
520 $self->{'lom_value'} = undef;
521
522 pop @pretty_print_stack;
523
524 $pretty_print_text = "<td $style>$source</td><td $style>$value</td>";
525 }
526 else {
527 $pretty_print_text = $self->pretty_print_text($_);
528 $pretty_print_text = "<td $style colspan=2>$pretty_print_text</td>";
529 }
530 my $pretty_print_fmn = join(' : ',map { "\u$_"; } @pretty_print_stack);
531
532
533 # my $tr_attr = "id=\"$outermost_element\" style=\"display:block;\"";
534 my $tr_attr = "$style id=\"$outermost_element\"";
535
536 my $mdtext_line = " <tr $tr_attr><td $style><nobr>$pretty_print_fmn</nobr></td>$pretty_print_text</tr>".$self->{'endline'};
537 $self->{'mdtbody_text'} .= $mdtext_line;
538}
539
540
541sub check_for_language
542{
543 my $self = shift(@_);
544 my ($innermost_element,$md_content) = @_;
545
546 # Look for 'language' tag
547 if ($innermost_element eq "language") {
548 my $lom_lang = $self->{'lom_language'};
549
550 if (defined $lom_lang) {
551 my $new_lom_lang = $md_content;
552 $new_lom_lang =~ s/-.*//; # remove endings like -US or -GB
553
554 if ($lom_lang ne $new_lom_lang) {
555 my $outhandle = $self->{'outhandle'};
556
557 print $outhandle "Warning: Conflicting general language in record\n";
558 print $outhandle " $new_lom_lang (previous value for language = $lom_lang)\n";
559 }
560 # otherwise, existing value OK => do nothing
561 }
562 else {
563 $lom_lang = $md_content;
564 $lom_lang =~ s/-.*//; # remove endings like -US or -GB
565
566 $self->{'lom_language'} = $lom_lang;
567 }
568 }
569}
570
571sub found_specific_identifier
572{
573 my $self = shift(@_);
574 my ($specific_id,$full_mname,$md_content) = @_;
575
576 my $found_id = 0;
577 if ($full_mname eq $specific_id) {
578 if ($md_content =~ m/^(http|ftp):/) {
579 $found_id = 1;
580 }
581 }
582
583 return $found_id;
584}
585
586sub download_srcdoc
587{
588 my $self = shift(@_);
589 my ($doc_url) = @_;
590
591 my $outhandle = $self->{'outhandle'};
592 my $output_dir = $self->{'output_dir'};
593
594 $output_dir = &FileUtils::filenameConcatenate($output_dir,"_gsdldown.all");
595
596 if (! -d $output_dir) {
597 mkdir $output_dir;
598 }
599
600 my $re_dirsep = &util::get_re_dirsep();
601 my $os_dirsep = &util::get_dirsep();
602
603 my $file_url = $doc_url;
604 $file_url =~ s/$re_dirsep/$os_dirsep/g;
605 $file_url =~ s/^(http|ftp):\/\///;
606 $file_url .= "index.html" if ($file_url =~ m/\/$/);
607
608 my $full_file_url = &FileUtils::filenameConcatenate($output_dir,$file_url);
609 # the path to srcdoc will be used later in extrametadata to associate
610 # the lom metadata with the document. Needs to be relative to current
611 # directory.
612 my $srcdoc_path = &FileUtils::filenameConcatenate("_gsdldown.all", $file_url);
613 my $check_timestamp = $self->{'check_timestamp'};
614 my $status;
615
616 if (($check_timestamp) || (!$check_timestamp && !-e $full_file_url)) {
617 if (!-e $full_file_url) {
618 print $outhandle "Mirroring $doc_url\n";
619 }
620 else {
621 print $outhandle "Checking to see if update needed for $doc_url\n";
622 }
623
624 # on linux, if we pass an absolute path as -P arg to wget, then it
625 # stuffs up the
626 # URL rewriting in the file. Need a relative path or none, so now
627 # we change working directory first.
628 my $changed_dir = 0;
629 my $current_dir = cwd();
630 my $wget_cmd = "";
631 if ($ENV{'GSDLOS'} ne "windows") {
632 $changed_dir = 1;
633
634 chdir "$output_dir";
635 $wget_cmd = "wget -nv --timestamping -k -p \"$doc_url\"";
636 } else {
637 $wget_cmd = "wget -nv -P \"$output_dir\" --timestamping -k -p \"$doc_url\"";
638 }
639 ##print STDERR "**** wget = $wget_cmd\n";
640
641 # the wget binary is dependent on the gnomelib_env (particularly lib/libiconv2.dylib) being set, particularly on Mac Lions (android too?)
642 &util::set_gnomelib_env(); # this will set the gnomelib env once for each subshell launched, by first checking if GEXTGNOME is not already set
643
644 $status = system($wget_cmd);
645 if ($changed_dir) {
646 chdir $current_dir;
647 }
648 if ($status==0) {
649 $self->{'lom_srcdoc'} = $srcdoc_path;
650 }
651 else {
652 $self->{'lom_srcdoc'} = undef;
653 print $outhandle "Error: failed to execute $wget_cmd\n";
654 }
655 }
656 else {
657 # not time-stamping and file already exists
658 $status=0;
659 $self->{'lom_srcdoc'} = $srcdoc_path;
660 }
661
662 return $status==0;
663
664}
665
666
667sub check_for_identifier
668{
669 my $self = shift(@_);
670 my ($full_mname,$md_content) = @_;
671
672 my $success = 0;
673
674 my $download_re = $self->{'download_srcdocs'};
675 if (($download_re ne "") && $md_content =~ m/$download_re/) {
676
677 if ($self->found_specific_identifier("general^identifier^entry",$full_mname,$md_content)) {
678 $success = $self->download_srcdoc($md_content);
679 }
680
681 if (!$success) {
682 if ($self->found_specific_identifier("technical^location",$full_mname,$md_content)) {
683 $success = $self->download_srcdoc($md_content);
684 }
685 }
686 }
687
688 return $success;
689}
690
691
692sub Text {
693 if ($_ !~ m/^\s*$/) {
694 #
695 # Work out indentations and line wraps for raw XML
696 #
697 my $xml_depth = scalar(@{$self->{'metaname_stack'}})+1;
698 my $indent = "&nbsp;&nbsp;" x $xml_depth;
699
700 my $formatted_text = "\n".$_;
701
702 # break into lines < 80 chars on space
703 $formatted_text =~ s/(.{50,80})\s+/$1\n/mg;
704 $formatted_text =~ s/^/$indent/mg;
705 ## $formatted_text =~ s/\s+$//s;
706
707 $self->{'raw_text'} .= $formatted_text;
708 }
709
710 my $metaname_stack = $self->{'metaname_stack'};
711 if (($_ !~ /^\s*$/) && (scalar(@$metaname_stack)>0)) {
712
713 my ($without_dt_stack,$innermost_element,$md_content)
714 = $self->process_datatype_info($metaname_stack,$_);
715
716 $self->pretty_print_table_tr($without_dt_stack);
717
718 my $full_mname = join('^',@{$without_dt_stack});
719 $self->set_filere_metadata(lc($full_mname),$md_content);
720
721 $self->check_for_language($innermost_element,$md_content);
722 $self->check_for_identifier($full_mname,$md_content); # source doc
723
724 $self->reset_datatype_info();
725 }
726}
727
728# This Char function overrides the one in XML::Parser::Stream to overcome a
729# problem where $expat->{Text} is treated as the return value, slowing
730# things down significantly in some cases.
731sub Char {
732 $_[0]->{'Text'} .= $_[1];
733 return undef;
734}
735
7361;
Note: See TracBrowser for help on using the repository browser.