source: trunk/gsdl/perllib/plugins/GMLPlug.pm@ 317

Last change on this file since 317 was 317, checked in by sjboddie, 25 years ago

Added maxdocs option

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.5 KB
Line 
1# plugin which process an HTML book directory
2
3package GMLPlug;
4
5use html;
6use BasPlug;
7use util;
8use doc;
9
10
11sub BEGIN {
12 @ISA = ('BasPlug');
13}
14
15sub new {
16 my ($class) = @_;
17 $self = new BasPlug ();
18
19 return bless $self, $class;
20}
21
22sub is_recursive {
23 my $self = shift (@_);
24
25 return 0; # this is not a recursive plugin
26}
27
28sub _unescape_text {
29 my ($text) = @_;
30
31 # special characters in the gml encoding
32 $text =~ s/&lt;/</g;
33 $text =~ s/&gt;/>/g;
34 $text =~ s/&quot;/\"/g;
35 $text =~ s/&amp;/&/g; # this has to be last...
36
37 return $text;
38}
39
40# return number of files processed, undef if can't process
41# Note that $base_dir might be "" and that $file might
42# include directories
43sub read {
44 my $self = shift (@_);
45 my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
46 my $fullname = &util::filename_cat ($base_dir, $file);
47
48 # see if this is a gml book
49 return undef unless (-f $fullname && $fullname =~ /\.gml$/i);
50
51 my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+.gml$/;
52
53 # create a new document
54 my $doc_obj = new doc ();
55 my $section = $doc_obj->get_top_section();
56
57 print STDERR "GMLPlug: processing $file\n";
58
59 # read in the document
60 my $gml = "";
61 my $line = "";
62 if (!open (INFILE, $fullname)) {
63 print STDERR "GMLPlug::read - couldn't read $fullname\n";
64 return undef;
65 }
66 while (defined ($line = <INFILE>)) {
67 $gml .= $line;
68 }
69 close (INFILE);
70
71 # process the document
72 my $firstsection = 1;
73 while ($gml =~ /\S/) {
74 if ($gml =~ s/^\s*<gsdlsection([^>]*)>(.*?)(<\/?gsdlsection)/$3/is) {
75 my $tags = $1;
76 $tags = "" unless defined $tags;
77 my $text = &_unescape_text($2);
78
79 # create the section (unless this is the first section)
80 if (!$firstsection) {
81 $tags =~ s/gsdlnum\s*=\s*\"?(\d+)\"?//i;
82 if (defined $1) {
83 $section .= ".$1";
84 $doc_obj->create_named_section($section);
85 } else {
86 $section = $doc_obj->insert_section($doc_obj->get_end_child($section));
87 }
88 }
89 $firstsection = 0;
90
91 # add the tags
92 while ((defined $tags) && ($tags =~ s/^\s*(\w+)\s*=\s*\"([^\"]*)\"//)) {
93 $doc_obj->add_utf8_metadata($section, $1 , &_unescape_text($2))
94 if (defined $1 and defined $2);
95 }
96
97 # add the text
98 $doc_obj->add_utf8_text($section, $text)
99 if ((defined $text) && ($text ne ""));
100
101 } elsif ($gml =~ s/^\s*<\/gsdlsection>//) {
102 $section = $doc_obj->get_parent_section ($section);
103
104 } else {
105 print STDERR "GMLPlug::read - error in file $fullname\n";
106 print STDERR "text: \"$gml\"\n";
107 last;
108 }
109 }
110
111 # add the associated files
112 $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile");
113 my ($assoc_file_info);
114 foreach $assoc_file_info (@$assoc_files) {
115 my ($assoc_file, $mime_type) = split (":", $assoc_file_info);
116 $doc_obj->associate_file(&util::filename_cat($parent_dir, $assoc_file),
117 $assoc_file, $mime_type);
118
119 }
120 $doc_obj->delete_metadata($doc_obj->get_top_section(), "gsdlassocfile");
121
122 # add metadata
123 foreach $field (keys(%$metadata)) {
124 # $metadata->{$field} may be an array reference
125 if (ref ($metadata->{$field}) eq "ARRAY") {
126 map {
127 $doc_obj->add_metadata ($doc_obj->get_top_section(), $field, $_);
128 } @{$metadata->{$field}};
129 } else {
130 $doc_obj->add_metadata ($doc_obj->get_top_section(), $field, $metadata->{$field});
131 }
132 }
133
134
135 # assume the document has an OID
136
137 # process the document
138 $processor->process($doc_obj, $file);
139
140 return 1; # processed the file
141}
142
143
1441;
Note: See TracBrowser for help on using the repository browser.