source: trunk/gsdl/perllib/plugins/GMLPlug.pm@ 229

Last change on this file since 229 was 139, checked in by sjboddie, 25 years ago

Got building stuff to handle subcollections and language subcollections

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.1 KB
Line 
1# plugin which process an HTML book directory
2
3package GMLPlug;
4
5use html;
6use BasPlug;
7use util;
8use doc;
9
10
11sub BEGIN {
12 @ISA = ('BasPlug');
13}
14
15sub new {
16 my ($class) = @_;
17 $self = new BasPlug ();
18
19 return bless $self, $class;
20}
21
22sub is_recursive {
23 my $self = shift (@_);
24
25 return 0; # this is not a recursive plugin
26}
27
28sub _unescape_text {
29 my ($text) = @_;
30
31 # special characters in the gml encoding
32 $text =~ s/&lt;/</g;
33 $text =~ s/&gt;/>/g;
34 $text =~ s/&quot;/\"/g;
35 $text =~ s/&amp;/&/g; # this has to be last...
36
37 return $text;
38}
39
40
41# return 1 if processed, 0 if not processed
42# Note that $base_dir might be "" and that $file might
43# include directories
44sub read {
45 my $self = shift (@_);
46 my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
47 my $fullname = &util::filename_cat ($base_dir, $file);
48
49 # see if this is a gml book
50 return 0 unless (-f $fullname && $fullname =~ /\.gml$/i);
51
52 my ($parent_dir) = $fullname =~ /^(.*)\/[^\/]+.gml$/;
53
54 # create a new document
55 my $doc_obj = new doc ();
56 my $section = $doc_obj->get_top_section();
57
58 print STDERR "GMLPlug: processing $file\n";
59
60 # read in the document
61 my $gml = "";
62 my $line = "";
63 if (!open (INFILE, $fullname)) {
64 print STDERR "GMLPlug::read - couldn't read $fullname\n";
65 return 0;
66 }
67 while (defined ($line = <INFILE>)) {
68 $gml .= $line;
69 }
70 close (INFILE);
71
72 # process the document
73 my $firstsection = 1;
74 while ($gml =~ /\S/) {
75 if ($gml =~ s/^\s*<gsdlsection([^>]*)>(.*?)(<\/?gsdlsection)/$3/is) {
76 my $tags = $1;
77 $tags = "" unless defined $tags;
78 my $text = &_unescape_text($2);
79
80 # create the section (unless this is the first section)
81 if (!$firstsection) {
82 $tags =~ s/gsdlnum\s*=\s*\"?(\d+)\"?//i;
83 if (defined $1) {
84 $section .= ".$1";
85 $doc_obj->create_named_section($section);
86 } else {
87 $section = $doc_obj->insert_section($doc_obj->get_end_child($section));
88 }
89 }
90 $firstsection = 0;
91
92 # add the tags
93 while ((defined $tags) && ($tags =~ s/^\s*(\w+)\s*=\s*\"([^\"]*)\"//)) {
94 $doc_obj->add_utf8_metadata($section, $1 , &_unescape_text($2))
95 if (defined $1 and defined $2);
96 }
97
98 # add the text
99 $doc_obj->add_utf8_text($section, $text)
100 if ((defined $text) && ($text ne ""));
101
102 } elsif ($gml =~ s/^\s*<\/gsdlsection>//) {
103 $section = $doc_obj->get_parent_section ($section);
104
105 } else {
106 print STDERR "GMLPlug::read - error in file $fullname\n";
107 print STDERR "text: \"$gml\"\n";
108 last;
109 }
110 }
111
112 # add the associated files
113 $assoc_files = $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile");
114 my ($assoc_file_info);
115 foreach $assoc_file_info (@$assoc_files) {
116 my ($assoc_file, $mime_type) = split (":", $assoc_file_info);
117 $doc_obj->associate_file(&util::filename_cat($parent_dir, $assoc_file),
118 $assoc_file, $mime_type);
119
120 }
121 $doc_obj->delete_metadata($doc_obj->get_top_section(), "gsdlassocfile");
122
123 # assume the document has an OID
124
125 # process the document
126 $processor->process($doc_obj, $file);
127
128 return 1; # processed the file
129}
130
131
1321;
Note: See TracBrowser for help on using the repository browser.