source: trunk/gsdl/perllib/plugins/GAPlug.pm@ 13147

Last change on this file since 13147 was 13147, checked in by shaoqun, 18 years ago

added method used by XMLPlug to check whether its subclass can parse this doc type

  • Property svn:keywords set to Author Date Id Revision
File size: 7.8 KB
Line 
1###########################################################################
2#
3# GAPlug.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2001 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# Processes GreenstoneArchive XML documents. Note that this plugin does no
27# syntax checking (though the XML::Parser module tests for
28# well-formedness). It's assumed that the GreenstoneArchive files conform
29# to their DTD.
30
31package GAPlug;
32
33use XMLPlug;
34
35use strict;
36no strict 'refs'; # allow filehandles to be variables and viceversa
37
38sub BEGIN {
39 @GAPlug::ISA = ('XMLPlug');
40}
41
42
43sub get_default_process_exp {
44 my $self = shift (@_);
45
46 return q^(?i)doc\.xml$^;
47}
48
49my $arguments = [
50 ];
51
52my $options = { 'name' => "GAPlug",
53 'desc' => "{GAPlug.desc}",
54 'abstract' => "no",
55 'inherits' => "yes" };
56
57sub new {
58 my ($class) = shift (@_);
59 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
60 push(@$pluginlist, $class);
61
62 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
63 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
64
65 my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
66
67 $self->{'section'} = "";
68 $self->{'section_level'} = 0;
69 $self->{'metadata_name'} = "";
70 $self->{'metadata_value'} = "";
71 $self->{'content'} = "";
72
73# # Currently used to store information for previous values controls. In
74# # the next contract I'll move to using information directly from Lucene.
75# $self->{'sqlfh'} = 0;
76
77 return bless $self, $class;
78}
79
80sub xml_start_document {
81}
82
83sub xml_end_document {
84}
85
86sub get_doctype {
87 my $self = shift(@_);
88
89 return "(Greenstone)?Archive";
90}
91
92
93sub xml_doctype {
94 my $self = shift(@_);
95
96 my ($expat, $name, $sysid, $pubid, $internal) = @_;
97
98 # allow the short-lived and badly named "GreenstoneArchive" files to be processed
99 # as well as the "Archive" files which should now be created by import.pl
100 die "" if ($name !~ /^(Greenstone)?Archive$/);
101
102 my $outhandle = $self->{'outhandle'};
103 print $outhandle "GAPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
104 print STDERR "<Processing n='$self->{'file'}' p='GAPlug'>\n" if $self->{'gli'};
105
106}
107
108
109sub xml_start_tag {
110 my $self = shift(@_);
111 my ($expat, $element) = @_;
112
113 $self->{'element'} = $element;
114 if ($element eq "Section") {
115 if ($self->{'section_level'} == 0) {
116 $self->open_document();
117 } else {
118 my $doc_obj = $self->{'doc_obj'};
119 $self->{'section'} =
120 $doc_obj->insert_section($doc_obj->get_end_child($self->{'section'}));
121 }
122
123 $self->{'section_level'} ++;
124 }
125 elsif ($element eq "Metadata") {
126 $self->{'metadata_name'} = $_{'name'};
127 }
128}
129
130sub xml_end_tag {
131 my $self = shift(@_);
132 my ($expat, $element) = @_;
133
134 if ($element eq "Section") {
135 $self->{'section_level'} --;
136 $self->{'section'} = $self->{'doc_obj'}->get_parent_section ($self->{'section'});
137 $self->close_document() if $self->{'section_level'} == 0;
138 }
139 elsif ($element eq "Metadata") {
140 $self->{'doc_obj'}->add_utf8_metadata($self->{'section'}, $self->{'metadata_name'},$self->{'metadata_value'});
141 # Ensure this value is added to the allvalues database in gseditor.
142 # Note that the database constraints prevent multiple occurances of the
143 # same key-value pair.
144 # We write these out to a file, so they can all be commited in one
145 # transaction
146 #if (!$self->{'sqlfh'})
147 # {
148 # my $sql_file = $ENV{'GSDLHOME'} . "/collect/lld/tmp/gseditor.sql";
149 # # If the file doesn't already exist, open it and begin a transaction
150 # my $sql_fh;
151 # if (!-e $sql_file)
152 # {
153 # open($sql_fh, ">" . $sql_file);
154 # print $sql_fh "BEGIN TRANSACTION;\n";
155 # }
156 # else
157 # {
158 # open($sql_fh, ">>" . $sql_file);
159 # }
160 # print STDERR "Opened SQL log\n";
161 # $self->{'sqlfh'} = $sql_fh;
162 # }
163
164 #my $mvalue = $self->{'metadata_value'};
165 #$mvalue =~ s/\'/\'\'/g;
166 #$mvalue =~ s/_claimantsep_/ \& /g;
167
168 #my $fh = $self->{'sqlfh'};
169 #if ($fh)
170 # {
171 # print $fh "INSERT INTO allvalues (mkey, mvalue) VALUES ('" . $self->{'metadata_name'} . "', '" . $mvalue . "');\n";
172 # }
173
174 # Clean Up
175 $self->{'metadata_name'} = "";
176 $self->{'metadata_value'} = "";
177 }
178 elsif ($element eq "Content" && $self->{'content'} ne "") {
179 $self->{'doc_obj'}->add_utf8_text($self->{'section'}, $self->{'content'});
180 $self->{'content'} = "";
181 }
182 $self->{'element'} = "";
183}
184
185sub xml_text {
186 my $self = shift(@_);
187 my ($expat) = @_;
188
189 if ($self->{'element'} eq "Metadata") {
190 $self->{'metadata_value'} .= $_;
191 }
192 elsif ($self->{'element'} eq "Content") {
193 $self->{'content'} .= $_;
194 }
195}
196
197sub open_document {
198 my $self = shift(@_);
199
200 # create a new document
201 $self->{'doc_obj'} = new doc ();
202 $self->{'section'} = "";
203}
204
205sub close_document {
206 my $self = shift(@_);
207
208 # add the associated files
209 my $assoc_files =
210 $self->{'doc_obj'}->get_metadata($self->{'doc_obj'}->get_top_section(), "gsdlassocfile");
211
212 # for when "assocfilepath" isn't the same directory that doc.xml is in...
213 my $assoc_filepath_list= $self->{'doc_obj'}->get_metadata($self->{'doc_obj'}->get_top_section(), "assocfilepath");
214
215 my $assoc_filepath=shift (@$assoc_filepath_list);
216
217 #rint STDERR "Filename is: " . $self->{'filename'} . "\n";
218 #rint STDERR "Initially my assoc_filepath is: $assoc_filepath\n";
219 #rint STDERR "Custom archive dir is: " . $self->{'base_dir'} . "\n";
220 # Correct the assoc filepath if one is defined
221 if (defined ($assoc_filepath))
222 {
223 # Check whether the assoc_filepath already includes the base dir
224 if (index($assoc_filepath, $self->{'base_dir'}) == -1)
225 {
226 # And if not, append it so as to make this absolute
227 $assoc_filepath = &util::filename_cat($self->{'base_dir'}, $assoc_filepath);
228 }
229 }
230 else
231 {
232 $assoc_filepath = $self->{'filename'};
233 $assoc_filepath =~ s/[^\\\/]*$//;
234 }
235 #rint STDERR "Goned and made it absolute: $assoc_filepath\n";
236
237 foreach my $assoc_file_info (@$assoc_files) {
238 my ($assoc_file, $mime_type, $dir) = split (":", $assoc_file_info);
239 #rint STDERR "assoc_file: $assoc_file\n";
240 #rint STDERR "mime_type: $mime_type\n";
241 #rint STDERR "dir: $dir\n";
242 my $real_dir = &util::filename_cat($assoc_filepath, $assoc_file),
243 my $assoc_dir = (defined $dir && $dir ne "")
244 ? &util::filename_cat($dir, $assoc_file) : $assoc_file;
245 $self->{'doc_obj'}->associate_file($real_dir, $assoc_dir, $mime_type);
246 #rint STDERR "According to me the real assoc_filepath is: $real_dir\n";
247 }
248 $self->{'doc_obj'}->delete_metadata($self->{'doc_obj'}->get_top_section(), "gsdlassocfile");
249
250 # process the document
251 $self->{'processor'}->process($self->{'doc_obj'}, $self->{'file'});
252}
253
254
2551;
256
257
Note: See TracBrowser for help on using the repository browser.