source: gsdl/trunk/perllib/plugins/GAPlugin.pm@ 15918

Last change on this file since 15918 was 15872, checked in by kjdon, 16 years ago

plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Alos, I haven't really tested anything yet...

  • Property svn:keywords set to Author Date Id Revision
File size: 7.9 KB
Line 
1###########################################################################
2#
3# GAPlugin.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2001 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# Processes GreenstoneArchive XML documents. Note that this plugin does no
27# syntax checking (though the XML::Parser module tests for
28# well-formedness). It's assumed that the GreenstoneArchive files conform
29# to their DTD.
30
31package GAPlugin;
32
33use ReadXMLFile;
34
35use strict;
36no strict 'refs'; # allow filehandles to be variables and viceversa
37
38sub BEGIN {
39 @GAPlugin::ISA = ('ReadXMLFile');
40}
41
42
43sub get_default_process_exp {
44 my $self = shift (@_);
45
46 return q^(?i)doc\.xml$^;
47}
48
49my $arguments =
50 [ { 'name' => "process_exp",
51 'desc' => "{BasePlugin.process_exp}",
52 'type' => "regexp",
53 'deft' => &get_default_process_exp(),
54 'reqd' => "no" } ];
55
56my $options = { 'name' => "GAPlugin",
57 'desc' => "{GAPlugin.desc}",
58 'abstract' => "no",
59 'inherits' => "yes",
60 'args' => $arguments };
61
62sub new {
63 my ($class) = shift (@_);
64 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
65 push(@$pluginlist, $class);
66
67 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
68 push(@{$hashArgOptLists->{"OptList"}},$options);
69
70 my $self = new ReadXMLFile($pluginlist, $inputargs, $hashArgOptLists);
71
72 $self->{'section'} = "";
73 $self->{'section_level'} = 0;
74 $self->{'metadata_name'} = "";
75 $self->{'metadata_value'} = "";
76 $self->{'content'} = "";
77
78# # Currently used to store information for previous values controls. In
79# # the next contract I'll move to using information directly from Lucene.
80# $self->{'sqlfh'} = 0;
81
82 return bless $self, $class;
83}
84
85sub xml_start_document {
86}
87
88sub xml_end_document {
89}
90
91sub get_doctype {
92 my $self = shift(@_);
93
94 return "(Greenstone)?Archive";
95}
96
97
98sub xml_doctype {
99 my $self = shift(@_);
100
101 my ($expat, $name, $sysid, $pubid, $internal) = @_;
102
103 # allow the short-lived and badly named "GreenstoneArchive" files to be processed
104 # as well as the "Archive" files which should now be created by import.pl
105 die "" if ($name !~ /^(Greenstone)?Archive$/);
106
107 my $outhandle = $self->{'outhandle'};
108 print $outhandle "GAPlugin: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
109 print STDERR "<Processing n='$self->{'file'}' p='GAPlugin'>\n" if $self->{'gli'};
110
111}
112
113
114sub xml_start_tag {
115 my $self = shift(@_);
116 my ($expat, $element) = @_;
117
118 $self->{'element'} = $element;
119 if ($element eq "Section") {
120 if ($self->{'section_level'} == 0) {
121 $self->open_document();
122 } else {
123 my $doc_obj = $self->{'doc_obj'};
124 $self->{'section'} =
125 $doc_obj->insert_section($doc_obj->get_end_child($self->{'section'}));
126 }
127
128 $self->{'section_level'} ++;
129 }
130 elsif ($element eq "Metadata") {
131 $self->{'metadata_name'} = $_{'name'};
132 }
133}
134
135sub xml_end_tag {
136 my $self = shift(@_);
137 my ($expat, $element) = @_;
138
139 if ($element eq "Section") {
140 $self->{'section_level'} --;
141 $self->{'section'} = $self->{'doc_obj'}->get_parent_section ($self->{'section'});
142 $self->close_document() if $self->{'section_level'} == 0;
143 }
144 elsif ($element eq "Metadata") {
145 $self->{'doc_obj'}->add_utf8_metadata($self->{'section'}, $self->{'metadata_name'},$self->{'metadata_value'});
146 # Ensure this value is added to the allvalues database in gseditor.
147 # Note that the database constraints prevent multiple occurances of the
148 # same key-value pair.
149 # We write these out to a file, so they can all be commited in one
150 # transaction
151 #if (!$self->{'sqlfh'})
152 # {
153 # my $sql_file = $ENV{'GSDLHOME'} . "/collect/lld/tmp/gseditor.sql";
154 # # If the file doesn't already exist, open it and begin a transaction
155 # my $sql_fh;
156 # if (!-e $sql_file)
157 # {
158 # open($sql_fh, ">" . $sql_file);
159 # print $sql_fh "BEGIN TRANSACTION;\n";
160 # }
161 # else
162 # {
163 # open($sql_fh, ">>" . $sql_file);
164 # }
165 # print STDERR "Opened SQL log\n";
166 # $self->{'sqlfh'} = $sql_fh;
167 # }
168
169 #my $mvalue = $self->{'metadata_value'};
170 #$mvalue =~ s/\'/\'\'/g;
171 #$mvalue =~ s/_claimantsep_/ \& /g;
172
173 #my $fh = $self->{'sqlfh'};
174 #if ($fh)
175 # {
176 # print $fh "INSERT INTO allvalues (mkey, mvalue) VALUES ('" . $self->{'metadata_name'} . "', '" . $mvalue . "');\n";
177 # }
178
179 # Clean Up
180 $self->{'metadata_name'} = "";
181 $self->{'metadata_value'} = "";
182 }
183 elsif ($element eq "Content" && $self->{'content'} ne "") {
184 $self->{'doc_obj'}->add_utf8_text($self->{'section'}, $self->{'content'});
185 $self->{'content'} = "";
186 }
187 $self->{'element'} = "";
188}
189
190sub xml_text {
191 my $self = shift(@_);
192 my ($expat) = @_;
193
194 if ($self->{'element'} eq "Metadata") {
195 $self->{'metadata_value'} .= $_;
196 }
197 elsif ($self->{'element'} eq "Content") {
198 $self->{'content'} .= $_;
199 }
200}
201
202sub open_document {
203 my $self = shift(@_);
204
205 # create a new document
206 $self->{'doc_obj'} = new doc ();
207 $self->{'section'} = "";
208}
209
210sub close_document {
211 my $self = shift(@_);
212
213 # add the associated files
214 my $assoc_files =
215 $self->{'doc_obj'}->get_metadata($self->{'doc_obj'}->get_top_section(), "gsdlassocfile");
216
217 # for when "assocfilepath" isn't the same directory that doc.xml is in...
218 my $assoc_filepath_list= $self->{'doc_obj'}->get_metadata($self->{'doc_obj'}->get_top_section(), "assocfilepath");
219
220 my $assoc_filepath=shift (@$assoc_filepath_list);
221
222 #rint STDERR "Filename is: " . $self->{'filename'} . "\n";
223 #rint STDERR "Initially my assoc_filepath is: $assoc_filepath\n";
224 #rint STDERR "Custom archive dir is: " . $self->{'base_dir'} . "\n";
225 # Correct the assoc filepath if one is defined
226 if (defined ($assoc_filepath))
227 {
228 # Check whether the assoc_filepath already includes the base dir
229 if (index($assoc_filepath, $self->{'base_dir'}) == -1)
230 {
231 # And if not, append it so as to make this absolute
232 $assoc_filepath = &util::filename_cat($self->{'base_dir'}, $assoc_filepath);
233 }
234 }
235 else
236 {
237 $assoc_filepath = $self->{'filename'};
238 $assoc_filepath =~ s/[^\\\/]*$//;
239 }
240 #rint STDERR "Goned and made it absolute: $assoc_filepath\n";
241
242 foreach my $assoc_file_info (@$assoc_files) {
243 my ($assoc_file, $mime_type, $dir) = split (":", $assoc_file_info);
244 #rint STDERR "assoc_file: $assoc_file\n";
245 #rint STDERR "mime_type: $mime_type\n";
246 #rint STDERR "dir: $dir\n";
247 my $real_dir = &util::filename_cat($assoc_filepath, $assoc_file),
248 my $assoc_dir = (defined $dir && $dir ne "")
249 ? &util::filename_cat($dir, $assoc_file) : $assoc_file;
250 $self->{'doc_obj'}->associate_file($real_dir, $assoc_dir, $mime_type);
251 #rint STDERR "According to me the real assoc_filepath is: $real_dir\n";
252 }
253 $self->{'doc_obj'}->delete_metadata($self->{'doc_obj'}->get_top_section(), "gsdlassocfile");
254
255 # process the document
256 $self->{'processor'}->process($self->{'doc_obj'}, $self->{'file'});
257}
258
259
2601;
261
262
Note: See TracBrowser for help on using the repository browser.