source: trunk/gsdl/perllib/plugins/GAPlug.pm@ 12969

Last change on this file since 12969 was 12844, checked in by mdewsnip, 18 years ago

Incremental building and dynamic GDBM updating code, many thanks to John Rowe and John Thompson at DL Consulting Ltd.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.7 KB
Line 
1###########################################################################
2#
3# GAPlug.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2001 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# Processes GreenstoneArchive XML documents. Note that this plugin does no
27# syntax checking (though the XML::Parser module tests for
28# well-formedness). It's assumed that the GreenstoneArchive files conform
29# to their DTD.
30
31package GAPlug;
32
33use XMLPlug;
34
35use strict;
36no strict 'refs'; # allow filehandles to be variables and viceversa
37
38sub BEGIN {
39 @GAPlug::ISA = ('XMLPlug');
40}
41
42
43sub get_default_process_exp {
44 my $self = shift (@_);
45
46 return q^(?i)doc\.xml$^;
47}
48
49my $arguments = [
50 ];
51
52my $options = { 'name' => "GAPlug",
53 'desc' => "{GAPlug.desc}",
54 'abstract' => "no",
55 'inherits' => "yes" };
56
57sub new {
58 my ($class) = shift (@_);
59 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
60 push(@$pluginlist, $class);
61
62 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
63 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
64
65 my $self = new XMLPlug($pluginlist, $inputargs, $hashArgOptLists);
66
67 $self->{'section'} = "";
68 $self->{'section_level'} = 0;
69 $self->{'metadata_name'} = "";
70 $self->{'metadata_value'} = "";
71 $self->{'content'} = "";
72
73# # Currently used to store information for previous values controls. In
74# # the next contract I'll move to using information directly from Lucene.
75# $self->{'sqlfh'} = 0;
76
77 return bless $self, $class;
78}
79
80sub xml_start_document {
81}
82
83sub xml_end_document {
84}
85
86sub xml_doctype {
87 my $self = shift(@_);
88
89 my ($expat, $name, $sysid, $pubid, $internal) = @_;
90
91 # allow the short-lived and badly named "GreenstoneArchive" files to be processed
92 # as well as the "Archive" files which should now be created by import.pl
93 die "" if ($name !~ /^(Greenstone)?Archive$/);
94
95 my $outhandle = $self->{'outhandle'};
96 print $outhandle "GAPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
97 print STDERR "<Processing n='$self->{'file'}' p='GAPlug'>\n" if $self->{'gli'};
98
99}
100
101
102sub xml_start_tag {
103 my $self = shift(@_);
104 my ($expat, $element) = @_;
105
106 $self->{'element'} = $element;
107 if ($element eq "Section") {
108 if ($self->{'section_level'} == 0) {
109 $self->open_document();
110 } else {
111 my $doc_obj = $self->{'doc_obj'};
112 $self->{'section'} =
113 $doc_obj->insert_section($doc_obj->get_end_child($self->{'section'}));
114 }
115
116 $self->{'section_level'} ++;
117 }
118 elsif ($element eq "Metadata") {
119 $self->{'metadata_name'} = $_{'name'};
120 }
121}
122
123sub xml_end_tag {
124 my $self = shift(@_);
125 my ($expat, $element) = @_;
126
127 if ($element eq "Section") {
128 $self->{'section_level'} --;
129 $self->{'section'} = $self->{'doc_obj'}->get_parent_section ($self->{'section'});
130 $self->close_document() if $self->{'section_level'} == 0;
131 }
132 elsif ($element eq "Metadata") {
133 $self->{'doc_obj'}->add_utf8_metadata($self->{'section'}, $self->{'metadata_name'},$self->{'metadata_value'});
134 # Ensure this value is added to the allvalues database in gseditor.
135 # Note that the database constraints prevent multiple occurances of the
136 # same key-value pair.
137 # We write these out to a file, so they can all be commited in one
138 # transaction
139 #if (!$self->{'sqlfh'})
140 # {
141 # my $sql_file = $ENV{'GSDLHOME'} . "/collect/lld/tmp/gseditor.sql";
142 # # If the file doesn't already exist, open it and begin a transaction
143 # my $sql_fh;
144 # if (!-e $sql_file)
145 # {
146 # open($sql_fh, ">" . $sql_file);
147 # print $sql_fh "BEGIN TRANSACTION;\n";
148 # }
149 # else
150 # {
151 # open($sql_fh, ">>" . $sql_file);
152 # }
153 # print STDERR "Opened SQL log\n";
154 # $self->{'sqlfh'} = $sql_fh;
155 # }
156
157 #my $mvalue = $self->{'metadata_value'};
158 #$mvalue =~ s/\'/\'\'/g;
159 #$mvalue =~ s/_claimantsep_/ \& /g;
160
161 #my $fh = $self->{'sqlfh'};
162 #if ($fh)
163 # {
164 # print $fh "INSERT INTO allvalues (mkey, mvalue) VALUES ('" . $self->{'metadata_name'} . "', '" . $mvalue . "');\n";
165 # }
166
167 # Clean Up
168 $self->{'metadata_name'} = "";
169 $self->{'metadata_value'} = "";
170 }
171 elsif ($element eq "Content" && $self->{'content'} ne "") {
172 $self->{'doc_obj'}->add_utf8_text($self->{'section'}, $self->{'content'});
173 $self->{'content'} = "";
174 }
175 $self->{'element'} = "";
176}
177
178sub xml_text {
179 my $self = shift(@_);
180 my ($expat) = @_;
181
182 if ($self->{'element'} eq "Metadata") {
183 $self->{'metadata_value'} .= $_;
184 }
185 elsif ($self->{'element'} eq "Content") {
186 $self->{'content'} .= $_;
187 }
188}
189
190sub open_document {
191 my $self = shift(@_);
192
193 # create a new document
194 $self->{'doc_obj'} = new doc ();
195 $self->{'section'} = "";
196}
197
198sub close_document {
199 my $self = shift(@_);
200
201 # add the associated files
202 my $assoc_files =
203 $self->{'doc_obj'}->get_metadata($self->{'doc_obj'}->get_top_section(), "gsdlassocfile");
204
205 # for when "assocfilepath" isn't the same directory that doc.xml is in...
206 my $assoc_filepath_list= $self->{'doc_obj'}->get_metadata($self->{'doc_obj'}->get_top_section(), "assocfilepath");
207
208 my $assoc_filepath=shift (@$assoc_filepath_list);
209
210 #rint STDERR "Filename is: " . $self->{'filename'} . "\n";
211 #rint STDERR "Initially my assoc_filepath is: $assoc_filepath\n";
212 #rint STDERR "Custom archive dir is: " . $self->{'base_dir'} . "\n";
213 # Correct the assoc filepath if one is defined
214 if (defined ($assoc_filepath))
215 {
216 # Check whether the assoc_filepath already includes the base dir
217 if (index($assoc_filepath, $self->{'base_dir'}) == -1)
218 {
219 # And if not, append it so as to make this absolute
220 $assoc_filepath = &util::filename_cat($self->{'base_dir'}, $assoc_filepath);
221 }
222 }
223 else
224 {
225 $assoc_filepath = $self->{'filename'};
226 $assoc_filepath =~ s/[^\\\/]*$//;
227 }
228 #rint STDERR "Goned and made it absolute: $assoc_filepath\n";
229
230 foreach my $assoc_file_info (@$assoc_files) {
231 my ($assoc_file, $mime_type, $dir) = split (":", $assoc_file_info);
232 #rint STDERR "assoc_file: $assoc_file\n";
233 #rint STDERR "mime_type: $mime_type\n";
234 #rint STDERR "dir: $dir\n";
235 my $real_dir = &util::filename_cat($assoc_filepath, $assoc_file),
236 my $assoc_dir = (defined $dir && $dir ne "")
237 ? &util::filename_cat($dir, $assoc_file) : $assoc_file;
238 $self->{'doc_obj'}->associate_file($real_dir, $assoc_dir, $mime_type);
239 #rint STDERR "According to me the real assoc_filepath is: $real_dir\n";
240 }
241 $self->{'doc_obj'}->delete_metadata($self->{'doc_obj'}->get_top_section(), "gsdlassocfile");
242
243 # process the document
244 $self->{'processor'}->process($self->{'doc_obj'}, $self->{'file'});
245}
246
247
2481;
249
250
Note: See TracBrowser for help on using the repository browser.