source: trunk/gsdl/perllib/plugins/GAPlug.pm@ 10254

Last change on this file since 10254 was 10254, checked in by kjdon, 19 years ago

added 'use strict' to all plugins, and made modifications (mostly adding 'my') to make them compile

  • Property svn:keywords set to Author Date Id Revision
File size: 5.6 KB
Line 
1###########################################################################
2#
3# GAPlug.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2001 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# Processes GreenstoneArchive XML documents. Note that this plugin does no
27# syntax checking (though the XML::Parser module tests for
28# well-formedness). It's assumed that the GreenstoneArchive files conform
29# to their DTD.
30
31package GAPlug;
32
33use XMLPlug;
34
35use strict;
36no strict 'refs'; # allow filehandles to be variables and viceversa
37
38sub BEGIN {
39 @GAPlug::ISA = ('XMLPlug');
40}
41
42
43sub get_default_process_exp {
44 my $self = shift (@_);
45
46 return q^(?i)doc\.xml$^;
47}
48
49my $arguments = [
50 ];
51
52my $options = { 'name' => "GAPlug",
53 'desc' => "{GAPlug.desc}",
54 'abstract' => "no",
55 'inherits' => "yes" };
56
57sub new {
58 my ($class) = shift (@_);
59 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
60 push(@$pluginlist, $class);
61
62 if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
63 if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
64
65 my $self = (defined $hashArgOptLists)? new XMLPlug($pluginlist,$inputargs,$hashArgOptLists): new XMLPlug($pluginlist,$inputargs);
66
67 $self->{'section'} = "";
68 $self->{'section_level'} = 0;
69 $self->{'metadata_name'} = "";
70 $self->{'metadata_value'} = "";
71 $self->{'content'} = "";
72
73 return bless $self, $class;
74}
75
76sub xml_start_document {
77}
78
79sub xml_end_document {
80}
81
82sub xml_doctype {
83 my $self = shift(@_);
84
85 my ($expat, $name, $sysid, $pubid, $internal) = @_;
86
87 # allow the short-lived and badly named "GreenstoneArchive" files to be processed
88 # as well as the "Archive" files which should now be created by import.pl
89 die "" if ($name !~ /^(Greenstone)?Archive$/);
90
91 my $outhandle = $self->{'outhandle'};
92 print $outhandle "GAPlug: processing $self->{'file'}\n" if $self->{'verbosity'} > 1;
93 print STDERR "<Processing n='$self->{'file'}' p='GAPlug'>\n" if $self->{'gli'};
94
95}
96
97
98sub xml_start_tag {
99 my $self = shift(@_);
100 my ($expat, $element) = @_;
101
102 $self->{'element'} = $element;
103 if ($element eq "Section") {
104 if ($self->{'section_level'} == 0) {
105 $self->open_document();
106 } else {
107 my $doc_obj = $self->{'doc_obj'};
108 $self->{'section'} =
109 $doc_obj->insert_section($doc_obj->get_end_child($self->{'section'}));
110 }
111
112 $self->{'section_level'} ++;
113 }
114 elsif ($element eq "Metadata") {
115 $self->{'metadata_name'} = $_{'name'};
116 }
117}
118
119sub xml_end_tag {
120 my $self = shift(@_);
121 my ($expat, $element) = @_;
122
123 if ($element eq "Section") {
124 $self->{'section_level'} --;
125 $self->{'section'} = $self->{'doc_obj'}->get_parent_section ($self->{'section'});
126 $self->close_document() if $self->{'section_level'} == 0;
127 }
128 elsif ($element eq "Metadata") {
129 $self->{'doc_obj'}->add_utf8_metadata($self->{'section'}, $self->{'metadata_name'},$self->{'metadata_value'});
130 $self->{'metadata_name'} = "";
131 $self->{'metadata_value'} = "";
132 }
133 elsif ($element eq "Content" && $self->{'content'} ne "") {
134 $self->{'doc_obj'}->add_utf8_text($self->{'section'}, $self->{'content'});
135 $self->{'content'} = "";
136 }
137
138 $self->{'element'} = "";
139}
140
141sub xml_text {
142 my $self = shift(@_);
143 my ($expat) = @_;
144
145 if ($self->{'element'} eq "Metadata") {
146 $self->{'metadata_value'} .= $_;
147 }
148 elsif ($self->{'element'} eq "Content") {
149 $self->{'content'} .= $_;
150 }
151}
152
153sub open_document {
154 my $self = shift(@_);
155
156 # create a new document
157 $self->{'doc_obj'} = new doc ();
158 $self->{'section'} = "";
159}
160
161sub close_document {
162 my $self = shift(@_);
163
164 # add the associated files
165 my $assoc_files =
166 $self->{'doc_obj'}->get_metadata($self->{'doc_obj'}->get_top_section(), "gsdlassocfile");
167
168 # for when "assocfilepath" isn't the same directory that doc.xml is in...
169 my $assoc_filepath_list= $self->{'doc_obj'}->get_metadata($self->{'doc_obj'}->get_top_section(), "assocfilepath");
170
171 my $assoc_filepath=shift (@$assoc_filepath_list);
172 if (defined ($assoc_filepath)) {
173 # make absolute rather than relative...
174 $self->{'filename'} =~ m@^(.*[\\/]archives)@;
175 $assoc_filepath = "$1/$assoc_filepath/";
176 } else {
177 $assoc_filepath = $self->{'filename'};
178 $assoc_filepath =~ s/[^\\\/]*$//;
179 }
180
181 foreach my $assoc_file_info (@$assoc_files) {
182 my ($assoc_file, $mime_type, $dir) = split (":", $assoc_file_info);
183 my $real_dir = &util::filename_cat($assoc_filepath, $assoc_file),
184 my $assoc_dir = (defined $dir && $dir ne "")
185 ? &util::filename_cat($dir, $assoc_file) : $assoc_file;
186 $self->{'doc_obj'}->associate_file($real_dir, $assoc_dir, $mime_type);
187 }
188 $self->{'doc_obj'}->delete_metadata($self->{'doc_obj'}->get_top_section(), "gsdlassocfile");
189
190 # process the document
191 $self->{'processor'}->process($self->{'doc_obj'}, $self->{'file'});
192}
193
194
1951;
196
197
Note: See TracBrowser for help on using the repository browser.