source: trunk/gsdl/perllib/plugins/GAPlug.pm@ 4744

Last change on this file since 4744 was 3629, checked in by jrm21, 21 years ago

Look for associated files in the directory given by the assocfilepath
metadata, if that metadata exists. This is not necessarily the same directory
that doc.xml is in - for example, when the -groupsize option is used to put
more than one document in each .xml file.

  • Property svn:keywords set to Author Date Id Revision
File size: 5.4 KB
Line 
1###########################################################################
2#
3# GAPlug.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2001 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# Processes GreenstoneArchive XML documents. Note that this plugin does no
27# syntax checking (though the XML::Parser module tests for
28# well-formedness). It's assumed that the GreenstoneArchive files conform
29# to their DTD.
30
31# 12/05/02 Added usage datastructure - John Thompson
32
package GAPlug;

use XMLPlug;

use strict;
# NOTE(review): 'outhandle' is printed to via a scalar and may hold a plain
# handle name (e.g. "STDERR") rather than a handle reference -- confirm in
# the base plugin. Symbolic handles are rejected by "strict refs", so that
# one stricture is relaxed.
no strict 'refs';

# Make GAPlug a subclass of XMLPlug. The array name is fully qualified so
# the assignment is valid under "use strict".
sub BEGIN {
    @GAPlug::ISA = ('XMLPlug');
}
40
# Description record for this plugin (name, human-readable description and
# the "inherits" flag). The constructor pushes it onto the option list.
my $options = {
    'inherits' => "yes",
    'name'     => "GAPlug",
    'desc'     => "Processes GreenstoneArchive XML documents. Note that this plugin does no syntax checking (though the XML::Parser module tests for well-formedness). It's assumed that the GreenstoneArchive files conform to their DTD.",
};
44
# Constructor.
#
# Builds the object through XMLPlug's constructor (passing on the class name
# and any remaining arguments), appends this plugin's option description to
# the inherited option list, initialises the per-document parse state and
# re-blesses the object into $class.
#
# Returns the new plugin object.
sub new {
    my $class = shift (@_);

    # Explicit class-method call instead of the indirect-object form
    # "new XMLPlug (...)", whose parsing depends on compile-time heuristics.
    my $self = XMLPlug->new($class, @_);

    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
    my $option_list = $self->{'option_list'};
    push( @{$option_list}, $options );

    # State maintained by the xml_* handlers while a document is parsed.
    $self->{'section'} = "";
    $self->{'section_level'} = 0;
    $self->{'metadata_name'} = "";
    $self->{'metadata_value'} = "";
    $self->{'content'} = "";

    return bless $self, $class;
}
61
# Hook for the start of a document; this plugin deliberately does nothing
# here and returns nothing.
sub xml_start_document {
    return;
}
64
# Hook for the end of a document; this plugin deliberately does nothing
# here and returns nothing.
sub xml_end_document {
    return;
}
67
# DOCTYPE handler.
#
# Arguments: ($self, $expat, $name, ...) where $name is the document type
# name; any further arguments are ignored.
#
# Dies (with an empty message) unless $name is "Archive" or
# "GreenstoneArchive"; otherwise logs the file being processed to the
# plugin's 'outhandle'.
sub xml_doctype {
    my ($self, $expat, $name) = @_;

    # allow the short-lived and badly named "GreenstoneArchive" files to be processed
    # as well as the "Archive" files which should now be created by import.pl
    # NOTE(review): the empty die message looks deliberate -- presumably the
    # caller traps it and quietly skips files meant for another plugin.
    # Confirm in XMLPlug before changing it.
    die "" if ($name !~ /^(Greenstone)?Archive$/);

    my $outhandle = $self->{'outhandle'};
    # The prefix used to be misspelt "GAPLug"; it now matches the plugin name.
    print $outhandle "GAPlug: processing $self->{'file'}\n";
}
79
80
# Start-tag handler.
#
# Records the element name in $self->{'element'}. A "Section" tag either
# opens a new document (when no section is currently open) or inserts a new
# section after the end child of the current one, and then increases the
# nesting level. A "Metadata" tag remembers the value of its "name"
# attribute for the matching end tag.
sub xml_start_tag {
    my ($self, $expat, $tag) = @_;

    $self->{'element'} = $tag;

    if ($tag eq "Metadata") {
        # NOTE(review): %_ is presumably the attribute hash that the XML
        # parser fills in for the current tag -- confirm against the parser
        # style XMLPlug uses.
        $self->{'metadata_name'} = $_{'name'};
    }
    elsif ($tag eq "Section") {
        if ($self->{'section_level'} != 0) {
            my $doc = $self->{'doc_obj'};
            my $last_child = $doc->get_end_child($self->{'section'});
            $self->{'section'} = $doc->insert_section($last_child);
        } else {
            $self->open_document();
        }
        $self->{'section_level'} ++;
    }
}
100
# End-tag handler.
#
# "Metadata": stores the collected name/value pair on the current section
#             and clears both.
# "Section":  decreases the nesting level, moves back to the parent section
#             and closes the document once the level reaches zero.
# "Content":  stores any collected text on the current section and clears it.
# In every case the current element name is reset to "".
sub xml_end_tag {
    my ($self, $expat, $tag) = @_;

    my $doc = $self->{'doc_obj'};

    if ($tag eq "Metadata") {
        $doc->add_utf8_metadata($self->{'section'},
                                $self->{'metadata_name'},
                                $self->{'metadata_value'});
        $self->{'metadata_name'} = "";
        $self->{'metadata_value'} = "";
    }
    elsif ($tag eq "Section") {
        my $remaining_depth = --$self->{'section_level'};
        $self->{'section'} = $doc->get_parent_section ($self->{'section'});
        if ($remaining_depth == 0) {
            $self->close_document();
        }
    }
    elsif ($tag eq "Content" && $self->{'content'} ne "") {
        $doc->add_utf8_text($self->{'section'}, $self->{'content'});
        $self->{'content'} = "";
    }

    $self->{'element'} = "";
}
123
# Text handler.
#
# Appends the current text chunk (taken from $_) to the pending content
# when the element being read is "Content", or to the pending metadata
# value when it is "Metadata". Text seen under any other element is ignored.
sub xml_text {
    my ($self, $expat) = @_;

    my $current = $self->{'element'};

    if ($current eq "Content") {
        $self->{'content'} .= $_;
    }
    elsif ($current eq "Metadata") {
        $self->{'metadata_value'} .= $_;
    }
}
135
# Starts a new document: stores a fresh doc object in $self->{'doc_obj'} and
# resets the current section to "".
sub open_document {
    my $self = shift(@_);

    # create a new document
    # Explicit class-method call: this package defines its own new(), so the
    # indirect-object form "new doc ()" could be parsed as a call to that
    # sub whenever the doc package is not yet known at compile time.
    $self->{'doc_obj'} = doc->new();
    $self->{'section'} = "";
}
143
# Finishes the current document.
#
# Reads the "gsdlassocfile" entries from the top section, registers each
# listed file with the doc object, removes that metadata again and hands the
# document to the processor together with $self->{'file'}.
#
# Each "gsdlassocfile" entry is split on ":" into file name, MIME type and
# an optional sub-directory. The files are looked up in
# "<...archives>/<assocfilepath>/" when the "assocfilepath" metadata exists
# and the document's file name contains an "archives" directory; otherwise
# they are looked up in the directory that holds the document file itself.
sub close_document {
    my $self = shift(@_);

    my $doc_obj     = $self->{'doc_obj'};
    my $top_section = $doc_obj->get_top_section();

    # add the associated files
    my $assoc_files = $doc_obj->get_metadata($top_section, "gsdlassocfile");

    # for when "assocfilepath" isn't the same directory that doc.xml is in...
    my $assoc_filepath_list = $doc_obj->get_metadata($top_section, "assocfilepath");
    my $assoc_filepath = shift (@$assoc_filepath_list);

    # Only use $1 when the match actually succeeded; after a failed match it
    # would hold a stale or undefined value and yield a bogus path.
    if (defined ($assoc_filepath)
        && $self->{'filename'} =~ m@^(.*[\\/]archives)@) {
        # make absolute rather than relative...
        $assoc_filepath = "$1/$assoc_filepath/";
    } else {
        # fall back to the directory containing the document file
        $assoc_filepath = $self->{'filename'};
        $assoc_filepath =~ s/[^\\\/]*$//;
    }

    foreach my $assoc_file_info (@$assoc_files) {
        my ($assoc_file, $mime_type, $dir) = split (":", $assoc_file_info);
        my $real_dir = &util::filename_cat($assoc_filepath, $assoc_file);
        my $assoc_dir = (defined $dir && $dir ne "")
            ? &util::filename_cat($dir, $assoc_file) : $assoc_file;
        $doc_obj->associate_file($real_dir, $assoc_dir, $mime_type);
    }
    $doc_obj->delete_metadata($top_section, "gsdlassocfile");

    # process the document
    $self->{'processor'}->process($doc_obj, $self->{'file'});
}
176
177
1781;
179
180
Note: See TracBrowser for help on using the repository browser.