source: trunk/gsdl/perllib/plugins/GAPlug.pm@ 3073

Last change on this file since 3073 was 2925, checked in by sjboddie, 22 years ago

Altered the format of the GreenstoneArchive and GreenstoneDirectoryMetadata
XML files slightly (they're now called Archive and DirectoryMetadata
respectively).

  • Property svn:keywords set to Author Date Id Revision
File size: 4.4 KB
Line 
###########################################################################
#
# GAPlug.pm
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 2001 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################
25
# Processes Greenstone Archive XML documents. Note that this plugin does no
# syntax checking (though the XML::Parser module tests for well-formedness).
# It's assumed that the Archive files conform to their DTD.
29
package GAPlug;

use XMLPlug;

# GAPlug is a subclass of XMLPlug. The parent list is assigned inside a
# BEGIN block, so it is already in place while the rest of this file is
# being compiled.
BEGIN {
    @ISA = qw(XMLPlug);
}
37
# Constructor.
#
# Arguments: the class name followed by any plugin arguments, which are
# handed on unchanged to the XMLPlug constructor.
# Returns:   the new plugin object, blessed into $class.
#
# Besides delegating to XMLPlug, this resets all of the parsing state that
# the xml_* handlers in this file read and update.
sub new {
    my $class = shift (@_);

    # Direct method-call syntax: the indirect form ("new XMLPlug (...)") is
    # ambiguous inside a sub that is itself called "new".
    my $self = XMLPlug->new($class, @_);

    # 'element' is read by xml_text and written by xml_start_tag and
    # xml_end_tag, so give it a defined value like the other state fields.
    $self->{'element'} = "";

    $self->{'section'} = "";
    $self->{'section_level'} = 0;
    $self->{'metadata_name'} = "";
    $self->{'metadata_value'} = "";
    $self->{'content'} = "";

    return bless $self, $class;
}
50
# Start-of-document hook (presumably invoked through XMLPlug -- confirm).
# This plugin has nothing to do at that point, so the hook is a no-op.
sub xml_start_document {
    return;
}
53
# End-of-document hook (presumably invoked through XMLPlug -- confirm).
# Nothing to do here: documents are finished off in xml_end_tag when the
# outermost Section element closes.
sub xml_end_document {
    return;
}
56
# Doctype handler.
#
# Arguments: the plugin object, then the parser object, the doctype name,
# the system id, the public id and the internal subset (only the name is
# used here).
#
# Dies with an empty message unless the doctype name is "Archive" or
# "GreenstoneArchive"; otherwise writes a progress line to the plugin's
# output handle.
sub xml_doctype {
    my ($self, $expat, $name, $sysid, $pubid, $internal) = @_;

    # "Archive" is what import.pl should now create; the short-lived and
    # badly named "GreenstoneArchive" files are still accepted as well.
    unless ($name =~ /^(Greenstone)?Archive$/) {
        die "";
    }

    print { $self->{'outhandle'} } "GAPLug: processing $self->{'file'}\n";
}
68
69
# Start-tag handler.
#
# Arguments: the plugin object, the parser object and the element name.
#
# Records the element name for xml_text, and then:
#   Metadata - remembers the value of the tag's "name" attribute;
#   Section  - opens a new document when no section is open yet, otherwise
#              inserts a new section into the current document; in both
#              cases the nesting level goes up by one.
sub xml_start_tag {
    my ($self, $expat, $element) = @_;

    $self->{'element'} = $element;

    if ($element eq "Metadata") {
        # The attribute is read from the global %_ hash (presumably filled
        # in by XML::Parser's Stream style -- confirm against XMLPlug).
        $self->{'metadata_name'} = $_{'name'};
    }
    elsif ($element eq "Section") {
        if ($self->{'section_level'} != 0) {
            # Nested section: insert it relative to the end child of the
            # section that is currently open.
            my $doc = $self->{'doc_obj'};
            $self->{'section'} =
                $doc->insert_section($doc->get_end_child($self->{'section'}));
        }
        else {
            # Outermost section: this is the start of a new document.
            $self->open_document();
        }
        $self->{'section_level'}++;
    }
}
89
# End-tag handler.
#
# Arguments: the plugin object, the parser object and the element name.
#
#   Metadata - stores the collected name/value pair on the current section
#              and clears both;
#   Section  - goes up one nesting level, makes the parent section current
#              and closes the document once the level is back at zero;
#   Content  - stores the collected text on the current section (only when
#              there is some) and clears it.
#
# In every case the "current element" marker is cleared afterwards.
sub xml_end_tag {
    my ($self, $expat, $element) = @_;

    if ($element eq "Metadata") {
        $self->{'doc_obj'}->add_utf8_metadata(
            @$self{'section', 'metadata_name', 'metadata_value'});
        @$self{'metadata_name', 'metadata_value'} = ("", "");
    }
    elsif ($element eq "Section") {
        $self->{'section_level'}--;
        $self->{'section'} =
            $self->{'doc_obj'}->get_parent_section($self->{'section'});
        if ($self->{'section_level'} == 0) {
            $self->close_document();
        }
    }
    elsif ($element eq "Content" && $self->{'content'} ne "") {
        $self->{'doc_obj'}->add_utf8_text(@$self{'section', 'content'});
        $self->{'content'} = "";
    }

    $self->{'element'} = "";
}
112
# Character-data handler.
#
# Arguments: the plugin object and the parser object (unused).
#
# The text itself is taken from the global $_ and appended to the metadata
# value or to the section content, depending on which element is currently
# recorded as open. Text seen while any other element is open is ignored.
sub xml_text {
    my ($self, $expat) = @_;

    # Which state field collects the text of which element.
    my %buffer_for = (
        "Metadata" => 'metadata_value',
        "Content"  => 'content',
    );

    my $buffer = $buffer_for{$self->{'element'}};
    if (defined $buffer) {
        $self->{$buffer} .= $_;
    }
}
124
# Begins a new document: stores a freshly created doc object on the plugin
# and resets the current section id to the empty string.
sub open_document {
    my ($self) = @_;

    my $fresh_doc = doc->new();
    $self->{'doc_obj'} = $fresh_doc;
    $self->{'section'} = "";
}
132
# Finishes the current document and hands it to the processor.
#
# The top section's "gsdlassocfile" metadata values each have the form
# "file:mimetype:dir" (dir may be missing or empty). For every value the
# file is registered on the doc object with associate_file; the metadata is
# then deleted and the document is passed, together with the plugin's
# 'file' value, to the processor's process method.
sub close_document {
    my $self = shift(@_);

    my $doc_obj = $self->{'doc_obj'};

    # add the associated files
    my $assoc_files =
        $doc_obj->get_metadata($doc_obj->get_top_section(), "gsdlassocfile");

    # Directory part of the archive file's path: strip everything after the
    # last forward or backward slash.
    my $parent_dir = $self->{'filename'};
    $parent_dir =~ s/[^\\\/]*$//;

    foreach my $assoc_file_info (@$assoc_files) {
        my ($assoc_file, $mime_type, $dir) = split (":", $assoc_file_info);

        # Location of the file next to the archive document.
        my $real_dir = util::filename_cat($parent_dir, $assoc_file);

        # Name to associate the file under; prefixed with $dir when one
        # was given.
        my $assoc_dir = (defined $dir && $dir ne "")
            ? util::filename_cat($dir, $assoc_file) : $assoc_file;

        $doc_obj->associate_file($real_dir, $assoc_dir, $mime_type);
    }
    $doc_obj->delete_metadata($doc_obj->get_top_section(), "gsdlassocfile");

    # process the document
    $self->{'processor'}->process($doc_obj, $self->{'file'});
}
155
156
1571;
Note: See TracBrowser for help on using the repository browser.