source: trunk/gsdl/perllib/plugins/UnknownPlug.pm@ 10254

Last change on this file since 10254 was 10254, checked in by kjdon, 19 years ago

added 'use strict' to all plugins, and made modifications (mostly adding 'my') to make them compile

  • Property svn:keywords set to Author Date Id Revision
File size: 8.2 KB
Line 
1###########################################################################
2#
3# UnknownPlug.pm -- Plugin for files you know about but Greenstone doesn't
4#
5# A component of the Greenstone digital library software from the New
6# Zealand Digital Library Project at the University of Waikato, New
7# Zealand.
8#
9# Copyright (C) 2001 Gordon W. Paynter
10# Copyright (C) 2001 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful, but
18# WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20# General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# UnknownPlug - a plugin for unknown files
29
30# This is a simple Plugin for importing files in formats that
31# Greenstone doesn't know anything about. A fictional document will
32# be created for every such file, and the file itself will be passed
33# to Greenstone as the "associated file" of the document.
34
35# Here's an example where it is useful: I have a collection of
36# pictures that include a couple of quicktime movie files with names
37# like DCP_0163.MOV. Rather than write a new plugin for quicktime
38# movies, I add this line to the collection configuration file:
39
40# plugin UnknownPlug -process_exp "*.MOV" -assoc_field "movie"
41
42# A document is created for each movie, with the associated movie
43# file's name in the "movie" metadata field. In the collection's
44# format strings, I use the {If} macro to output different text for
45# each type of file, like this:
46
47# {If}{[movie],<HTML for displaying movie>}{If}{[Image],<HTML for displaying image>}
48
49# You can also add extra metadata, such as the Title, Subject, and
50# Duration, with metadata.xml files and RecPlug. (If you want to use
51# UnknownPlug with more than one type of file, you will have to add
52# some sort of distinguishing metadata in this way.)
53
54
55
56package UnknownPlug;
57
58use BasPlug;
59
60use strict;
61no strict 'refs'; # allow filehandles to be variables and viceversa
62
# Set up inheritance at compile time: UnknownPlug is a subclass of BasPlug.
BEGIN {
    @UnknownPlug::ISA = qw(BasPlug);
}
66
# Plugin-specific arguments.  new() appends these to the shared "ArgList"
# before handing construction over to BasPlug.
# NOTE(review): the "{UnknownPlug.xxx}" 'desc' values look like keys into a
# string table rather than literal help text -- confirm against gsprintf.
my $arguments = [

    # Name of the metadata field that receives the associated file's name
    # (falls back to "unknown_file" when empty).
    {
        'name' => "assoc_field",
        'desc' => "{UnknownPlug.assoc_field}",
        'type' => "string",
        'deft' => "",
        'reqd' => "no",
    },

    # Value stored in the "FileFormat" metadata (falls back to "unknown").
    {
        'name' => "file_format",
        'desc' => "{UnknownPlug.file_format}",
        'type' => "string",
        'deft' => "",
        'reqd' => "no",
    },

    # MIME type recorded for the associated file (falls back to
    # "unknown/unknown").
    {
        'name' => "mime_type",
        'desc' => "{UnknownPlug.mime_type}",
        'type' => "string",
        'deft' => "",
        'reqd' => "no",
    },

    # Plain file extension; new() turns it into a process_exp when no
    # process_exp was supplied.
    {
        'name' => "process_extension",
        'desc' => "{UnknownPlug.process_extension}",
        'type' => "string",
        'deft' => "",
        'reqd' => "no",
    },
];

# Plugin description record.  new() appends it to the shared "OptList".
my $options = {
    'name'     => "UnknownPlug",
    'desc'     => "{UnknownPlug.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
94
95
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class's name is appended to it
#   $inputargs       - passed through unchanged to BasPlug->new
#   $hashArgOptLists - hash ref; this plugin's argument and option
#                      descriptions are appended to its "ArgList" and
#                      "OptList" entries (created here if missing)
#
# Returns the object built by BasPlug, re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # $arguments and $options are file-level lexicals that are always
    # initialised when this module is loaded.  The pushes autovivify
    # $hashArgOptLists when the caller passed none, so it is always
    # defined by the time BasPlug's constructor runs.
    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit class-method call.  The indirect form ("new BasPlug (...)")
    # depends on parser heuristics, which is fragile in a package that
    # defines its own new().
    my $self = BasPlug->new($pluginlist, $inputargs, $hashArgOptLists);

    # "-process_extension" is a simpler alternative to -process_exp for
    # non-regexp people.
    # NOTE(review): the extension is interpolated into the pattern
    # unescaped, so regex metacharacters in it stay live -- confirm whether
    # that is relied upon before adding quotemeta.
    if (!$self->{'process_exp'} && $self->{'process_extension'}) {
        $self->{'process_exp'} = "\\." . $self->{'process_extension'} . "\$";
    }

    return bless $self, $class;
}
113
# Default process expression: the empty string, i.e. this plugin supplies no
# pattern of its own (one comes from -process_exp or -process_extension).
sub get_default_process_exp {
    my $no_default = '';
    return $no_default;
}
117
118
# Associate the unknown file with the new document.
#
# Arguments:
#   $self     - plugin object; its 'file_format', 'mime_type' and
#               'assoc_field' settings are read, each with a fallback
#   $filename - filename with full path
#   $file     - filename without path
#   $doc_obj  - document object the file is attached to
#
# Returns 1 on success, 0 if either filename is empty (nothing is added to
# the document in that case).

sub associate_unknown_file {
    my $self     = shift (@_);
    my $filename = shift (@_);    # filename with full path
    my $file     = shift (@_);    # filename without path
    my $doc_obj  = shift (@_);

    # check the filename is okay
    return 0 if ($file eq "" || $filename eq "");

    # Settings left empty on the command line fall back to "unknown" values.
    my $section     = $doc_obj->get_top_section();
    my $file_format = $self->{'file_format'} || "unknown";
    my $mime_type   = $self->{'mime_type'}   || "unknown/unknown";
    my $assoc_field = $self->{'assoc_field'} || "unknown_file";

    # Add the file as an associated file, and record its format and name.
    $doc_obj->associate_file($filename, $file, $mime_type, $section);
    $doc_obj->add_metadata ($section, "FileFormat", $file_format);
    $doc_obj->add_metadata ($section, $assoc_field, $file);

    # srclink ... /srclink wrap the srcicon in a link whose target ends in
    # the "[<assoc_field>]" metadata placeholder, i.e. the file name stored
    # just above.
    $doc_obj->add_metadata ($section, "srclink",
        "<a href=\"_httpcollection_/index/assoc/[assocfilepath]/[$assoc_field]\">");
    $doc_obj->add_metadata ($section, "srcicon", "_iconunknown_");
    $doc_obj->add_metadata ($section, "/srclink", "</a>");

    return 1;
}
154
155
156
# The UnknownPlug read() function.  This function does all the right
# things to make general options work for a given plugin.  UnknownPlug
# overrides read() because there is no need to read in the actual text of
# the file: its contents are not text.
#
# Arguments (after $self):
#   $pluginfo, $base_dir, $file, $metadata, $processor,
#   $maxdocs, $total_count, $gli
# $maxdocs and $total_count are accepted but not used here.  When $gli is
# true, <Processing>/<ProcessingError> tags are written to STDERR.
#
# Returns:
#   undef  the file is not one this plugin processes
#   0      the file matches block_exp
#   -1     an error occurred during processing
#   1      one document was created and handed to $processor
#
# Note that $base_dir might be "" and that $file might include directories

sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    my $outhandle = $self->{'outhandle'};

    # Make sure we're processing the correct file
    my $filename = &util::filename_cat($base_dir, $file);
    return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;

    # NOTE(review): if process_exp is the empty string, Perl's empty-pattern
    # rule applies (the last successfully matched pattern is reused) --
    # confirm that an empty process_exp cannot reach this point.
    if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
        # "not ours": explicitly undef, as distinct from 0 and -1
        return undef;
    }
    print STDERR "<Processing n='$file' p='UnknownPlug'>\n" if ($gli);
    print $outhandle "UnknownPlug processing \"$filename\"\n"
        if $self->{'verbosity'} > 1;

    # if there's a leading directory name, eat it...
    # NOTE(review): from here on $file no longer carries its directory, so
    # the <ProcessingError> tags below name the file differently from the
    # <Processing> tag above -- confirm that is what the GLI expects.
    $file =~ s/^.*[\/\\]//;

    # create a new document (explicit class-method call; the indirect
    # "new doc (...)" form depends on parser heuristics)
    my $doc_obj = doc->new($filename, "indexed_doc");
    $doc_obj->set_OIDtype ($processor->{'OIDtype'});
    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
    # set the filename as Source metadata to be consistent with other plugins
    $doc_obj->add_metadata($doc_obj->get_top_section(), "Source", &ghtml::dmsafe($file));
    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "FileSize", (-s $filename));

    # associate the file with the document
    if (associate_unknown_file($self, $filename, $file, $doc_obj) != 1) {
        if ($gli) {
            print STDERR "<ProcessingError n='$file'>\n";
        }
        print $outhandle "UnknownPlug: couldn't process \"$filename\"\n";
        return -1;    # error during processing
    }

    # the document text is a placeholder string rather than the file's
    # contents, so we don't break downstream plugins
    my $text = &gsprintf::lookup_string("{BasPlug.dummy_text}");

    # include any metadata passed in from previous plugins
    my $section = $doc_obj->get_top_section();
    $self->extra_metadata ($doc_obj, $section, $metadata);

    $self->title_fallback($doc_obj, $section, $file);

    # do plugin specific processing of doc_obj
    unless (defined ($self->process(\$text, $pluginfo, $base_dir, $file, $metadata, $doc_obj))) {
        print STDERR "<ProcessingError n='$file'>\n" if ($gli);
        return -1;
    }

    # do any automatic metadata extraction
    $self->auto_extract_metadata ($doc_obj);

    # add an OID; the text is added only afterwards, as in the original
    $doc_obj->set_OID();
    $doc_obj->add_text($section, $text);

    # process the document
    $processor->process($doc_obj);

    $self->{'num_processed'} ++;
    return 1;
}
231
232
# UnknownPlug processing of doc_obj.  In practice we don't need to do
# anything here because the read function takes care of everything.
#
# Arguments (after $self): $textref, $pluginfo, $base_dir, $file,
# $metadata, $doc_obj -- all accepted and ignored.
#
# Always returns 1.  read() treats an undefined return value from
# process() as a processing error, so a defined value must be returned.

sub process {
    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;

    return 1;
}
243
244
2451;
246
247
248
249
250
251
252
253
254
255
256
Note: See TracBrowser for help on using the repository browser.