source: trunk/gsdl/perllib/plugins/UnknownPlug.pm@ 6812

Last change on this file since 6812 was 6408, checked in by jmt12, 20 years ago

Added two new attributes for script arguments. HiddenGLI controls whether the argument will be visible at all in GLI, while ModeGLI defines the lowest detail mode under which the argument will be visible (only really for import and buildcol). Also ensured that the scripts were reporting their correct default process expressions, and further refined argument types by adding the category regexp for any regular expression (which can then be hidden under lower detail modes in GLI)

  • Property svn:keywords set to Author Date Id Revision
File size: 7.6 KB
Line 
1###########################################################################
2#
3# UnknownPlug.pm -- Plugin for files you know about but Greenstone doesn't
4#
5# A component of the Greenstone digital library software from the New
6# Zealand Digital Library Project at the University of Waikato, New
7# Zealand.
8#
9# Copyright (C) 2001 Gordon W. Paynter
10# Copyright (C) 2001 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful, but
18# WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20# General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# UnknownPlug - a plugin for unknown files
29
30# This is a simple Plugin for importing files in formats that
31# Greenstone doesn't know anything about. A fictional document will
32# be created for every such file, and the file itself will be passed
33# to Greenstone as the "associated file" of the document.
34
35# Here's an example where it is useful: I have a collection of
36# pictures that include a couple of quicktime movie files with names
37# like DCP_0163.MOV. Rather than write a new plugin for quicktime
38# movies, I add this line to the collection configuration file:
39
40# plugin UnknownPlug -process_exp "*.MOV" -assoc_field "movie"
41
42# A document is created for each movie, with the associated movie
43# file's name in the "movie" metadata field. In the collection's
44# format strings, I use the {If} macro to output different text for
45# each type of file, like this:
46
47# {If}{[movie],<HTML for displaying movie>}{If}{[Image],<HTML for displaying image>}
48
49# You can also add extra metadata, such as the Title, Subject, and
50# Duration, with metadata.xml files and RecPlug. (If you want to use
51# UnknownPlug with more than one type of file, you will have to add
52# some sort of distinguishing metadata in this way.)
53
54
55
56package UnknownPlug;
57
58use BasPlug;
59use parsargv;
60
61
# Make UnknownPlug a subclass of BasPlug. The assignment sits in a BEGIN
# block, so it runs at compile time.
BEGIN {
    @ISA = qw(BasPlug);
}
65
# Arguments that UnknownPlug adds to its option list. Both are optional
# strings with an empty default. The 'desc' values are brace-wrapped keys
# (presumably looked up in a resource bundle -- confirm against BasPlug).
my $arguments = [
    {
        name => 'assoc_field',
        desc => '{UnknownPlug.assoc_field}',
        type => 'string',
        deft => '',
        reqd => 'no',
    },
    {
        name => 'file_type',
        desc => '{UnknownPlug.file_type}',
        type => 'string',
        deft => '',
        reqd => 'no',
    },
];

# Description of the plugin itself; new() pushes this onto the object's
# option list.
my $options = {
    name     => 'UnknownPlug',
    desc     => '{UnknownPlug.desc}',
    abstract => 'no',
    inherits => 'yes',
    args     => $arguments,
};
83
84# sub print_usage {
85# print STDERR "\n usage: plugin UnknownPlug [options]\n\n";
86# print STDERR " options:\n";
87# print STDERR " -assoc_field\t Name of the metadata field that will hold\n";
88# print STDERR "\t\t the associated file's name.\n";
89# print STDERR "\n";
90# print STDERR " -file_type\t Mime type of the file (e.g. image/gif)\n";
91# print STDERR "\n";
92# }
93
# Constructor.
#
# Builds the object through the BasPlug constructor, registers this
# plugin's option description on the object's option list, and parses the
# plugin-specific options -assoc_field and -file_type out of the argument
# list. Options it does not recognise are tolerated ("allow_extra_options").
#
# Dies, after printing an error and the usage text to STDERR, if
# parsargv::parse reports failure.
#
# Returns the object blessed into $class.
sub new {
    # The class name is copied, not shifted: @_ is handed on unchanged to
    # both the parent constructor and parsargv below.
    my ($class) = @_;

    # Explicit arrow call rather than the indirect-object form
    # "new BasPlug (...)", which Perl has to parse heuristically and which
    # is fragile inside a package that defines its own new().
    my $self = BasPlug->new($class, @_);
    $self->{'plugin_type'} = "UnknownPlug";

    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
    my $option_list = $self->{'option_list'};
    push(@{$option_list}, $options);

    my $parsed_ok = parsargv::parse(\@_,
        q^assoc_field/.*/^, \$self->{'assoc_field'},
        q^file_type/.*/^,   \$self->{'file_type'},
        "allow_extra_options");

    if (!$parsed_ok) {
        print STDERR "\nIncorrect options passed to UnknownPlug, check your collect.cfg configuration file\n";
        $self->print_txt_usage("");  # Use default resource bundle
        die "\n";
    }

    return bless $self, $class;
}
114
# UnknownPlug has no built-in process expression: this always returns the
# empty string.
sub get_default_process_exp {
    return q{};
}
118
119
# Associate the unknown file with the new document.
#
# Arguments:
#   $self     - the plugin object; its 'file_type' and 'assoc_field' entries
#               are read, defaulting to "unknown/unknown" and "unknown_file"
#               when unset or empty
#   $filename - filename with full path
#   $file     - filename without path
#   $doc_obj  - the document object that receives the associated file and
#               the metadata
#
# Adds to the document's top section:
#   * the file itself as an associated file, tagged with the MIME type
#   * a metadata field named after assoc_field, holding $file
#   * "srclink" and "/srclink" metadata wrapping a link to the file
#
# Returns 1 on success, 0 if either filename is missing or empty.
sub associate_unknown_file {
    my ($self, $filename, $file, $doc_obj) = @_;

    # check the filename is okay
    return 0 if !defined $file     || $file eq "";
    return 0 if !defined $filename || $filename eq "";

    my $section     = $doc_obj->get_top_section();
    my $file_type   = $self->{'file_type'}   || "unknown/unknown";
    my $assoc_field = $self->{'assoc_field'} || "unknown_file";

    # Add the file as an associated file. This used to pass $mime_type, a
    # variable that was never defined, so the MIME type was always undef
    # and both -file_type and the default above were silently ignored.
    $doc_obj->associate_file($filename, $file, $file_type, $section);
    $doc_obj->add_metadata($section, $assoc_field, $file);

    # The link refers to the field by name ([$assoc_field]); it does not
    # embed the filename itself.
    $doc_obj->add_metadata($section, "srclink",
        "<a href=_httpcollection_/index/assoc/[assocfilepath]/[$assoc_field]>");
    $doc_obj->add_metadata($section, "/srclink", "</a>");

    return 1;
}
158
159
160
# The UnknownPlug read() function. UnknownPlug overrides read() because
# there is no need to read the actual text of the file in: the contents of
# the file are not text. A document holding dummy text is created instead
# and the file is attached to it as an associated file.
#
# Arguments (after $self):
#   $pluginfo  - passed through to process()
#   $base_dir  - base directory; might be ""
#   $file      - file name relative to $base_dir; might include directories
#   $metadata  - metadata from previous plugins; added to the document via
#                extra_metadata() and passed through to process()
#   $processor - its 'OIDtype' entry is read, and its process() method is
#                called with the finished document
#   $maxdocs   - not used here
#   $gli       - when true, a "<Processing ...>" line is printed to STDERR
#
# Returns:
#   1     - the file was turned into a document and handed to $processor
#   0     - the file matches block_exp, or the file could not be associated
#           with the document
#   undef - the file does not match process_exp, is not a plain file, or
#           process() returned undef
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;

    my $outhandle = $self->{'outhandle'};

    # Make sure we're processing the correct file
    my $filename = util::filename_cat($base_dir, $file);
    return 0 if $self->{'block_exp'} ne "" && $filename =~ /$self->{'block_exp'}/;
    if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
        return undef;
    }

    print STDERR "<Processing n='$file' p='UnknownPlug'>\n" if ($gli);
    print $outhandle "UnknownPlug processing \"$filename\"\n"
        if $self->{'verbosity'} > 1;

    # if there's a leading directory name, eat it...
    $file =~ s/^.*[\/\\]//;

    # create a new document (explicit method call instead of the
    # heuristic indirect-object form "new doc (...)")
    my $doc_obj = doc->new($filename, "indexed_doc");
    $doc_obj->set_OIDtype($processor->{'OIDtype'});
    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), $self->{'plugin_type'}, "1");

    # associate the file with the document
    if (associate_unknown_file($self, $filename, $file, $doc_obj) != 1) {
        # Report on the plugin's output handle like every other diagnostic
        # in this sub, rather than on STDOUT.
        print $outhandle "UnknownPlug: couldn't process \"$filename\"\n";
        return 0;
    }

    # placeholder text so we don't break downstream plugins
    my $text = "Dummy text inserted by UnknownPlug...";

    # include any metadata passed in from previous plugins
    my $section = $doc_obj->get_top_section();
    $self->extra_metadata($doc_obj, $section, $metadata);

    # do plugin specific processing of doc_obj
    return undef unless defined ($self->process(\$text, $pluginfo, $base_dir,
                                                $file, $metadata, $doc_obj));

    # do any automatic metadata extraction
    $self->auto_extract_metadata($doc_obj);

    # add an OID
    $doc_obj->set_OID();
    $doc_obj->add_text($section, $text);

    # process the document
    $processor->process($doc_obj);

    return 1;
}
225
226
# UnknownPlug processing of doc_obj. Nothing needs doing here because
# read() has already built the whole document, so this simply reports
# success. The argument list is unpacked only to document what read()
# passes in.
sub process {
    my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;

    return 1;
}
237
238
2391;
240
241
242
243
244
245
246
247
248
249
250
Note: See TracBrowser for help on using the repository browser.