source: trunk/gsdl/perllib/plugins/UnknownPlug.pm@ 2996

Last change on this file since 2996 was 2883, checked in by paynter, 23 years ago

This Plugin can be used to import any file to Greenstone, regardless
of type, as an associated file.

  • Property svn:keywords set to Author Date Id Revision
File size: 6.5 KB
Line 
1###########################################################################
2#
3# UnknownPlug.pm -- Plugin for files you know about but Greenstone doesn't
4#
5# A component of the Greenstone digital library software from the New
6# Zealand Digital Library Project at the University of Waikato, New
7# Zealand.
8#
9# Copyright (C) 2001 Gordon W. Paynter
10# Copyright (C) 2001 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful, but
18# WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20# General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28# UnknownPlug - a plugin for unknown files
29
30# This is a simple Plugin for importing files in formats that
31# Greenstone doesn't know anything about. A fictional document will
32# be created for every such file, and the file itself will be passed
33# to Greenstone as the "associated file" of the document.
34
35# Here's an example where it is useful: I have a collection of
36# pictures that include a couple of quicktime movie files with names
37# like DCP_0163.MOV. Rather than write a new plugin for quicktime
38# movies, I add this line to the collection configuration file:
39
40# plugin UnknownPlug -process_exp "*.MOV" -assoc_field "movie"
41
42# A document is created for each movie, with the associated movie
43# file's name in the "movie" metadata field. In the collection's
44# format strings, I use the {If} macro to output different text for
45# each type of file, like this:
46
47# {If}{[movie],<HTML for displaying movie>}{If}{[Image],<HTML for displaying image>}
48
49# You can also add extra metadata, such as the Title, Subject, and
50# Duration, with metadata.xml files and RecPlug. (If you want to use
51# UnknownPlug with more than one type of file, you will have to add
52# some sort of distinguishing metadata in this way.)
53
54
55
56package UnknownPlug;
57
58use BasPlug;
59use parsargv;
60
61
# Compile-time setup: declare BasPlug as the parent class of this package.
BEGIN {
    @ISA = qw(BasPlug);
}
65
# Print the plugin's command-line usage summary to STDERR.
#
# Each piece of text is written with its own print call, and the result
# of the final print is returned.
sub print_usage {
    my @usage_text = (
        "\n usage: plugin UnknownPlug [options]\n\n",
        " options:\n",
        " -assoc_field\t Name of the metadata field that will hold\n",
        "\t\t the associated file's name.\n",
        "\n",
        " -file_type\t Mime type of the file (e.g. image/gif)\n",
        "\n",
    );

    my $printed;
    for my $chunk (@usage_text) {
        $printed = print STDERR $chunk;
    }
    return $printed;
}
75
# Constructor.
#
# Builds the underlying BasPlug object and then parses the options that
# are specific to UnknownPlug out of the argument list:
#
#   -assoc_field   stored in $self->{'assoc_field'}
#   -file_type     stored in $self->{'file_type'}
#
# Other options are tolerated ("allow_extra_options").
#
# Arguments: the class name followed by the plugin options.
# Returns:   the object, blessed into $class.
# Dies:      after printing the usage message, if parsargv::parse reports
#            that the options could not be parsed.
sub new {
    my ($class) = @_;

    # Explicit Class->method syntax instead of the indirect "new BasPlug"
    # form: this package defines its own sub new, so the indirect form is
    # only parsed as a method call when BasPlug is already known at
    # compile time.
    #
    # NOTE(review): $class is still the first element of @_ here, so it
    # is handed to BasPlug::new twice, exactly as the original call did.
    # Presumably BasPlug::new expects this -- confirm before changing.
    my $self = BasPlug->new($class, @_);

    my $parsed_ok = parsargv::parse(
        \@_,
        q^assoc_field/.*/^, \$self->{'assoc_field'},
        q^file_type/.*/^,   \$self->{'file_type'},
        "allow_extra_options",
    );

    if (!$parsed_ok) {
        print STDERR "\nIncorrect options passed to UnknownPlug, check your collect.cfg configuration file\n";
        print_usage();
        die "\n";
    }

    return bless $self, $class;
}
92
# Default value for the process expression: the empty string, so this
# plugin supplies no pattern of its own.
sub get_default_process_exp {
    my $default_exp = '';
    return $default_exp;
}
96
97
# Associate the unknown file with the new document.
99
# Attach the unknown file to a document as an associated file, and record
# its name in a metadata field on the document's top section.
#
# Arguments:
#   $self      the plugin object; reads 'outhandle', 'file_type' and
#              'assoc_field'
#   $filename  filename with full path
#   $file      filename without path
#   $doc_obj   the document object to attach the file to
#
# Returns:
#   1      on success
#   0      if either filename is missing or empty
#   undef  if the full filename contains a space (not supported here)
sub associate_unknown_file {
    my ($self, $filename, $file, $doc_obj) = @_;

    my $outhandle = $self->{'outhandle'};

    # check the filename is okay
    return 0 if !defined $file     || $file eq "";
    return 0 if !defined $filename || $filename eq "";

    if ($filename =~ m/ /) {
        print $outhandle "UnknownPlug: \"$filename\" contains a space. choking.\n";
        # Kept as an explicit undef so existing callers see the same value.
        return undef;
    }

    # Fall back to generic values when the options were not given.
    my $section     = $doc_obj->get_top_section();
    my $file_type   = $self->{'file_type'}   || "unknown/unknown";
    my $assoc_field = $self->{'assoc_field'} || "unknown_file";

    # Add the file as an associated file. The mime type passed here is
    # $file_type; the previous code passed an undeclared $mime_type, so
    # the configured type never reached the document.
    $doc_obj->associate_file($filename, $file, $file_type, $section);
    $doc_obj->add_metadata($section, $assoc_field, $file);

    return 1;
}
127
128
129
# The UnknownPlug read() function. This function does all the right
# things to make general options work for a given plugin. UnknownPlug
# overrides read() because there is no need to read in the actual text
# of the file: the contents of the file are not text.
#
# Returns the number of files processed, or undef if the file can't be
# processed.
#
# Note that $base_dir might be "" and that $file might include directories.
139
# Import one file as a document whose only real content is the file
# itself, attached as an associated file.
#
# Arguments (after $self):
#   $pluginfo   passed through to process()
#   $base_dir   directory prefix for $file; joined with
#               util::filename_cat
#   $file       file name, possibly including leading directories
#   $metadata   metadata from earlier plugins, added to the top section
#   $processor  object whose 'OIDtype' is read and whose process()
#               receives the finished document
#   $maxdocs    accepted but not used here
#
# Returns:
#   1      when the document was built and handed to the processor
#   0      when the file matches block_exp, or could not be associated
#   undef  when the file does not match process_exp, is not a plain
#          file, or process() returns undef
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs) = @_;

    my $outhandle = $self->{'outhandle'};

    # Make sure we're processing the correct file
    my $filename  = util::filename_cat($base_dir, $file);
    my $block_exp = $self->{'block_exp'};
    return 0 if defined $block_exp
             && $block_exp ne ""
             && $filename =~ /$block_exp/;
    if ($filename !~ /$self->{'process_exp'}/ || !-f $filename) {
        return undef;
    }
    print $outhandle "UnknownPlug processing \"$filename\"\n"
        if $self->{'verbosity'} > 1;

    # if there's a leading directory name, eat it...
    $file =~ s/^.*[\/\\]//;

    # create a new document (explicit Class->method call; this package
    # defines its own sub new, which makes the indirect form fragile)
    my $doc_obj = doc->new($filename, "indexed_doc");
    $doc_obj->set_OIDtype($processor->{'OIDtype'});

    # associate the file with the document. The helper returns 1 on
    # success, 0 or undef on failure, so test definedness before
    # comparing numerically.
    my $associated = associate_unknown_file($self, $filename, $file, $doc_obj);
    if (!defined $associated || $associated != 1) {
        # Report on the plugin's output handle like every other message,
        # rather than on STDOUT.
        print $outhandle "UnknownPlug: couldn't process \"$filename\"\n";
        return 0;
    }

    # placeholder text so we don't break downstream plugins
    my $text = "Dummy text inserted by UnknownPlug...";

    # include any metadata passed in from previous plugins
    my $section = $doc_obj->get_top_section();
    $self->extra_metadata($doc_obj, $section, $metadata);

    # do plugin specific processing of doc_obj
    return undef unless defined ($self->process(\$text, $pluginfo, $base_dir,
                                                $file, $metadata, $doc_obj));

    # do any automatic metadata extraction
    $self->auto_extract_metadata($doc_obj);

    # add an OID, then the placeholder text
    $doc_obj->set_OID();
    $doc_obj->add_text($section, $text);

    # process the document
    $processor->process($doc_obj);

    return 1;
}
192
193
# UnknownPlug processing of doc_obj. In practice we don't need to do
# anything here, because the read() function takes care of everything.
196
# Plugin-specific processing hook. It performs no work and always
# reports success.
#
# Arguments (after $self): $textref, $pluginfo, $base_dir, $file,
# $metadata, $doc_obj -- all ignored.
# Returns: 1.
sub process {
    my ($self, $textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;

    return 1;
}
204
205
2061;
207
208
209
210
211
212
213
214
215
216
217
Note: See TracBrowser for help on using the repository browser.