source: trunk/gsdl/perllib/plugins/ProCitePlug.pm@ 10254

Last change on this file since 10254 was 10254, checked in by kjdon, 19 years ago

added 'use strict' to all plugins, and made modifications (mostly adding 'my') to make them compile

  • Property svn:keywords set to Author Date Id Revision
File size: 7.2 KB
Line 
1###########################################################################
2#
3# ProCitePlug.pm -- A plugin for (exported) ProCite databases
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright 1999-2004 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package ProCitePlug;
28
29
30use multiread;
31use SplitPlug;
32
33use strict;
34no strict 'refs'; # allow filehandles to be variables and viceversa
35
# Inheritance is wired up at compile time: ProCitePlug derives from
# SplitPlug, so any method not defined here is looked up there.
BEGIN {
    @ProCitePlug::ISA = qw(SplitPlug);
}
40
41
# Description of the "process_exp" argument; its default comes from
# get_default_process_exp() further down in this file.
my $process_exp_argument = {
    'name' => "process_exp",
    'desc' => "{BasPlug.process_exp}",
    'type' => "regexp",
    'reqd' => "no",
    'deft' => get_default_process_exp(),
};

# Description of the "split_exp" argument; its default comes from
# get_default_split_exp() further down in this file.
my $split_exp_argument = {
    'name' => "split_exp",
    'desc' => "{SplitPlug.split_exp}",
    'type' => "regexp",
    'deft' => get_default_split_exp(),
    'reqd' => "no",
};

# Argument list that new() appends to the caller's "ArgList".
my $arguments = [ $process_exp_argument, $split_exp_argument ];

# Plugin description that new() appends to the caller's "OptList".
my $options = {
    'name'     => "ProCitePlug",
    'desc'     => "{ProCitePlug.desc}",
    'abstract' => "no",
    'inherits' => "yes",
    'args'     => $arguments,
};
60
61
# Default value of the "process_exp" argument: a case-insensitive
# pattern matching file names that end in ".txt", the suffix used for
# exported ProCite files.
sub get_default_process_exp
{
    my $default_process_exp = q^(?i)(\.txt)$^;
    return $default_process_exp;
}
67
68
# Default value of the "split_exp" argument: the two-character pattern
# text backslash-n, i.e. the input is split at every line break.
sub get_default_split_exp
{
    my $default_split_exp = q^\n^;
    return $default_split_exp;
}
74
75
# Construct a ProCitePlug.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class's name is appended to it
#   $inputargs       - handed through unchanged to SplitPlug's constructor
#   $hashArgOptLists - hash ref; this plugin's argument descriptions are
#                      appended to its "ArgList" entry and its option
#                      description to its "OptList" entry
#
# Returns the object built by SplitPlug's constructor, re-blessed into
# $class.
sub new
{
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # $arguments and $options are the file-level tables describing this
    # plugin.  Note that these pushes autovivify $hashArgOptLists when the
    # caller did not supply one.
    if (defined $arguments) {
        push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    }
    if (defined $options) {
        push(@{$hashArgOptLists->{"OptList"}}, $options);
    }

    # Call the parent constructor with explicit method-call syntax.  The
    # indirect-object form ("new SplitPlug(...)") is ambiguous in a package
    # that defines its own sub called "new", as this one does.
    my $self = (defined $hashArgOptLists)
        ? SplitPlug->new($pluginlist, $inputargs, $hashArgOptLists)
        : SplitPlug->new($pluginlist, $inputargs);

    return bless $self, $class;
}
89
90
# Some exports identify a record's workform by a single letter instead
# of by name.  This table maps each letter to the workform name so that
# process() can look the record up among the workform definitions.
my %crazy_workform_mapping = (
    A => "Book, Long Form",
    B => "Book, Short Form",
    C => "Journal, Long Form",
    D => "Journal, Short Form",
    E => "Report",
    F => "Newspaper",
    G => "Dissertation",
    H => "Trade Catalog",
    I => "Letter (Correspondence)",
    J => "Manuscript",
    K => "Conference Proceedings",
    L => "Map",
    M => "Music Score",
    N => "Sound Recording",
    O => "Motion Picture",
    P => "Audiovisual Material",
    Q => "Video Recording",
    R => "Art Work",
    S => "Computer Program",
    T => "Data File",
);
112
113
# Read an exported ProCite file and peel off its workform definitions.
#
# Arguments (after $self):
#   $filename - path of the file to read
#   $encoding - encoding name handed to the multiread reader
#   $language - accepted but not used here
#   $textref  - scalar ref that receives the file contents; on return the
#               leading "<Workform Definition>" lines have been removed so
#               that only records remain
#
# Each definition line has the form
#   <Workform Definition>"Workform name","Field 1 label","Field 2 label",...
# and is remembered as  name => [ field labels ]  in
# $self->{'workform_definitions'}->{$filename} for process() to use.
sub read_file
{
    my $self = shift (@_);
    my ($filename, $encoding, $language, $textref) = @_;

    # Workform definitions found in this file, keyed by workform name
    my %workform_definitions = ();

    # Read the contents of the file into $textref.  The reader is given the
    # handle by name, so a package (bareword) handle is used on purpose.
    # Three-argument open keeps odd characters in $filename from being
    # interpreted as an open mode.
    if (open(PROCITE_FILE, "<", $filename)) {
        my $reader = multiread->new();
        $reader->set_handle ('ProCitePlug::PROCITE_FILE');
        $reader->set_encoding ($encoding);
        $reader->read_file ($textref);
        close(PROCITE_FILE);
    }
    else {
        print STDERR "ProCitePlug: could not open $filename for reading: $!\n";
    }

    # Consume the workform definition lines at the start of the text so they
    # are not processed later as records.  The newline is optional so that a
    # definition on an unterminated last line cannot stall the loop.
    while ($$textref =~ s/^\<Workform Definition\>(.*)\n?//) {
        my $workform_definition = $1;

        # The first quoted value is the workform's name
        my $workform_name;
        if ($workform_definition =~ s/^\"([^\"]*)\",//) {
            $workform_name = $1;
        }
        else {
            print STDERR "ProCitePlug: ignoring malformed workform definition in $filename\n";
            next;
        }

        # The remaining quoted values are the field labels, in field order.
        # The loop ends as soon as no quoted value is left at the front, so
        # malformed trailing data (e.g. a stray carriage return) cannot make
        # it spin forever.
        my @workform_values;
        while ($workform_definition =~ s/^\"([^\"]*)\",?//) {
            push(@workform_values, $1);
        }
        if ($workform_definition =~ /\S/) {
            print STDERR "ProCitePlug: ignoring unparsable text at the end of the definition of workform '$workform_name' in $filename\n";
        }

        # Remember this workform definition for when we're reading the records
        $workform_definitions{$workform_name} = \@workform_values;
    }

    $self->{'workform_definitions'}->{$filename} = \%workform_definitions;
}
153
154
# Turn one exported ProCite record into document text and metadata.
#
# Arguments (after $self):
#   $textref  - scalar ref to the record text; it is consumed field by
#               field while the record is parsed
#   $pluginfo - accepted but not used here
#   $base_dir - joined with $file to rebuild the file name under which
#   $file       read_file() stored the workform definitions
#   $metadata - accepted but not used here
#   $doc_obj  - document object that receives the text and metadata
#   $gli      - when true, a <Processing ...> tag is written to STDERR
#
# Expected record layout:
#   "workform","record number","field 1","field 2",...
# where a field may hold several values separated by "//".
#
# Metadata written: pc.Workform, pc.RecordNumber, FileFormat,
# pc.Field<N>Name / pc.Field<N>Value for every non-empty field, and
# HTMLDisplay (table rows giving an HTML view of the record).
#
# Returns 1 when the record was processed, 0 when its header could not be
# parsed or its workform is unknown.
sub process
{
    my $self = shift (@_);
    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;

    my $outhandle = $self->{'outhandle'};
    my $filename = &util::filename_cat($base_dir, $file);
    my $cursection = $doc_obj->get_top_section();

    # Report that we're processing the file
    print STDERR "<Processing n='$file' p='ProCitePlug'>\n" if ($gli);
    print $outhandle "ProCitePlug: processing $file\n"
        if ($self->{'verbosity'} > 1);

    # Build up an HTML view of the record for easy display at run-time
    my $html_record = "";

    # Read the record's workform indicator and record number.  The capture
    # variables are only trusted when the substitution actually matched;
    # otherwise they would hold values left over from an earlier match.
    my ($workform_indicator, $recordnum);
    if ($$textref =~ s/^\"([^\"]*)\",\"([^\"]*)\",//) {
        ($workform_indicator, $recordnum) = ($1, $2);
    }
    else {
        print STDERR "ProCitePlug: malformed record in $file (no workform indicator and record number)\n";
        return 0;
    }

    # If necessary, map a single-letter workform indicator to its name
    if ($crazy_workform_mapping{$workform_indicator}) {
        $workform_indicator = $crazy_workform_mapping{$workform_indicator};
    }

    # Check we know about the workform of this record.  The definitions are
    # used through their reference rather than copied for every record, and
    # a file without stored definitions is treated as having none.
    my $workform_definitions = $self->{'workform_definitions'}->{$filename} || {};
    if (!$workform_definitions->{$workform_indicator}) {
        print STDERR "Unknown workform!\n";
        return 0;
    }

    # Store the record (what is left after the two header values) as the
    # document text.  This must happen before the fields are consumed below.
    $doc_obj->add_utf8_text($cursection, $$textref);

    # Store workform and record number as metadata
    $doc_obj->add_utf8_metadata($cursection, "pc.Workform", $workform_indicator);
    $doc_obj->add_utf8_metadata($cursection, "pc.RecordNumber", $recordnum);

    # Store FileFormat metadata
    $doc_obj->add_metadata($cursection, "FileFormat", "ProCite");

    $html_record .= "<tr><td valign=top><b>Record Number: </b></td><td valign=top>$recordnum</td></tr>";

    my @workform_values = @{$workform_definitions->{$workform_indicator}};

    # Read each field (surrounded by quotes) of the record.  The loop ends
    # as soon as no quoted field is left at the front of the text, so
    # trailing junk (e.g. a carriage return) cannot make it spin forever.
    my $fieldnum = 0;
    while ($$textref =~ s/^\"([^\"]*)\",?//) {
        my $field_value_raw = $1;

        # Add non-empty metadata values to the document
        unless ($field_value_raw eq "") {
            # Add the display name of the metadata field for format statement
            # convenience.  "---" marks a field without a display name, and a
            # record may carry more fields than its workform defines, in
            # which case there is no name at all.
            my $field_name = $workform_values[$fieldnum];
            if (!defined $field_name) {
                $field_name = "";
            }
            elsif ($field_name ne "---") {
                my $meta_name = "pc.Field" . ($fieldnum + 1) . "Name";
                $doc_obj->add_utf8_metadata($cursection, $meta_name, $field_name);
            }

            $html_record .= "<tr><td valign=top><b>$field_name: </b></td><td valign=top>";

            # Multiple metadata values are separated with "//"
            my $meta_name = "pc.Field" . ($fieldnum + 1) . "Value";
            foreach my $field_value (split(/\/\//, $field_value_raw)) {
                $doc_obj->add_utf8_metadata($cursection, $meta_name, $field_value);

                $html_record .= $field_value . "<br>";
            }

            $html_record .= "</td></tr>";
        }

        $fieldnum++;
    }

    # Anything other than whitespace left over could not be parsed as a field
    if ($$textref =~ /\S/) {
        print STDERR "ProCitePlug: ignoring unparsable text at the end of record $recordnum in $file\n";
    }

    # Store HTML view of record as metadata
    $doc_obj->add_utf8_metadata($cursection, "HTMLDisplay", $html_record);

    # Record was processed successfully
    return 1;
}
239
240
2411;
Note: See TracBrowser for help on using the repository browser.