source: main/trunk/greenstone2/perllib/plugins/RogPlugin.pm@ 24548

Last change on this file since 24548 was 24548, checked in by ak19, 13 years ago

Part 2 of previous commit (r24547). Added new abstract plugin MetadataRead? that defines can_process_this_file_for_metadata that MetadataPlugin? subclasses can inherit (if MetadataRead? is listed first in the ISA inheritance list) and which will then override the one defined in BasePlugin?. For now committing MARC, ISIS and OAIPlugins which now additionally inherit from MetadataRead?. Other metadataPlugins also need to be committed.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.5 KB
Line 
1###########################################################################
2#
3# RogPlugin.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# creates simple single-level document from .rog or .mdb files
27
28package RogPlugin;
29
30use BasePlugin;
31use MetadataRead;
32use sorttools;
33use doc;
34
35use strict;
36no strict 'refs'; # allow filehandles to be variables and viceversa
37
38sub BEGIN {
39 @RogPlugin::ISA = ('MetadataRead', 'BasePlugin');
40}
41
42my $arguments =
43 [ { 'name' => "process_exp",
44 'desc' => "{BasePlugin.process_exp}",
45 'type' => "regexp",
46 'reqd' => "no",
47 'deft' => &get_default_process_exp() },
48 ];
49
50my $options = { 'name' => "RogPlugin",
51 'desc' => "{RogPlugin.desc}",
52 'abstract' => "no",
53 'inherits' => "yes",
54 'args' => $arguments };
55
56sub new {
57 my ($class) = shift (@_);
58 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
59 push(@$pluginlist, $class);
60
61 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
62 push(@{$hashArgOptLists->{"OptList"}},$options);
63
64 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
65
66 return bless $self, $class;
67}
68
69
70# This plugin processes files with the suffix ".mdb" or ".rog"
71sub get_default_process_exp {
72 return q^(?i)\.(mdb|rog)$^;
73}
74
75sub read_rog_record
76{
77 my ($self,$file_buffer, $seclevel) = @_;
78
79 my $next_line = $file_buffer->{'next_line'};
80
81 return 0 if (!defined $next_line);
82
83 if ($next_line eq "")
84 {
85 my $line;
86 while(defined($line=<FILE>))
87 {
88 $line =~ s/\r$//;
89 $file_buffer->{'line_no'}++;
90 next if ($line =~ m/^\#/);
91 $next_line = $line;
92 last;
93 }
94 }
95
96 if ($next_line !~ m/^song( +)\"([^\"]*)\"( +)\"([^\"]*)\"( +)(\d+)( *)$/)
97 {
98 print STDERR "Error: Malformed Rog file: $next_line";
99 return 0;
100 }
101 else
102 {
103 # init default values
104 $file_buffer->{'song'}->{'tempo'} = 120;
105 $file_buffer->{'song'}->{'ks_type'} = 0;
106 $file_buffer->{'song'}->{'ks_num'} = 0;
107 $file_buffer->{'song'}->{'metadata'} = [];
108 $file_buffer->{'song'}->{'content'} = "";
109
110 $file_buffer->{'song'}->{'subcol'} = $2;
111 $file_buffer->{'song'}->{'title'} = $4;
112 $file_buffer->{'song'}->{'tval'} = $6;
113
114 chomp($next_line);
115 my $content = $next_line;
116 if (defined $seclevel)
117 {
118 $content.= " $seclevel";
119 }
120 $content .= "\n";
121
122 $file_buffer->{'song'}->{'content'} = $content;
123
124
125 my $line;
126 while(defined($line=<FILE>))
127 {
128 $line =~ s/\r$//;
129
130 $file_buffer->{'line_no'}++;
131 next if ($line =~ m/^\#/);
132
133 if ($line =~ m/^song/)
134 {
135 $file_buffer->{'next_line'} = $line;
136 return 1;
137 }
138 elsif ($line =~ m/^tempo( +)(\d+)( *)$/)
139 {
140 $file_buffer->{'song'}->{'tempo'} = $2;
141 $file_buffer->{'song'}->{'content'} .= $line;
142 }
143 elsif ($line =~ m/^keysig( +)(\d+)( +)(\d+)( *)$/)
144 {
145 $file_buffer->{'song'}->{'ks_type'} = $2;
146 $file_buffer->{'song'}->{'ks_num'} = $4;
147 $file_buffer->{'song'}->{'content'} .= $line;
148 }
149 elsif ($line =~ m/^timesig( +)(\d+)( +)(\d+)( *)$/)
150 {
151 $file_buffer->{'song'}->{'ts_numer'} = $2;
152 $file_buffer->{'song'}->{'ts_denom'} = $4;
153 $file_buffer->{'song'}->{'content'} .= $line;
154 }
155 elsif ($line =~ m/^metadata ([^:]*): (.*)/)
156 {
157 push(@{$file_buffer->{'song'}->{'metadata'}},[$1,$2]);
158 $file_buffer->{'song'}->{'content'} .= $line;
159 }
160 else
161 {
162 $file_buffer->{'song'}->{'content'} .= $line;
163 }
164 }
165
166 $file_buffer->{'next_line'} = undef;
167 }
168
169 return 1;
170}
171
172sub process_rog_record
173{
174 my ($self,$file,$metadata,$song,$processor) = @_;
175
176 # create a new document
177 my $doc_obj = new doc ($file, "indexed_doc", $self->{'file_rename_method'});
178 my $cursection = $doc_obj->get_top_section();
179 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
180
181 my $title = $song->{'title'};
182 my $title_safe = $title;
183 $title_safe =~ s/\'/\\\\&apos;/g;
184
185 # add metadata
186 $doc_obj->add_metadata($cursection, "Tempo", $song->{'tempo'});
187 $doc_obj->add_metadata($cursection, "KeySigType", $song->{'ks_type'});
188 $doc_obj->add_metadata($cursection, "KeySigNum", $song->{'ks_num'});
189 $doc_obj->add_metadata($cursection, "SubCollection", $song->{'subcol'});
190 $doc_obj->add_metadata($cursection, "Title", $title);
191 $doc_obj->add_metadata($cursection, "TitleSafe", $title_safe);
192 $doc_obj->add_metadata($cursection, "TVal", $song->{'tval'});
193 $doc_obj->add_metadata($cursection, "FileFormat", "Rog");
194 $doc_obj->add_metadata($cursection, "FileSize", (-s $file));
195
196 foreach my $md ( @{$song->{'metadata'}} )
197 {
198 $doc_obj->add_metadata($cursection, $md->[0], $md->[1]);
199 }
200
201 # add contents as text
202 $doc_obj->add_text($cursection,$song->{'content'});
203
204 $self->extra_metadata($doc_obj,$cursection, $metadata);
205
206 # add OID
207 $self->add_OID($doc_obj);
208
209 my $oid = $doc_obj->get_OID();
210 my $appletlink = "<a href=\"javascript:meldexout(\'$oid\','[TitleSafe]')\">";
211
212 $doc_obj->add_utf8_metadata ($cursection, "audiolink", $appletlink);
213 $doc_obj->add_utf8_metadata ($cursection, "audioicon", "_iconaudio_");
214 $doc_obj->add_utf8_metadata ($cursection, "/audiolink", "</a>");
215
216 # process the document
217 $processor->process($doc_obj);
218}
219
220# return number of files processed, undef if can't process
221# Note that $base_dir might be "" and that $file might
222# include directories
223sub read {
224 my $self = shift (@_);
225 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
226
227 my $filename = &util::filename_cat($base_dir, $file);
228
229 return undef unless ($filename =~ /\.((rog|mdb)(\.gz)?)$/i && (-e $filename));
230
231 my $gz = (defined $3) ? 1: 0;
232
233 print STDERR "<Processing n='$file' p='RogPlugin'>\n" if ($gli);
234 print STDERR "RogPlugin: processing $filename\n" if $processor->{'verbosity'};
235
236 if ($gz) {
237 open (FILE, "zcat $filename |")
238 || die "RogPlugin::read - zcat can't open $filename\n";
239 } else {
240 open (FILE, $filename)
241 || die "RogPlugin::read - can't open $filename\n";
242 }
243
244 my $doc_count = 0;
245 my $dot_count = 0;
246 my $file_buffer = { line_no => 0, next_line => "", song => {} };
247
248 while ($self->read_rog_record($file_buffer))
249 {
250 $self->process_rog_record($file,$metadata,$file_buffer->{'song'},$processor);
251 $doc_count++;
252
253 last if ($maxdocs !=-1 && ($total_count+$doc_count) >= $maxdocs);
254
255 if (($doc_count % 10) == 0)
256 {
257 print STDERR ".";
258 $dot_count++;
259 print STDERR "\n" if (($dot_count % 80) == 0);
260 }
261 }
262
263 close FILE;
264
265 print STDERR "\n";
266
267 $self->{'num_processed'} = $doc_count;
268
269 return 1; # processed the file
270}
271
2721;
273
274
275
276
277
278
279
280
281
282
283
Note: See TracBrowser for help on using the repository browser.