source: gs2-extensions/parallel-building/trunk/src/perllib/plugins/RogPlugin.pm@ 24626

Last change on this file since 24626 was 24626, checked in by jmt12, 13 years ago

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl commited in the main trunk but not in the x64 branch)

File size: 7.5 KB
Line 
1###########################################################################
2#
3# RogPlugin.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# creates simple single-level document from .rog or .mdb files
27
28package RogPlugin;
29
30use BasePlugin;
31use MetadataRead;
32use sorttools;
33use doc;
34
35use strict;
36no strict 'refs'; # allow filehandles to be variables and viceversa
37
38sub BEGIN {
39 @RogPlugin::ISA = ('MetadataRead', 'BasePlugin');
40}
41
42my $arguments =
43 [ { 'name' => "process_exp",
44 'desc' => "{BasePlugin.process_exp}",
45 'type' => "regexp",
46 'reqd' => "no",
47 'deft' => &get_default_process_exp() },
48 ];
49
50my $options = { 'name' => "RogPlugin",
51 'desc' => "{RogPlugin.desc}",
52 'abstract' => "no",
53 'inherits' => "yes",
54 'args' => $arguments };
55
56sub new {
57 my ($class) = shift (@_);
58 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
59 push(@$pluginlist, $class);
60
61 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
62 push(@{$hashArgOptLists->{"OptList"}},$options);
63
64 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
65
66 return bless $self, $class;
67}
68
69
70# This plugin processes files with the suffix ".mdb" or ".rog"
71sub get_default_process_exp {
72 return q^(?i)\.(mdb|rog)$^;
73}
74
75sub read_rog_record
76{
77 my ($self,$file_buffer, $seclevel) = @_;
78
79 my $next_line = $file_buffer->{'next_line'};
80
81 return 0 if (!defined $next_line);
82
83 if ($next_line eq "")
84 {
85 my $line;
86 while(defined($line=<FILE>))
87 {
88 $line =~ s/\r$//;
89 $file_buffer->{'line_no'}++;
90 next if ($line =~ m/^\#/);
91 $next_line = $line;
92 last;
93 }
94 }
95
96 if ($next_line !~ m/^song( +)\"([^\"]*)\"( +)\"([^\"]*)\"( +)(\d+)( *)$/)
97 {
98 print STDERR "Error: Malformed Rog file: $next_line";
99 return 0;
100 }
101 else
102 {
103 # init default values
104 $file_buffer->{'song'}->{'tempo'} = 120;
105 $file_buffer->{'song'}->{'ks_type'} = 0;
106 $file_buffer->{'song'}->{'ks_num'} = 0;
107 $file_buffer->{'song'}->{'metadata'} = [];
108 $file_buffer->{'song'}->{'content'} = "";
109
110 $file_buffer->{'song'}->{'subcol'} = $2;
111 $file_buffer->{'song'}->{'title'} = $4;
112 $file_buffer->{'song'}->{'tval'} = $6;
113
114 chomp($next_line);
115 my $content = $next_line;
116 if (defined $seclevel)
117 {
118 $content.= " $seclevel";
119 }
120 $content .= "\n";
121
122 $file_buffer->{'song'}->{'content'} = $content;
123
124
125 my $line;
126 while(defined($line=<FILE>))
127 {
128 $line =~ s/\r$//;
129
130 $file_buffer->{'line_no'}++;
131 next if ($line =~ m/^\#/);
132
133 if ($line =~ m/^song/)
134 {
135 $file_buffer->{'next_line'} = $line;
136 return 1;
137 }
138 elsif ($line =~ m/^tempo( +)(\d+)( *)$/)
139 {
140 $file_buffer->{'song'}->{'tempo'} = $2;
141 $file_buffer->{'song'}->{'content'} .= $line;
142 }
143 elsif ($line =~ m/^keysig( +)(\d+)( +)(\d+)( *)$/)
144 {
145 $file_buffer->{'song'}->{'ks_type'} = $2;
146 $file_buffer->{'song'}->{'ks_num'} = $4;
147 $file_buffer->{'song'}->{'content'} .= $line;
148 }
149 elsif ($line =~ m/^timesig( +)(\d+)( +)(\d+)( *)$/)
150 {
151 $file_buffer->{'song'}->{'ts_numer'} = $2;
152 $file_buffer->{'song'}->{'ts_denom'} = $4;
153 $file_buffer->{'song'}->{'content'} .= $line;
154 }
155 elsif ($line =~ m/^metadata ([^:]*): (.*)/)
156 {
157 push(@{$file_buffer->{'song'}->{'metadata'}},[$1,$2]);
158 $file_buffer->{'song'}->{'content'} .= $line;
159 }
160 else
161 {
162 $file_buffer->{'song'}->{'content'} .= $line;
163 }
164 }
165
166 $file_buffer->{'next_line'} = undef;
167 }
168
169 return 1;
170}
171
172sub process_rog_record
173{
174 my ($self,$file,$metadata,$song,$processor) = @_;
175
176 # create a new document
177 my $doc_obj = new doc ($file, "indexed_doc", $self->{'file_rename_method'});
178 my $cursection = $doc_obj->get_top_section();
179 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
180
181 my $title = $song->{'title'};
182 my $title_safe = $title;
183 $title_safe =~ s/\'/\\\\&apos;/g;
184
185 # add metadata
186 $doc_obj->add_metadata($cursection, "Tempo", $song->{'tempo'});
187 $doc_obj->add_metadata($cursection, "KeySigType", $song->{'ks_type'});
188 $doc_obj->add_metadata($cursection, "KeySigNum", $song->{'ks_num'});
189 $doc_obj->add_metadata($cursection, "SubCollection", $song->{'subcol'});
190 $doc_obj->add_metadata($cursection, "Title", $title);
191 $doc_obj->add_metadata($cursection, "TitleSafe", $title_safe);
192 $doc_obj->add_metadata($cursection, "TVal", $song->{'tval'});
193 $doc_obj->add_metadata($cursection, "FileFormat", "Rog");
194 $doc_obj->add_metadata($cursection, "FileSize", (-s $file));
195
196 foreach my $md ( @{$song->{'metadata'}} )
197 {
198 $doc_obj->add_metadata($cursection, $md->[0], $md->[1]);
199 }
200
201 # add contents as text
202 $doc_obj->add_text($cursection,$song->{'content'});
203
204 $self->extra_metadata($doc_obj,$cursection, $metadata);
205
206 # add OID
207 $self->add_OID($doc_obj);
208
209 my $oid = $doc_obj->get_OID();
210 my $appletlink = "<a href=\"javascript:meldexout(\'$oid\','[TitleSafe]')\">";
211
212 $doc_obj->add_utf8_metadata ($cursection, "audiolink", $appletlink);
213 $doc_obj->add_utf8_metadata ($cursection, "audioicon", "_iconaudio_");
214 $doc_obj->add_utf8_metadata ($cursection, "/audiolink", "</a>");
215
216 # process the document
217 $processor->process($doc_obj);
218}
219
220# return number of files processed, undef if can't process
221# Note that $base_dir might be "" and that $file might
222# include directories
223sub read {
224 my $self = shift (@_);
225 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
226
227 my $filename = &util::filename_cat($base_dir, $file);
228
229 return undef unless ($filename =~ /\.((rog|mdb)(\.gz)?)$/i && (-e $filename));
230
231 my $gz = (defined $3) ? 1: 0;
232
233 print STDERR "<Processing n='$file' p='RogPlugin'>\n" if ($gli);
234 print STDERR "RogPlugin: processing $filename\n" if $processor->{'verbosity'};
235
236 if ($gz) {
237 open (FILE, "zcat $filename |")
238 || die "RogPlugin::read - zcat can't open $filename\n";
239 } else {
240 open (FILE, $filename)
241 || die "RogPlugin::read - can't open $filename\n";
242 }
243
244 my $doc_count = 0;
245 my $dot_count = 0;
246 my $file_buffer = { line_no => 0, next_line => "", song => {} };
247
248 while ($self->read_rog_record($file_buffer))
249 {
250 $self->process_rog_record($file,$metadata,$file_buffer->{'song'},$processor);
251 $doc_count++;
252
253 last if ($maxdocs !=-1 && ($total_count+$doc_count) >= $maxdocs);
254
255 if (($doc_count % 10) == 0)
256 {
257 print STDERR ".";
258 $dot_count++;
259 print STDERR "\n" if (($dot_count % 80) == 0);
260 }
261 }
262
263 close FILE;
264
265 print STDERR "\n";
266
267 $self->{'num_processed'} = $doc_count;
268
269 return 1; # processed the file
270}
271
2721;
273
274
275
276
277
278
279
280
281
282
283
Note: See TracBrowser for help on using the repository browser.