source: trunk/gsdl/perllib/plugins/RogPlug.pm@ 10218

Last change on this file since 10218 was 10218, checked in by kjdon, 19 years ago

Jeffrey's new parsing modifications, committed approx 6 July, 15.16

  • Property svn:keywords set to Author Date Id Revision
File size: 7.5 KB
Line 
1###########################################################################
2#
3# RogPlug.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# creates simple single-level document from .rog or .mdb files
27
28package RogPlug;
29
30use BasPlug;
31use sorttools;
32use doc;
33
# Set up inheritance at compile time: RogPlug derives from BasPlug.
BEGIN {
    our @ISA = ('BasPlug');
}
37
# Argument descriptions contributed by this plugin. The single entry
# describes process_exp and takes its default from
# get_default_process_exp().
my $arguments = [
    {
        'name' => 'process_exp',
        'desc' => '{BasPlug.process_exp}',
        'type' => 'regexp',
        'reqd' => 'no',
        'deft' => get_default_process_exp(),
    },
];

# Plugin description record; new() pushes it onto the "OptList" entry of
# the hash it is given.
my $options = {
    'name'     => 'RogPlug',
    'desc'     => '{RogPlug.desc}',
    'abstract' => 'no',
    'inherits' => 'Yes',
    'args'     => $arguments,
};
51
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to BasPlug->new
#   $hashArgOptLists - hash ref; this plugin's argument descriptions are
#                      appended to its "ArgList" entry and its option
#                      record to its "OptList" entry
#
# Returns the object built by BasPlug->new, re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # Note: these pushes autovivify $hashArgOptLists when the caller did
    # not supply one, so the "defined" test below only fails if neither
    # $arguments nor $options is defined.
    if (defined $arguments) {
        push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    }
    if (defined $options) {
        push(@{$hashArgOptLists->{"OptList"}}, $options);
    }

    # Direct method-call syntax instead of the parse-ambiguous indirect
    # object form ("new BasPlug(...)").
    my $self = (defined $hashArgOptLists)
        ? BasPlug->new($pluginlist, $inputargs, $hashArgOptLists)
        : BasPlug->new($pluginlist, $inputargs);

    return bless $self, $class;
}
64
# Reports whether this plugin is recursive; it is not, so the answer is
# always 0.
sub is_recursive {
    my ($self) = @_;

    return 0;
}
70
71
# Default process_exp: a case-insensitive pattern matching file names
# that end in ".mdb" or ".rog".
sub get_default_process_exp {
    my $default_exp = '(?i)\.(mdb|rog)$';

    return $default_exp;
}
76
# Read the next song record from the package-level FILE handle into
# $file_buffer->{'song'}.
#
# Arguments:
#   $file_buffer - hash ref holding the parser state:
#                    'line_no'   count of lines read so far
#                    'next_line' "" before the first read, the pending
#                                "song" header line between records, or
#                                undef once end of file has been reached
#                    'song'      hash ref filled in with the parsed record
#   $seclevel    - optional; when defined it is appended (after a space)
#                  to the header line stored in the song's content
#
# Fills in the song's 'subcol', 'title', 'tval', 'tempo', 'ks_type',
# 'ks_num', 'metadata' (list of [name, value] pairs) and 'content' (the
# header line followed by every non-comment line of the record), plus
# 'ts_numer' and 'ts_denom' when the record has a timesig line.
#
# Returns 1 when a record was read, 0 at end of input or when the header
# line is malformed (an error is printed to STDERR in the latter case).
sub read_rog_record
{
    my ($self, $file_buffer, $seclevel) = @_;

    my $next_line = $file_buffer->{'next_line'};

    # undef means an earlier call already consumed the last record
    return 0 if (!defined $next_line);

    if ($next_line eq "")
    {
        # First call for this file: skip leading comment lines to find
        # the first header.
        my $found_line = 0;
        while (defined(my $line = <FILE>))
        {
            $line =~ s/\r$//;
            $file_buffer->{'line_no'}++;
            next if ($line =~ m/^\#/);
            $next_line  = $line;
            $found_line = 1;
            last;
        }

        # An empty or comment-only file simply has no records; do not
        # report it as malformed.
        if (!$found_line)
        {
            $file_buffer->{'next_line'} = undef;
            return 0;
        }
    }

    # Header format: song "<subcollection>" "<title>" <number>
    my ($subcol, $title, $tval) =
        $next_line =~ m/^song +\"([^\"]*)\" +\"([^\"]*)\" +(\d+) *$/;

    if (!defined $tval)
    {
        print STDERR "Error: Malformed Rog file: $next_line";
        return 0;
    }

    my $song = ($file_buffer->{'song'} ||= {});

    # init default values
    $song->{'tempo'}    = 120;
    $song->{'ks_type'}  = 0;
    $song->{'ks_num'}   = 0;
    $song->{'metadata'} = [];

    # The song hash is reused between records, so drop any time signature
    # left over from the previous song.
    delete @{$song}{'ts_numer', 'ts_denom'};

    $song->{'subcol'} = $subcol;
    $song->{'title'}  = $title;
    $song->{'tval'}   = $tval;

    chomp($next_line);
    my $content = $next_line;
    $content .= " $seclevel" if (defined $seclevel);
    $content .= "\n";
    $song->{'content'} = $content;

    while (defined(my $line = <FILE>))
    {
        $line =~ s/\r$//;

        $file_buffer->{'line_no'}++;
        next if ($line =~ m/^\#/);

        if ($line =~ m/^song/)
        {
            # Start of the next record: keep it for the next call.
            $file_buffer->{'next_line'} = $line;
            return 1;
        }

        if ($line =~ m/^tempo +(\d+) *$/)
        {
            $song->{'tempo'} = $1;
        }
        elsif ($line =~ m/^keysig +(\d+) +(\d+) *$/)
        {
            $song->{'ks_type'} = $1;
            $song->{'ks_num'}  = $2;
        }
        elsif ($line =~ m/^timesig +(\d+) +(\d+) *$/)
        {
            $song->{'ts_numer'} = $1;
            $song->{'ts_denom'} = $2;
        }
        elsif ($line =~ m/^metadata ([^:]*): (.*)/)
        {
            push(@{$song->{'metadata'}}, [$1, $2]);
        }

        # Every non-comment line, recognised or not, is part of the
        # record's text.
        $song->{'content'} .= $line;
    }

    # End of file: this was the last record.
    $file_buffer->{'next_line'} = undef;

    return 1;
}
173
# Build one document from a parsed song record and hand it to the
# processor.
#
# Arguments:
#   $file      - file name given to the new doc object and used for the
#                FileSize metadata
#   $metadata  - passed on to $self->extra_metadata
#   $song      - hash ref as filled in by read_rog_record ('title',
#                'subcol', 'tval', 'tempo', 'ks_type', 'ks_num',
#                'metadata', 'content')
#   $processor - object whose process() method receives the document
sub process_rog_record
{
    my ($self, $file, $metadata, $song, $processor) = @_;

    # create a new document
    my $doc_obj    = doc->new($file, "indexed_doc");
    my $cursection = $doc_obj->get_top_section();
    $doc_obj->add_utf8_metadata($cursection, "Plugin", "$self->{'plugin_type'}");

    # TitleSafe is the title with every apostrophe replaced by two
    # backslashes followed by "&apos;"; the audio link below refers to it
    # inside a single-quoted javascript argument.
    my $title      = $song->{'title'};
    my $title_safe = $title;
    $title_safe =~ s/\'/\\\\&apos;/g;

    # add metadata
    # NOTE(review): -s is given $file as received; if callers pass a path
    # relative to a base directory that is not the current directory the
    # size will be undef -- confirm against the caller.
    $doc_obj->add_metadata($cursection, "Tempo",         $song->{'tempo'});
    $doc_obj->add_metadata($cursection, "KeySigType",    $song->{'ks_type'});
    $doc_obj->add_metadata($cursection, "KeySigNum",     $song->{'ks_num'});
    $doc_obj->add_metadata($cursection, "SubCollection", $song->{'subcol'});
    $doc_obj->add_metadata($cursection, "Title",         $title);
    $doc_obj->add_metadata($cursection, "TitleSafe",     $title_safe);
    $doc_obj->add_metadata($cursection, "TVal",          $song->{'tval'});
    $doc_obj->add_metadata($cursection, "FileFormat",    "Rog");
    $doc_obj->add_metadata($cursection, "FileSize",      (-s $file));

    # metadata lines from the record itself, as [name, value] pairs
    foreach my $md ( @{$song->{'metadata'}} )
    {
        $doc_obj->add_metadata($cursection, $md->[0], $md->[1]);
    }

    # add contents as text
    $doc_obj->add_text($cursection, $song->{'content'});

    $self->extra_metadata($doc_obj, $cursection, $metadata);

    # add OID
    $doc_obj->set_OID();

    # The link needs the OID, so it can only be built after set_OID.
    my $oid        = $doc_obj->get_OID();
    my $appletlink = "<a href=\"javascript:meldexout('${oid}','[TitleSafe]')\">";

    $doc_obj->add_utf8_metadata($cursection, "audiolink",  $appletlink);
    $doc_obj->add_utf8_metadata($cursection, "audioicon",  "_iconaudio_");
    $doc_obj->add_utf8_metadata($cursection, "/audiolink", "</a>");

    # process the document
    $processor->process($doc_obj);
}
221
# Process one .rog/.mdb file (optionally gzip-compressed), creating one
# document per song record.
#
# Arguments (after $self):
#   $pluginfo    - not used by this plugin
#   $base_dir    - directory prefix; may be ""
#   $file        - file name, possibly including directories
#   $metadata    - passed through to process_rog_record
#   $processor   - document processor; its 'verbosity' entry controls the
#                  progress message
#   $maxdocs     - stop once $total_count plus the songs read from this
#                  file reaches this number; -1 (or undef) means no limit
#   $total_count - number of documents counted before this file
#   $gli         - when true, emit the <Processing .../> line on STDERR
#
# Returns undef if the file is not one this plugin handles (wrong suffix
# or it does not exist), otherwise 1. The number of songs processed is
# stored in $self->{'num_processed'}. Dies if the file cannot be opened.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;

    my $filename = util::filename_cat($base_dir, $file);

    # Capture the optional ".gz" suffix explicitly rather than relying on
    # a capture variable left over from an earlier statement.
    my $is_rog = ($filename =~ /\.(?:rog|mdb)(\.gz)?$/i) ? 1 : 0;
    my $gz     = ($is_rog && defined $1) ? 1 : 0;

    return undef unless ($is_rog && (-e $filename));

    print STDERR "<Processing n='$file' p='RogPlug'>\n" if ($gli);
    print STDERR "RogPlug: processing $filename\n" if $processor->{'verbosity'};

    # FILE is the package-level handle that read_rog_record reads from.
    if ($gz) {
        # List-form pipe open: the file name is passed to zcat as a
        # single argument and never goes through a shell.
        open (FILE, '-|', 'zcat', $filename)
            || die "RogPlug::read - zcat can't open $filename\n";
    } else {
        open (FILE, '<', $filename)
            || die "RogPlug::read - can't open $filename\n";
    }

    my $doc_count = 0;
    my $dot_count = 0;
    my $file_buffer = { line_no => 0, next_line => "", song => {} };

    # An undefined $maxdocs is treated as "no limit"; without this guard
    # it would compare as 0 and stop after the first song.
    my $limited    = (defined $maxdocs && $maxdocs != -1) ? 1 : 0;
    my $start_from = (defined $total_count) ? $total_count : 0;

    while ($self->read_rog_record($file_buffer))
    {
        $self->process_rog_record($file, $metadata, $file_buffer->{'song'}, $processor);
        $doc_count++;

        last if ($limited && ($start_from + $doc_count) >= $maxdocs);

        # progress indicator: one dot per ten songs, 80 dots per line
        if (($doc_count % 10) == 0)
        {
            print STDERR ".";
            $dot_count++;
            print STDERR "\n" if (($dot_count % 80) == 0);
        }
    }

    close FILE;

    print STDERR "\n";

    $self->{'num_processed'} = $doc_count;

    return 1; # processed the file
}
273
2741;
275
276
277
278
279
280
281
282
283
284
285
Note: See TracBrowser for help on using the repository browser.