source: gsdl/trunk/perllib/plugins/RogPlugin.pm@ 17026

Last change on this file since 17026 was 17026, checked in by kjdon, 16 years ago

OID generation modifications: OIDtype and OIDmetadata options now available for plugins as well as import. OIDtype for plugins defaults to auto - if set to auto, then use the values from import. All plugins now call self->add_OID instead of doc_obj->set_OID. This sets the doc_obj OIDtype so that doesn't need to be donein other places any more. all plugins have the get_oid_hash_type method - normally returns hash_on_file, but can be overridden to return hash_on_ga_xml for those plugins that don't want hashing on file (MP3,OggVorbis...)

  • Property svn:keywords set to Author Date Id Revision
File size: 7.4 KB
RevLine 
[3737]1###########################################################################
2#
[15872]3# RogPlugin.pm -- simple text plugin
[3737]4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# creates simple single-level document from .rog or .mdb files
27
[15872]28package RogPlugin;
[3737]29
[15872]30use BasePlugin;
[3737]31use sorttools;
32use doc;
33
[10254]34use strict;
35no strict 'refs'; # allow filehandles to be variables and viceversa
36
[3737]37sub BEGIN {
[15872]38 @RogPlugin::ISA = ('BasePlugin');
[3737]39}
40
[6408]41my $arguments =
42 [ { 'name' => "process_exp",
[15872]43 'desc' => "{BasePlugin.process_exp}",
[6408]44 'type' => "regexp",
45 'reqd' => "no",
46 'deft' => &get_default_process_exp() },
47 ];
48
[15872]49my $options = { 'name' => "RogPlugin",
50 'desc' => "{RogPlugin.desc}",
[6408]51 'abstract' => "no",
[11676]52 'inherits' => "yes",
[6408]53 'args' => $arguments };
[4744]54
[3737]55sub new {
[10218]56 my ($class) = shift (@_);
57 my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
58 push(@$pluginlist, $class);
[4744]59
[15872]60 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
61 push(@{$hashArgOptLists->{"OptList"}},$options);
[10218]62
[15872]63 my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
[10218]64
[3737]65 return bless $self, $class;
66}
67
68
69# This plugin processes files with the suffix ".mdb" or ".rog"
70sub get_default_process_exp {
71 return q^(?i)\.(mdb|rog)$^;
72}
73
74sub read_rog_record
75{
76 my ($self,$file_buffer, $seclevel) = @_;
77
78 my $next_line = $file_buffer->{'next_line'};
79
80 return 0 if (!defined $next_line);
81
82 if ($next_line eq "")
83 {
84 my $line;
85 while(defined($line=<FILE>))
86 {
87 $line =~ s/\r$//;
88 $file_buffer->{'line_no'}++;
89 next if ($line =~ m/^\#/);
90 $next_line = $line;
91 last;
92 }
93 }
94
95 if ($next_line !~ m/^song( +)\"([^\"]*)\"( +)\"([^\"]*)\"( +)(\d+)( *)$/)
96 {
97 print STDERR "Error: Malformed Rog file: $next_line";
98 return 0;
99 }
100 else
101 {
102 # init default values
103 $file_buffer->{'song'}->{'tempo'} = 120;
104 $file_buffer->{'song'}->{'ks_type'} = 0;
105 $file_buffer->{'song'}->{'ks_num'} = 0;
106 $file_buffer->{'song'}->{'metadata'} = [];
107 $file_buffer->{'song'}->{'content'} = "";
108
109 $file_buffer->{'song'}->{'subcol'} = $2;
110 $file_buffer->{'song'}->{'title'} = $4;
111 $file_buffer->{'song'}->{'tval'} = $6;
112
113 chomp($next_line);
114 my $content = $next_line;
115 if (defined $seclevel)
116 {
117 $content.= " $seclevel";
118 }
119 $content .= "\n";
120
121 $file_buffer->{'song'}->{'content'} = $content;
122
123
124 my $line;
125 while(defined($line=<FILE>))
126 {
127 $line =~ s/\r$//;
128
129 $file_buffer->{'line_no'}++;
130 next if ($line =~ m/^\#/);
131
132 if ($line =~ m/^song/)
133 {
134 $file_buffer->{'next_line'} = $line;
135 return 1;
136 }
137 elsif ($line =~ m/^tempo( +)(\d+)( *)$/)
138 {
139 $file_buffer->{'song'}->{'tempo'} = $2;
140 $file_buffer->{'song'}->{'content'} .= $line;
141 }
142 elsif ($line =~ m/^keysig( +)(\d+)( +)(\d+)( *)$/)
143 {
144 $file_buffer->{'song'}->{'ks_type'} = $2;
145 $file_buffer->{'song'}->{'ks_num'} = $4;
146 $file_buffer->{'song'}->{'content'} .= $line;
147 }
148 elsif ($line =~ m/^timesig( +)(\d+)( +)(\d+)( *)$/)
149 {
150 $file_buffer->{'song'}->{'ts_numer'} = $2;
151 $file_buffer->{'song'}->{'ts_denom'} = $4;
152 $file_buffer->{'song'}->{'content'} .= $line;
153 }
154 elsif ($line =~ m/^metadata ([^:]*): (.*)/)
155 {
156 push(@{$file_buffer->{'song'}->{'metadata'}},[$1,$2]);
157 $file_buffer->{'song'}->{'content'} .= $line;
158 }
159 else
160 {
161 $file_buffer->{'song'}->{'content'} .= $line;
162 }
163 }
164
165 $file_buffer->{'next_line'} = undef;
166 }
167
168 return 1;
169}
170
171sub process_rog_record
172{
173 my ($self,$file,$metadata,$song,$processor) = @_;
174
175 # create a new document
176 my $doc_obj = new doc ($file, "indexed_doc");
177 my $cursection = $doc_obj->get_top_section();
[7508]178 $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
[3737]179
180 my $title = $song->{'title'};
181 my $title_safe = $title;
182 $title_safe =~ s/\'/\\\\&apos;/g;
183
184 # add metadata
185 $doc_obj->add_metadata($cursection, "Tempo", $song->{'tempo'});
186 $doc_obj->add_metadata($cursection, "KeySigType", $song->{'ks_type'});
187 $doc_obj->add_metadata($cursection, "KeySigNum", $song->{'ks_num'});
188 $doc_obj->add_metadata($cursection, "SubCollection", $song->{'subcol'});
189 $doc_obj->add_metadata($cursection, "Title", $title);
190 $doc_obj->add_metadata($cursection, "TitleSafe", $title_safe);
191 $doc_obj->add_metadata($cursection, "TVal", $song->{'tval'});
[8166]192 $doc_obj->add_metadata($cursection, "FileFormat", "Rog");
193 $doc_obj->add_metadata($cursection, "FileSize", (-s $file));
[3737]194
[10254]195 foreach my $md ( @{$song->{'metadata'}} )
[3737]196 {
197 $doc_obj->add_metadata($cursection, $md->[0], $md->[1]);
198 }
199
200 # add contents as text
201 $doc_obj->add_text($cursection,$song->{'content'});
202
203 $self->extra_metadata($doc_obj,$cursection, $metadata);
204
205 # add OID
[17026]206 $self->add_OID($doc_obj);
[3737]207
208 my $oid = $doc_obj->get_OID();
209 my $appletlink = "<a href=\"javascript:meldexout(\'$oid\','[TitleSafe]')\">";
210
211 $doc_obj->add_utf8_metadata ($cursection, "audiolink", $appletlink);
212 $doc_obj->add_utf8_metadata ($cursection, "audioicon", "_iconaudio_");
213 $doc_obj->add_utf8_metadata ($cursection, "/audiolink", "</a>");
214
215 # process the document
216 $processor->process($doc_obj);
217}
218
219# return number of files processed, undef if can't process
220# Note that $base_dir might be "" and that $file might
221# include directories
222sub read {
223 my $self = shift (@_);
[16392]224 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
[3737]225
226 my $filename = &util::filename_cat($base_dir, $file);
227
228 return undef unless ($filename =~ /\.((rog|mdb)(\.gz)?)$/i && (-e $filename));
229
230 my $gz = (defined $3) ? 1: 0;
231
[15872]232 print STDERR "<Processing n='$file' p='RogPlugin'>\n" if ($gli);
233 print STDERR "RogPlugin: processing $filename\n" if $processor->{'verbosity'};
[3737]234
235 if ($gz) {
236 open (FILE, "zcat $filename |")
[15872]237 || die "RogPlugin::read - zcat can't open $filename\n";
[3737]238 } else {
239 open (FILE, $filename)
[15872]240 || die "RogPlugin::read - can't open $filename\n";
[3737]241 }
242
243 my $doc_count = 0;
244 my $dot_count = 0;
245 my $file_buffer = { line_no => 0, next_line => "", song => {} };
246
247 while ($self->read_rog_record($file_buffer))
248 {
249 $self->process_rog_record($file,$metadata,$file_buffer->{'song'},$processor);
250 $doc_count++;
251
[9853]252 last if ($maxdocs !=-1 && ($total_count+$doc_count) >= $maxdocs);
253
[3737]254 if (($doc_count % 10) == 0)
255 {
256 print STDERR ".";
257 $dot_count++;
258 print STDERR "\n" if (($dot_count % 80) == 0);
259 }
260 }
261
262 close FILE;
263
264 print STDERR "\n";
265
266 $self->{'num_processed'} = $doc_count;
267
268 return 1; # processed the file
269}
270
2711;
272
273
274
275
276
277
278
279
280
281
282
Note: See TracBrowser for help on using the repository browser.