root/gsdl/trunk/perllib/plugins/RogPlugin.pm @ 18327

Revision 18327, 7.5 KB (checked in by ak19, 11 years ago)

Extra parameter to new doc(): the renaming method to be used on the file (base64 or URL encoding).

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# RogPlugin.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# creates simple single-level document from .rog or .mdb files
27
28package RogPlugin;
29
30use BasePlugin;
31use sorttools;
32use doc;
33
34use strict;
35no strict 'refs'; # allow filehandles to be variables and viceversa
36
37sub BEGIN {
38    @RogPlugin::ISA = ('BasePlugin');
39}
40
41my $arguments =
42    [ { 'name' => "process_exp",
43    'desc' => "{BasePlugin.process_exp}",
44    'type' => "regexp",
45    'reqd' => "no",
46    'deft' => &get_default_process_exp() },
47      ];
48
49my $options = { 'name'     => "RogPlugin",
50        'desc'     => "{RogPlugin.desc}",
51        'abstract' => "no",
52        'inherits' => "yes",
53        'args' => $arguments };
54
55sub new {
56    my ($class) = shift (@_);
57    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
58    push(@$pluginlist, $class);
59
60    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
61    push(@{$hashArgOptLists->{"OptList"}},$options);
62
63    my $self = new BasePlugin($pluginlist, $inputargs, $hashArgOptLists);
64
65    return bless $self, $class;
66}
67
68
69# This plugin processes files with the suffix ".mdb" or ".rog"
70sub get_default_process_exp {
71    return q^(?i)\.(mdb|rog)$^;
72}
73
74sub read_rog_record
75{
76    my ($self,$file_buffer, $seclevel) = @_;
77
78    my $next_line = $file_buffer->{'next_line'};
79
80    return 0 if (!defined $next_line);
81
82    if ($next_line eq "")
83    {
84    my $line;
85    while(defined($line=<FILE>)) 
86    {
87        $line =~ s/\r$//;
88        $file_buffer->{'line_no'}++;
89        next if ($line =~ m/^\#/);
90        $next_line = $line;
91        last;
92    }
93    }
94       
95    if ($next_line !~ m/^song( +)\"([^\"]*)\"( +)\"([^\"]*)\"( +)(\d+)( *)$/)
96    {
97    print STDERR "Error: Malformed Rog file: $next_line";
98    return 0;
99    }
100    else
101    {
102    # init default values
103    $file_buffer->{'song'}->{'tempo'}    = 120;
104    $file_buffer->{'song'}->{'ks_type'}  = 0;
105    $file_buffer->{'song'}->{'ks_num'}   = 0;
106    $file_buffer->{'song'}->{'metadata'} = [];
107    $file_buffer->{'song'}->{'content'}  = "";
108   
109    $file_buffer->{'song'}->{'subcol'} = $2;
110    $file_buffer->{'song'}->{'title'}  = $4;
111    $file_buffer->{'song'}->{'tval'}   = $6;
112
113    chomp($next_line);
114    my $content = $next_line;
115    if (defined $seclevel)
116    {
117        $content.= " $seclevel";
118    }
119    $content .= "\n";
120
121    $file_buffer->{'song'}->{'content'} = $content;
122
123
124    my $line;
125    while(defined($line=<FILE>)) 
126    {
127        $line =~ s/\r$//;
128
129        $file_buffer->{'line_no'}++;
130        next if ($line =~ m/^\#/);
131   
132        if ($line =~ m/^song/)
133        {   
134        $file_buffer->{'next_line'} = $line;
135        return 1;
136        }
137        elsif ($line =~ m/^tempo( +)(\d+)( *)$/)
138        {
139        $file_buffer->{'song'}->{'tempo'} = $2;
140        $file_buffer->{'song'}->{'content'} .= $line;
141        }
142        elsif ($line =~ m/^keysig( +)(\d+)( +)(\d+)( *)$/)
143        {
144        $file_buffer->{'song'}->{'ks_type'} = $2;
145        $file_buffer->{'song'}->{'ks_num'}  = $4;
146        $file_buffer->{'song'}->{'content'} .= $line;     
147        }
148        elsif ($line =~ m/^timesig( +)(\d+)( +)(\d+)( *)$/)
149        {
150        $file_buffer->{'song'}->{'ts_numer'} = $2;
151        $file_buffer->{'song'}->{'ts_denom'} = $4;
152        $file_buffer->{'song'}->{'content'} .= $line;
153        }
154        elsif ($line =~ m/^metadata ([^:]*): (.*)/)
155        {
156        push(@{$file_buffer->{'song'}->{'metadata'}},[$1,$2]);
157        $file_buffer->{'song'}->{'content'} .= $line;
158        }
159        else
160        {
161        $file_buffer->{'song'}->{'content'} .= $line;
162        }
163    }
164   
165    $file_buffer->{'next_line'} = undef;
166    }
167
168    return 1;
169}
170
171sub process_rog_record
172{
173    my ($self,$file,$metadata,$song,$processor) = @_;
174
175    # create a new document
176    my $doc_obj = new doc ($file, "indexed_doc", $self->{'file_rename_method'});
177    my $cursection = $doc_obj->get_top_section();
178    $doc_obj->add_utf8_metadata($doc_obj->get_top_section(), "Plugin", "$self->{'plugin_type'}");
179
180    my $title = $song->{'title'};
181    my $title_safe = $title;
182    $title_safe =~ s/\'/\\\\&apos;/g;
183
184    # add metadata
185    $doc_obj->add_metadata($cursection, "Tempo",         $song->{'tempo'});
186    $doc_obj->add_metadata($cursection, "KeySigType",    $song->{'ks_type'});
187    $doc_obj->add_metadata($cursection, "KeySigNum",     $song->{'ks_num'});
188    $doc_obj->add_metadata($cursection, "SubCollection", $song->{'subcol'});
189    $doc_obj->add_metadata($cursection, "Title",         $title);
190    $doc_obj->add_metadata($cursection, "TitleSafe",     $title_safe);
191    $doc_obj->add_metadata($cursection, "TVal",          $song->{'tval'});
192    $doc_obj->add_metadata($cursection, "FileFormat",    "Rog");
193    $doc_obj->add_metadata($cursection, "FileSize",      (-s $file));
194
195    foreach my $md ( @{$song->{'metadata'}} )
196    {
197    $doc_obj->add_metadata($cursection, $md->[0], $md->[1]);
198    }
199
200    # add contents as text
201    $doc_obj->add_text($cursection,$song->{'content'});
202
203    $self->extra_metadata($doc_obj,$cursection, $metadata);
204   
205    # add OID
206    $self->add_OID($doc_obj);
207
208    my $oid = $doc_obj->get_OID();
209    my $appletlink = "<a href=\"javascript:meldexout(\'$oid\','[TitleSafe]')\">";
210
211    $doc_obj->add_utf8_metadata ($cursection, "audiolink",  $appletlink);
212    $doc_obj->add_utf8_metadata ($cursection, "audioicon",  "_iconaudio_");
213    $doc_obj->add_utf8_metadata ($cursection, "/audiolink", "</a>");
214
215    # process the document
216    $processor->process($doc_obj);
217}
218
219# return number of files processed, undef if can't process
220# Note that $base_dir might be "" and that $file might
221# include directories
222sub read {
223    my $self = shift (@_);
224    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
225
226    my $filename = &util::filename_cat($base_dir, $file);
227
228    return undef unless ($filename =~ /\.((rog|mdb)(\.gz)?)$/i && (-e $filename));
229
230    my $gz = (defined $3) ? 1: 0;
231
232    print STDERR "<Processing n='$file' p='RogPlugin'>\n" if ($gli);
233    print STDERR "RogPlugin: processing $filename\n" if $processor->{'verbosity'};
234   
235    if ($gz) {
236    open (FILE, "zcat $filename |")
237        || die "RogPlugin::read - zcat can't open $filename\n";
238    } else {
239    open (FILE, $filename)
240        || die "RogPlugin::read - can't open $filename\n";
241    }
242
243    my $doc_count = 0;
244    my $dot_count = 0;
245    my $file_buffer = { line_no => 0, next_line => "", song => {} };
246
247    while ($self->read_rog_record($file_buffer))
248    {
249    $self->process_rog_record($file,$metadata,$file_buffer->{'song'},$processor);
250    $doc_count++;
251
252    last if ($maxdocs !=-1 && ($total_count+$doc_count) >= $maxdocs);
253   
254    if (($doc_count % 10) == 0)
255    {
256        print STDERR ".";
257        $dot_count++;
258        print STDERR "\n" if (($dot_count % 80) == 0);
259    }
260    }
261
262    close FILE;
263
264    print STDERR "\n";
265
266    $self->{'num_processed'} = $doc_count;
267
268    return 1; # processed the file
269}
270
2711;
272
273
274
275
276
277
278
279
280
281
282
Note: See TracBrowser for help on using the browser.