source: main/trunk/model-sites-dev/mozarts-laptop/collect/digital-music-stand/perllib/plugins/SimpleRDFMetadataPlugin.pm@ 30446

Last change on this file since 30446 was 30446, checked in by davidb, 8 years ago

... and the example collection (initial version)

File size: 6.1 KB
Line 
1###########################################################################
2#
3# SimpleRDFMetadataPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
# SimpleRDFMetadataPlugin processes metadata contained in rdf:Description tags
27
28
29package SimpleRDFMetadataPlugin;
30
31use strict;
32no strict 'refs';
33use BasePlugin;
34use util;
35use metadatautil;
36
37use MetadataRead;
38
BEGIN {
    # Put GSDLHOME/perllib/cpan at the front of the module search path so
    # that it is consulted first by the 'use' statements that follow.
    my $cpan_dir = $ENV{'GSDLHOME'} . "/perllib/cpan";
    unshift(@INC, $cpan_dir);

    # Inherit from MetadataRead first, then BasePlugin.
    @SimpleRDFMetadataPlugin::ISA = qw(MetadataRead BasePlugin);
}
43
44use XMLParser;
45use XML::Rules;
46
# Arguments this plugin adds on top of the ones it inherits.  The only
# one is process_exp, whose default comes from get_default_process_exp().
my $arguments = [
    {
        name => 'process_exp',
        desc => '{BasePlugin.process_exp}',
        type => 'regexp',
        reqd => 'no',
        deft => get_default_process_exp(),
    },
];

# Option record pushed onto the shared "OptList" by new().
my $options = {
    name     => 'SimpleRDFMetadataPlugin',
    desc     => '{SimpleRDFMetadataPlugin.desc}',
    abstract => 'no',
    inherits => 'yes',
    args     => $arguments,
};

# File-scoped handle on the plugin object.  new() assigns it, and the
# XML::Rules callbacks (rdf_description, rdf_default) read it because
# they are plain subs rather than methods.
my $self;

# XML::Rules rule table: <rdf:Description> elements get their own
# handler, every other element goes through the default handler.
my @rules = (
    'rdf:Description' => \&rdf_description,
    _default          => \&rdf_default,
);
67
68
69
# Constructor.
#
#   $pluginlist       array ref; this plugin's class name is appended to it
#   $inputargs        passed through untouched to BasePlugin->new
#   $hashArgOptLists  hash ref; this plugin's argument and option records
#                     are appended to its "ArgList" and "OptList" entries
#
# Returns the object created by BasePlugin, re-blessed into $class.  When
# the object is flagged 'info_only' no parser is created.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist,$inputargs,$hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
    push(@{$hashArgOptLists->{"OptList"}},$options);

    # Explicit class-method call.  The indirect-object form
    # "new BasePlugin(...)" relies on parser heuristics, which is fragile
    # in a package that itself defines a sub called new.
    #
    # Note that this assigns the file-scoped $self (there is deliberately
    # no 'my' here): the XML::Rules callbacks further down read it.
    $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    if ($self->{'info_only'}) {
        # don't worry about any options or initialisations etc
        return bless $self, $class;
    }

    # create the XML::Rules parser object used for parsing RDF files
    my $parser = XML::Rules->new(rules => \@rules);

    $self->{'parser'} = $parser;
    $self->{'in_filename'} = 0;

    return bless $self, $class;
}
95
96
# Default value for the process_exp argument: a regular expression that
# accepts any file name ending in ".rdf".
sub get_default_process_exp
{
    my $rdf_suffix_pattern = '.*\.rdf$';
    return $rdf_suffix_pattern;
}
101
102
# Parse one RDF file and record the metadata it describes.
#
# The file is only handled when its full path matches the plugin's
# process_exp and it is a regular file.  While the file is parsed, the
# XML::Rules callbacks fill $extrametadata / $extrametakeys through the
# 'metadataref' / 'metakeysref' slots set up here.
#
# Returns:
#   undef  the file is not one this plugin handles
#   -1     parsing raised an error (reported on the plugin's outhandle)
#    1     the file was parsed
sub metadata_read
{
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $extrametakeys,
        $extrametadata, $processor, $maxdocs, $gli) = @_;

    my $full_path = &util::filename_cat($base_dir, $file);
    my $accept_re = $self->{'process_exp'};

    # Not ours unless the name matches and it is a plain file.
    my $wanted = ($full_path =~ /$accept_re/) && (-f $full_path);
    return undef unless $wanted;

    my $out = $self->{'outhandle'};

    if ($gli) {
        print STDERR "\n<Processing n='$file' p='SimpleRDFMetadataPlugin'>\n";
    }
    if ($self->{'verbosity'} > 1) {
        print $out "SimpleRDFMetadataPlugin: processing $file\n";
    }

    # Block the file so that it is not picked up again by read();
    # everything that can be done with it is done here.
    &util::block_filename($block_hash, $full_path);

    # Make the caller's structures reachable from the parser callbacks.
    @{$self}{'metadataref', 'metakeysref'} = ($extrametadata, $extrametakeys);

    eval {
        # Start each file with an empty metadata buffer.
        $self->{'saved_metadata'} = {};
        $self->{'saved_metadata_count'} = 0;

        $self->{'parser'}->parsefile($full_path);
    };
    my $parse_error = $@;

    if ($parse_error) {
        my $class_name = ref ($self);
        print $out "$class_name failed to process $file ($parse_error)\n";

        return -1; #error
    }

    return 1;
}
144
145
# Register an RDF file in $block_hash->{'metadata_files'}.
#
# Returns undef when can_process_this_file() rejects the full path,
# otherwise 1.  When GSDLOS is "windows" the path is registered twice,
# once with a lower-case and once with an upper-case drive letter.
sub file_block_read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;

    my $full_path = &util::filename_cat($base_dir, $file);
    if (!$self->can_process_this_file($full_path)) {
        return undef;
    }

    my @paths_to_register = ($full_path);

    if ($ENV{'GSDLOS'} =~ m/^windows$/) {
        # Cover both spellings of the drive letter (c: and C:).
        (my $lower_drive = $full_path) =~ s/^([A-Z]):/\l$1:/i;
        (my $upper_drive = $full_path) =~ s/^([A-Z]):/\u$1:/i;
        @paths_to_register = ($lower_drive, $upper_drive);
    }

    foreach my $path (@paths_to_register) {
        $block_hash->{'metadata_files'}->{$path} = 1;
    }

    return 1;
}
170
171
# XML::Rules handler for <rdf:Description> elements.
#
# Takes the metadata buffered in $self->{'saved_metadata'} (filled by
# rdf_default for the element's children) and files it under the target
# named by the element's rdf:about attribute.  A target of "." is stored
# as ".*".  New targets are also appended to the 'metakeysref' list.
# The buffer and its counter are reset afterwards.
#
# Arguments follow the XML::Rules rule calling convention:
#   ($tagname, $attrHash, $contextArray, $parentDataArray, $parser)
#
# Returns nothing, so no value is handed back to the parser for the
# enclosing element.
#
# NOTE(review): this sub works on the file-scoped $self, i.e. the object
# most recently built by new(), not necessarily the object whose
# metadata_read() started the parse -- confirm only one instance is live.
sub rdf_description
{
    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;

    my $outhandle = $self->{'outhandle'};
    my $target = $attrHash->{'rdf:about'};

    if (!defined $target) {
        # No rdf:about means there is nothing to attach the buffered
        # metadata to.  Previously it was filed under an empty-string
        # target; now report it and drop the buffer.
        print $outhandle "SimpleRDFMetadataPlugin skipped <$tagname> without rdf:about, discarding ";
        print $outhandle $self->{'saved_metadata_count'};
        print $outhandle " piece(s) of metadata\n";

        $self->{'saved_metadata'} = {};
        $self->{'saved_metadata_count'} = 0;
        return;
    }

    if ($target eq ".") {
        $target = ".*";
    }

    my $file_metadata = $self->{'metadataref'}->{$target};
    my $saved_metadata = $self->{'saved_metadata'};
    if (!defined $file_metadata) {
        $self->{'metadataref'}->{$target} = $saved_metadata;

        # not had target before
        push (@{$self->{'metakeysref'}}, $target);
    }
    else {
        # Target seen before: presumably this folds the new metadata
        # into the existing structure -- see metadatautil.
        &metadatautil::combine_metadata_structures($file_metadata,$saved_metadata);
    }

    print $outhandle "SimpleRDFMetadataPlugin associated ";
    print $outhandle $self->{'saved_metadata_count'};
    print $outhandle " piece(s) of metadata with $target\n";

    # Start the next <rdf:Description> with an empty buffer.
    $self->{'saved_metadata'} = {};
    $self->{'saved_metadata_count'} = 0;

    return;
}
203
# XML::Rules handler for every element other than <rdf:Description>.
#
# Only elements whose immediate parent is <rdf:Description> and which
# have text content are treated as metadata.  A tag "set:name" is
# buffered (via metadatautil::store_saved_metadata) under "set.name";
# a tag without a prefix is buffered under its own name.  For "mp.title"
# an extra "mp.titleInitialLetter" value is derived from the title with
# any leading article removed.
#
# Arguments follow the XML::Rules rule calling convention:
#   ($tagname, $attrHash, $contextArray, $parentDataArray, $parser)
#
# Returns nothing, so no value is handed back to the parser for the
# enclosing element.
#
# NOTE(review): like rdf_description, this relies on the file-scoped
# $self assigned in new().
sub rdf_default
{
    my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;

    # The last entry of the context is the immediate parent; the root
    # element has no context at all.
    return if (scalar(@$contextArray) == 0);
    return if ($contextArray->[-1] ne "rdf:Description");

    my $md_content = $attrHash->{'_content'};
    return if (!defined $md_content);

    # Split on the first colon only and keep the whole local name.  The
    # previous unanchored \w+ match cut names such as "mp:opus-number"
    # down to "mp.opus" and turned unprefixed tags into the name ".".
    my $full_md_name = $tagname;
    if ($tagname =~ m/^([^:]+):(.+)$/) {
        $full_md_name = "$1.$2";
    }

    &metadatautil::store_saved_metadata($self,$full_md_name,
                                        $md_content, 0);

    if ($full_md_name eq "mp.title") {
        # Ignore a leading article when picking the letter to file
        # the title under.
        my $sort_by_title = $md_content;
        $sort_by_title =~ s/^\s*(The|Die|Das|Der|Le|La|Les)\s+//i;

        # First A-Z letter, in whatever case the title uses.
        my ($initial_letter) = ($sort_by_title =~ m/([A-Z])/i);

        # Titles without any such letter (e.g. "1812") get no initial
        # letter rather than an undefined metadata value.
        if (defined $initial_letter) {
            &metadatautil::store_saved_metadata($self,"mp.titleInitialLetter",
                                                $initial_letter, 0);
        }
    }

    # Only the element's own value is counted, not the derived letter.
    $self->{'saved_metadata_count'}++;

    return;
}
238
239
240
2411;
Note: See TracBrowser for help on using the repository browser.