source: main/trunk/greenstone2/perllib/plugins/MetadataRead.pm@ 36372

Last change on this file since 36372 was 36372, checked in by kjdon, 21 months ago

tidy up of extrametautil, renaming some methods to make them easier to understand, removing anything unused. then modifying plugins to use new methods. Also, moved some common code to MetadataRead function, can call this from several plugins instead of duplicating code. This is an interim commit, where I have left in the old code to make it easier to track changes. Next commit will have everything tidied up.

File size: 5.1 KB
Line 
1###########################################################################
2#
3# MetadataRead - like a Java interface that defines that a subclass is
4# a Plugin that extracts Metadata
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2008 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package MetadataRead;
29
30use PrintInfo;
31use strict;
32
33# MetadataRead is an abstract superclass that does not inherit from anything else.
34# It exists solely to define the can_process_this_file_for_metadata() method in
35# such a way that those MetadataPlugins that inherit from MetadataRead don't need
36# to define this method and will always process the files associated with them for
37# metadata and other plugins in the pipeline won't be passed these files anymore.
38
39# MetadataRead defines method can_process_this_file_for_metadata() with identical
40# signature to BaseImporter. (MetadataRead doesn't inherit from BaseImporter, so it's
41# not 'overriding' it.) Subclasses of MetadataRead that want to use this method
42# definition can override their inherited BaseImporter version of the method by
43# listing MetadataRead as the *first* superclass they inherit from in the ISA list.
44# This is the way Perl resolves conflicting method definitions.
45
# MetadataRead declares no arguments of its own; the (empty) list is still
# appended to the shared "ArgList" by new().
my $arguments = [];

# Description record for this class, appended to the shared "OptList" by
# new(). 'desc' looks like a lookup key rather than literal text
# (presumably resolved elsewhere -- confirm against PrintInfo).
my $options = {
    'name'     => 'MetadataRead',
    'desc'     => '{MetadataRead.desc}',
    'abstract' => 'yes',
    'inherits' => 'yes',
    'args'     => $arguments,
};
53
54
# Constructor.
#
#   $pluginlist      - array ref; the name of the class being constructed
#                      is appended to it
#   $inputargs       - accepted but not used by this class
#   $hashArgOptLists - hash ref; this class's argument list is appended to
#                      its "ArgList" entry and its option record to its
#                      "OptList" entry
#   $auxiliary       - accepted but not used by this class
#
# Returns a new, empty hash-based object blessed into $class.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists, $auxiliary) = @_;

    push @{$pluginlist}, $class;

    push @{ $hashArgOptLists->{"ArgList"} }, @{$arguments};
    push @{ $hashArgOptLists->{"OptList"} }, $options;

    # Like PrintInfo, MetadataRead has no superclass to construct the
    # object for it, so the object starts life as an empty hash.
    return bless {}, $class;
}
69
# Metadata plugins that inherit from MetadataRead will, by default, process
# all the metadata in files whose extensions match: the decision is simply
# delegated to the object's own can_process_this_file() method, which
# receives the remaining arguments unchanged.
# Override this method in a subclass to return undef if other plugins
# should also be allowed to process the metadata thereafter.
sub can_process_this_file_for_metadata {
    my $plugin = shift;

    # print STDERR "********* MetadataRead::can_process_this_file_for_metadata() called.\n";

    # @_ is handed on as-is so the callee sees exactly what the caller passed.
    return $plugin->can_process_this_file(@_);
}
81
# Stores $new_metadata against $filename_for_metadata in the extrametadata
# structures. If metadata is already stored for that file, the new values
# are appended to the existing ones; otherwise a new entry (and key) is
# created.
#
#   $filename_for_metadata - name of the file to attach the metadata to
#   $new_metadata          - hash ref of all the metadata:
#                            metadata name => array ref of values
#   $file                  - the metadata file
#   $filename_full_path    - full path to the metadata file
#   $extrametakeys, $extrametadata, $extrametafile
#                          - the extrameta structures; only ever touched
#                            through the extrametautil helper functions
sub store_meta_in_extrametadata
{
    my $self = shift(@_);

    my ($filename_for_metadata, $new_metadata, $file, $filename_full_path,
        $extrametakeys, $extrametadata, $extrametafile) = @_;

    # Extrametadata keys should be regular expressions.
    # Indexing into the extrameta data structures requires the filename's
    # style of slashes to be in URL format. The filename is then converted
    # to a regex, no longer to protect windows directory chars \, but to
    # protect special characters like brackets in the filepath, such as
    # "C:\Program Files (x86)\Greenstone".
    $filename_for_metadata = &util::filepath_to_url_format($filename_for_metadata);
    $filename_for_metadata = &util::filename_to_regex($filename_for_metadata);

    # Look the file up once and reuse the result, both to decide whether a
    # merge is needed and as the table to merge into.
    my $file_metadata_table = &extrametautil::getmetadata($extrametadata, $filename_for_metadata);

    if (defined $file_metadata_table) {
        # Objects created by MetadataRead::new alone carry no 'verbosity'
        # entry, so guard against comparing undef.
        my $verbosity = $self->{'verbosity'};
        if (defined $verbosity && $verbosity > 3) {
            print STDERR "\n**** MetadataRead: Need to merge new metadata with existing stored metadata: file = $filename_for_metadata\n";
        }

        foreach my $metaname (keys %{$new_metadata}) {
            # will create new entry if one does not already exist
            push(@{$file_metadata_table->{$metaname}}, @{$new_metadata->{$metaname}});
        }
    }
    else {
        &extrametautil::setmetadata($extrametadata, $filename_for_metadata, $new_metadata);
        &extrametautil::addmetakey($extrametakeys, $filename_for_metadata);
    }

    if (defined $file && defined $filename_for_metadata) {
        # maps the file to full path
        &extrametautil::addmetafile($extrametafile, $filename_for_metadata, $file, $filename_full_path);
    }
}
125
1261;
Note: See TracBrowser for help on using the repository browser.