1 | ###########################################################################
|
---|
2 | #
|
---|
3 | # MetadataRead - like a Java interface that defines that a subclass is
|
---|
4 | # a Plugin that extracts Metadata
|
---|
5 | #
|
---|
6 | # A component of the Greenstone digital library software
|
---|
7 | # from the New Zealand Digital Library Project at the
|
---|
8 | # University of Waikato, New Zealand.
|
---|
9 | #
|
---|
10 | # Copyright (C) 2008 New Zealand Digital Library Project
|
---|
11 | #
|
---|
12 | # This program is free software; you can redistribute it and/or modify
|
---|
13 | # it under the terms of the GNU General Public License as published by
|
---|
14 | # the Free Software Foundation; either version 2 of the License, or
|
---|
15 | # (at your option) any later version.
|
---|
16 | #
|
---|
17 | # This program is distributed in the hope that it will be useful,
|
---|
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
20 | # GNU General Public License for more details.
|
---|
21 | #
|
---|
22 | # You should have received a copy of the GNU General Public License
|
---|
23 | # along with this program; if not, write to the Free Software
|
---|
24 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
25 | #
|
---|
26 | ###########################################################################
|
---|
27 |
|
---|
28 | package MetadataRead;
|
---|
29 |
|
---|
30 | use PrintInfo;
|
---|
31 | use strict;
|
---|
32 |
|
---|
33 | # MetadataRead is an abstract superclass that does not inherit from anything else.
|
---|
34 | # It exists solely to define the can_process_this_file_for_metadata() method in
|
---|
35 | # such a way that those MetadataPlugins that inherit from MetadataRead don't need
|
---|
36 | # to define this method and will always process the files associated with them for
|
---|
37 | # metadata and other plugins in the pipeline won't be passed these files anymore.
|
---|
38 |
|
---|
39 | # MetadataRead defines method can_process_this_file_for_metadata() with identical
|
---|
40 | # signature to BaseImporter. (MetadataRead doesn't inherit from BaseImporter, so it's
|
---|
41 | # not 'overriding' it.) Subclasses of MetadataRead that want to use this method
|
---|
42 | # definition can override their inherited BaseImporter version of the method by
|
---|
43 | # listing MetadataRead as the *first* superclass they inherit from in the ISA list.
|
---|
44 | # This is the way Perl resolves conflicting method definitions.
|
---|
45 |
|
---|
# MetadataRead declares no arguments of its own; the (empty) list is still
# registered by new() alongside the option block below.
my $arguments = [];

# Descriptor for this module. new() pushes it onto the caller's "OptList".
# The 'desc' value is a placeholder key written exactly as the rest of the
# system expects to find it.
my $options = {
    'name'     => "MetadataRead",
    'desc'     => "{MetadataRead.desc}",
    'abstract' => "yes",
    'inherits' => "yes",
    'args'     => $arguments,
};
|
---|
53 |
|
---|
54 |
|
---|
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; the class name is appended to it
#   $inputargs       - accepted but not used here
#   $hashArgOptLists - hash ref; this module's argument list is appended to
#                      its "ArgList" entry and its option block to "OptList"
#   $auxiliary       - accepted but not used here
#
# Returns a new, empty hash blessed into $class.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists, $auxiliary) = @_;

    push @{$pluginlist}, $class;

    push @{ $hashArgOptLists->{"ArgList"} }, @{$arguments};
    push @{ $hashArgOptLists->{"OptList"} }, $options;

    # Like PrintInfo, MetadataRead has no superclass to construct from,
    # so the object starts life as an empty hash.
    my $self = bless {}, $class;
    return $self;
}
|
---|
69 |
|
---|
70 | # MetadataPlugins that inherit from MetadataRead will by default
|
---|
71 | # process all the metadata in files whose extensions match.
|
---|
72 | # Override this method in a subclass to return undef if other
|
---|
73 | # files should also be allowed to process the metadata thereafter.
|
---|
# Decide whether this plugin handles a file's metadata by delegating to the
# object's own can_process_this_file() method: the remaining arguments are
# forwarded untouched and that method's result is returned as-is.
sub can_process_this_file_for_metadata {
    my $plugin = shift;

    # Uncomment to trace calls:
    # print STDERR "********* MetadataRead::can_process_this_file_for_metadata() called.\n";

    return $plugin->can_process_this_file(@_);
}
|
---|
81 |
|
---|
82 | # $filename_for_metadata is the name of the file to attach metadata to; $new_metadata is a hash of all the metadata; $file is the metadata file; $filename_full_path is the full path to the metadata file.
|
---|
# Record $new_metadata against a file in the extrametadata structures.
#
# Arguments (after $self):
#   $filename_for_metadata - name of the file the metadata is attached to
#   $new_metadata          - hash ref: metadata name => array ref of values
#   $file                  - the metadata file
#   $filename_full_path    - full path to the metadata file
#   $extrametakeys, $extrametadata, $extrametafile
#                          - structures handed straight to the extrametautil
#                            helpers; their layout is not visible from here
#
# If metadata is already stored under the file's key, the new values are
# appended to the stored table; otherwise $new_metadata itself is stored and
# the key is registered.
sub store_meta_in_extrametadata
{
    my ($self, $filename_for_metadata, $new_metadata, $file,
        $filename_full_path, $extrametakeys, $extrametadata,
        $extrametafile) = @_;

    # Extrametadata keys should be regular expressions over URL-style paths.
    # Convert the slashes first, then turn the name into a regex: this is no
    # longer about protecting Windows "\" separators but about protecting
    # special characters in the path, such as the brackets in
    # "C:\Program Files (x86)\Greenstone".
    my $url_style_name = &util::filepath_to_url_format($filename_for_metadata);
    my $meta_key       = &util::filename_to_regex($url_style_name);

    # Has anything been stored under this key before?
    my $already_stored =
        defined(&extrametautil::getmetadata($extrametadata, $meta_key));

    if (!$already_stored) {
        # First metadata seen for this key: store it and register the key.
        &extrametautil::setmetadata($extrametadata, $meta_key, $new_metadata);
        &extrametautil::addmetakey($extrametakeys, $meta_key);
    }
    else {
        print STDERR "\n**** MetadataRead: Need to merge new metadata with existing stored metadata: file = $meta_key\n" if $self->{'verbosity'} > 3;

        my $stored_table = &extrametautil::getmetadata($extrametadata, $meta_key);

        for my $field (keys %{$new_metadata}) {
            # push creates the entry if the stored table lacks this field
            push @{ $stored_table->{$field} }, @{ $new_metadata->{$field} };
        }
    }

    if (defined($file) && defined($meta_key)) {
        # maps the metadata file to its full path under this key
        &extrametautil::addmetafile($extrametafile, $meta_key, $file, $filename_full_path);
    }
}
|
---|
125 |
|
---|
126 | 1;
|
---|