source: gsdl/trunk/perllib/plugins/MetadataCSVPlugin.pm@ 15918

Last change on this file since 15918 was 15872, checked in by kjdon, 16 years ago

plugin overhaul: plugins renamed to xxPlugin, and in some cases the names are made more sensible. They now use the new base plugins. Hopefully we have better code reuse. Some of the plugins still need work done as I didn't want to spend another month doing this before committing it. Also, I haven't really tested anything yet...

  • Property svn:keywords set to Author Date Id Revision
File size: 5.4 KB
Line 
1###########################################################################
2#
3# MetadataCSVPlugin.pm -- A plugin for metadata in comma-separated value format
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright 2006 New Zealand Digital Library Project
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27package MetadataCSVPlugin;
28
29
30use BasePlugin;
31use strict;
32
33
# Establish inheritance at compile time: this package derives from BasePlugin.
BEGIN {
    @MetadataCSVPlugin::ISA = qw(BasePlugin);
}
37
38
# Argument descriptions contributed by this plugin. The only one is
# block_exp, whose default comes from get_default_block_exp().
my $arguments = [
    {
        name => 'block_exp',
        desc => '{BasePlugin.block_exp}',
        type => 'regexp',
        reqd => 'no',
        deft => get_default_block_exp(),
    },
];


# Option record for this plugin; it carries the argument list above.
my $options = {
    name     => 'MetadataCSVPlugin',
    desc     => '{MetadataCSVPlugin.desc}',
    abstract => 'no',
    inherits => 'yes',
    args     => $arguments,
};
52
53
# Constructor.
#
# Parameters:
#   $class           - the class being constructed
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to the BasePlugin constructor
#   $hashArgOptLists - hash ref; this plugin's argument list is appended to
#                      its "ArgList" entry and its option record to "OptList"
#
# Returns the object built by BasePlugin, re-blessed into $class.
sub new
{
    my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;

    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit method-call syntax: this package defines its own "new", so the
    # indirect-object form ("new BasePlugin(...)") relies on parser heuristics.
    my $self = BasePlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
67
68
# Default "process" expression: a case-insensitive match on names ending in
# ".csv". The value is not used; the sub is only here to prevent a warning.
sub get_default_process_exp
{
    my $csv_suffix_pattern = '(?i)\.csv$';
    return $csv_suffix_pattern;
}
74
75
# Default "block" expression: a case-insensitive match on names ending in
# ".csv". Used by BasePlugin read to block this file.
sub get_default_block_exp
{
    my $csv_suffix_pattern = '(?i)\.csv$';
    return $csv_suffix_pattern;
}
81
82
# Read a CSV metadata file and record the metadata it assigns to other files.
#
# The first non-blank line of the CSV file lists the metadata field names
# (spaces inside names are removed); one of them must be "Filename". Every
# following non-blank line supplies one value per field. Values containing
# commas may be wrapped in double quotes. Empty values are ignored.
#
# Parameters:
#   $pluginfo, $metadata, $processor, $maxdocs
#                  - accepted for interface compatibility; not used here
#   $base_dir, $file
#                  - combined with util::filename_cat to locate the CSV file
#   $extrametakeys - array ref; each row's Filename value is pushed onto it
#   $extrametadata - hash ref; maps each row's Filename value to a hash of
#                    field name => array ref of values (Filename excluded)
#   $gli           - when true, a <Processing> marker is written to STDERR
#
# Returns undef if the path does not end in ".csv" (any case) or is not a
# plain file, and -1 if the file cannot be opened or has no Filename field.
# NOTE(review): no explicit value is returned after a successful read; this
# is kept as in the original -- confirm what the caller expects on success.
sub metadata_read
{
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $extrametakeys, $extrametadata, $processor, $maxdocs, $gli) = @_;

    # Read metadata from CSV files only. The test is case-insensitive so that
    # it agrees with the (?i) default block/process expressions; otherwise a
    # file named FOO.CSV would be blocked but never read for metadata.
    my $filename = util::filename_cat($base_dir, $file);
    if ($filename !~ /\.csv$/i || !-f $filename) {
        return undef;
    }
    print STDERR "\n<Processing n='$file' p='MetadataCSVPlugin'>\n" if ($gli);
    print STDERR "MetadataCSVPlugin: processing $file\n" if ($self->{'verbosity'}) > 1;

    # Open with an explicit read mode so that characters in the file name can
    # never be taken as an open mode, and report a failure instead of carrying
    # on with an unopened handle.
    if (!open(CSV_FILE, '<', $filename)) {
        print STDERR "MetadataCSVPlugin Error: Could not open CSV file $filename: $!\n";
        return -1; # error
    }

    # Read the CSV file to get the metadata. The reader is handed the
    # package-qualified name of the handle opened above.
    my $csv_file_content;
    my $csv_file_reader = multiread->new();
    $csv_file_reader->set_handle(__PACKAGE__ . '::CSV_FILE');
    $csv_file_reader->read_file(\$csv_file_content);
    close(CSV_FILE);

    # An empty file may leave the content undefined
    $csv_file_content = "" unless defined $csv_file_content;

    # Split the file into lines and read the first line (contains the metadata names)
    $csv_file_content =~ s/\r/\n/g; # Handle non-Unix line endings
    $csv_file_content =~ s/\n+/\n/g;
    # split() keeps a leading empty field, so without this a file starting
    # with a blank line would yield an empty header line
    $csv_file_content =~ s/^\n+//;
    my @csv_file_lines = split(/\n/, $csv_file_content);
    my $csv_file_field_line = shift(@csv_file_lines);
    $csv_file_field_line = "" unless defined $csv_file_field_line;
    my @csv_file_fields = split(/\,/, $csv_file_field_line);
    my $found_filename_field = 0;
    foreach my $field_name (@csv_file_fields) {
        # Remove any spaces from the field names (modifies the array in place)
        $field_name =~ s/ //g;
        $found_filename_field = 1 if ($field_name eq "Filename");
    }

    if (!$found_filename_field) {
        print STDERR "MetadataCSVPlugin Error: No Filename field in CSV file: $filename\n";
        return -1; # error
    }

    # Read each line of the file and assign the metadata appropriately
    foreach my $orig_csv_line (@csv_file_lines) {
        # Ignore lines containing only whitespace
        next if ($orig_csv_line =~ /^\s*$/);

        # Build a hash of metadata name to metadata value(s) for this line
        my %csv_line_metadata;
        my $i = 0;
        # Work on a copy with a trailing comma so every value, including the
        # last, is followed by a comma (keeps the regular expressions simple)
        my $csv_line = $orig_csv_line . ",";
        while ($csv_line ne "") {
            my $value;
            # Metadata values containing commas are quoted
            if ($csv_line =~ s/^\"(.*?)\"\,//) {
                $value = $1;
            }
            # Normal comma-separated case
            elsif ($csv_line =~ s/^(.*?)\,//) {
                $value = $1;
            }
            # The line must be formatted incorrectly
            else {
                print STDERR "MetadataCSVPlugin Error: Badly formatted CSV line: $csv_line.\n";
                last;
            }

            # Only bother with non-empty values that have a matching field name
            if ($value ne "" && defined($csv_file_fields[$i])) {
                push(@{$csv_line_metadata{$csv_file_fields[$i]}}, $value);
            }

            $i++;
        }

        # We can't associate any metadata without knowing the file to associate it with
        my $csv_line_filename_array = delete $csv_line_metadata{"Filename"};
        if (!defined $csv_line_filename_array) {
            print STDERR "MetadataCSVPlugin Error: No Filename metadata in CSV line: $orig_csv_line\n";
            next;
        }
        my $csv_line_filename = shift(@$csv_line_filename_array);

        # Associate the metadata now
        $extrametadata->{$csv_line_filename} = \%csv_line_metadata;
        push(@$extrametakeys, $csv_line_filename);
    }
}
176
177
1781;
Note: See TracBrowser for help on using the repository browser.