source: main/trunk/greenstone2/perllib/plugouts/DSpacePlugout.pm@ 32524

Last change on this file since 32524 was 32511, checked in by ak19, 6 years ago

Running plugoutinfo.pl with describeall or listall flag would break on FedoraMETSPlugout when either FEDORA_HOME or FEDORA_VERSION aren't set (as is often the case), as there's a die statement in the BEGIN of FedoraMETSPlugout. Needed to run die if either FEDORA env var is not set only if the plugout is NOT in info_only mode in plugout constructor. However, info_only mode was never set in any of the plugouts, so had to add set up the infrastructure for it in plugoutinfo.pl and plugout.pm. Then added the info_only test to all teh plugouts, even though it's redundant in most of them for making sure future changes to any plugout's constructors does not break plugoutinfo.pl.

  • Property svn:keywords set to Author Date Id Revision
File size: 10.6 KB
Line 
1###########################################################################
2#
3# DSpacePlugout.pm -- the plugout module for DSpace archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package DSpacePlugout;
27
28use strict;
29no strict 'refs';
30use utf8;
31eval {require bytes};
32use util;
33use FileUtils;
34use BasePlugout;
35
36sub BEGIN {
37 @DSpacePlugout::ISA = ('BasePlugout');
38}
39
40my $arguments = [
41 { 'name' => "metadata_prefix",
42 'desc' => "{DSpacePlugout.metadata_prefix}",
43 'type' => "string",
44 'reqd' => "no",
45 'hiddengli' => "no"} ];
46
47
48my $options = { 'name' => "DSpacePlugout",
49 'desc' => "{DSpacePlugout.desc}",
50 'abstract' => "no",
51 'inherits' => "yes",
52 'args' => $arguments };
53
54sub new {
55 my ($class) = shift (@_);
56 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
57 push(@$plugoutlist, $class);
58
59 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
60 push(@{$hashArgOptLists->{"OptList"}},$options);
61
62 my $self = new BasePlugout($plugoutlist,$inputargs,$hashArgOptLists);
63
64 if ($self->{'info_only'}) {
65 # don't worry about any options etc
66 return bless $self, $class;
67 }
68# print STDERR "***** metadata prefix = \"", $self->{'metadata_prefix'}, "\"\n";
69
70 return bless $self, $class;
71}
72
73sub saveas_dspace_metadata
74{
75 my $self = shift (@_);
76 my ($doc_obj,$working_dir,$metadata_file,$docroot,$metadata_prefix) = @_;
77
78 # my $docroot_attributes = ($metadata_prefix eq "dc") ? undef : "schema=\"$metadata_prefix\"";
79 my $docroot_attributes = "schema=\"$metadata_prefix\"";
80
81 my $doc_dc_file = &FileUtils::filenameConcatenate ($working_dir, $metadata_file);
82 $self->open_xslt_pipe($doc_dc_file,$self->{'xslt_file'});
83
84 my $outhandler;
85 if (defined $self->{'xslt_writer'}){
86 $outhandler = $self->{'xslt_writer'};
87 }
88 else{
89 $outhandler = $self->get_output_handler($doc_dc_file);
90 }
91
92 $self->output_general_xml_header($outhandler, $docroot, $docroot_attributes);
93
94 my $metadata_hashmap = $doc_obj->get_metadata_hashmap($doc_obj->get_top_section(),
95 $metadata_prefix);
96
97 if(defined $metadata_prefix && $metadata_prefix ne "") {
98 # merge dc with any ex.dc
99
100 my $ex_dc_metadata_hashmap = $doc_obj->get_metadata_hashmap($doc_obj->get_top_section(),
101 "ex.dc");
102
103 foreach my $metaname (keys %$ex_dc_metadata_hashmap) {
104 my $metaname_without_ex_prefix = $metaname;
105 $metaname_without_ex_prefix =~ s/^ex\.(.*)/$1/; # remove any ex from the ex.dc prefix
106
107 # if there's an ex.dc value for a metaname for which no dc
108 # value was assigned, put the ex.dc value into the hashmap
109 if(!defined $metadata_hashmap->{$metaname_without_ex_prefix}) {
110 $metadata_hashmap->{$metaname_without_ex_prefix} = [];
111 push(@{$metadata_hashmap->{$metaname_without_ex_prefix}},@{$ex_dc_metadata_hashmap->{$metaname}});
112 }
113 }
114
115 }
116
117 foreach my $metaname (keys %$metadata_hashmap) {
118 my $metavals = $metadata_hashmap->{$metaname};
119
120 my $qualifier = undef;
121 my $element;
122 if ($metaname =~ m/^(.*?)\^(.*?)$/) {
123 $element = $1;
124 $qualifier = $2;
125 $qualifier = lc($qualifier);
126 }
127 else {
128 $element = $metaname;
129 }
130 $element =~ s/^.*?\.//;
131 $element = lc($element);
132
133 foreach my $metaval (@$metavals) {
134
135 #if element is empty then no need to export it.
136
137 if ($metaval =~ /\S/) {
138 print $outhandler " <dcvalue element=\"$element\"";
139 #If no qualifier then add qualifier="none"
140 #print $outhandler " qualifier=\"$qualifier\"" if defined $qualifier;
141 if (defined $qualifier) {
142 print $outhandler " qualifier=\"$qualifier\"" ;
143 }
144 else {
145 print $outhandler " qualifier=\"none\" language=\"\"" ;
146 }
147 print $outhandler ">$metaval";
148 print $outhandler "</dcvalue>\n";
149 }
150
151
152
153 }
154 }
155
156 $self->output_general_xml_footer($outhandler,$docroot);
157
158 if (defined $self->{'xslt_writer'}){
159 $self->close_xslt_pipe();
160 }
161 else{
162 close($outhandler);
163 }
164
165}
166
167sub saveas {
168 my $self = shift (@_);
169 my ($doc_obj,$doc_dir) = @_;
170
171 my $output_dir = $self->get_output_dir();
172 &FileUtils::makeAllDirectories ($output_dir) unless -e $output_dir;
173
174 my $working_dir = &FileUtils::filenameConcatenate ($output_dir, $doc_dir);
175 &FileUtils::makeAllDirectories ($working_dir, $doc_dir);
176
177 #########################
178 # save the handle file
179 #########################
180 my $outhandle = $self->{'output_handle'};
181
182 my $generate_handle = 0;
183 if ($generate_handle) {
184 # Genereate handle file
185 # (Note: this section of code would benefit from being restructured)
186 my $doc_handle_file = &FileUtils::filenameConcatenate ($working_dir, "handle");
187
188 my $env_hp = $ENV{'DSPACE_HANDLE_PREFIX'};
189 my $handle_prefix = (defined $env_hp) ? $env_hp : "123456789";
190
191 my $outhandler = $self->get_output_handler($doc_handle_file);
192
193 my ($handle) = ($doc_dir =~ m/^(.*)(:?\.dir)?$/);
194
195 print $outhandler "$handle_prefix/$handle\n";
196
197 close ($outhandler);
198 }
199
200 #########################
201 # save the content file
202 #########################
203 my $doc_contents_file = &FileUtils::filenameConcatenate ($working_dir, "contents");
204
205 my $outhandler = $self->get_output_handler($doc_contents_file);
206
207 $self->process_assoc_files ($doc_obj, $doc_dir, $outhandler);
208
209 $self->process_metafiles_metadata ($doc_obj);
210
211 close($outhandler);
212
213 #############################
214 # save the dublin_core.xml file
215 ###############################
216# my $doc_dc_file = &FileUtils::filenameConcatenate ($working_dir, "dublin_core.xml");
217# $self->open_xslt_pipe($doc_dc_file,$self->{'xslt_file'});
218
219# if (defined $self->{'xslt_writer'}){
220# $outhandler = $self->{'xslt_writer'};
221# }
222# else{
223# $outhandler = $self->get_output_handler($doc_dc_file);
224# }
225
226# $self->output_general_xml_header($outhandler, "dublin_core");
227
228# my $all_text = $self->get_dc_metadata($doc_obj, $doc_obj->get_top_section());
229# print $outhandler $all_text;
230
231# $self->output_general_xml_footer($outhandler,"dublin_core");
232
233# if (defined $self->{'xslt_writer'}){
234# $self->close_xslt_pipe();
235# }
236# else{
237# close($outhandler);
238# }
239
240 $self->saveas_dspace_metadata($doc_obj,$working_dir,
241 "dublin_core.xml","dublin_core","dc");
242
243 my $metadata_prefix_list = $self->{'metadata_prefix'};
244# print STDERR "***!! md prefix = $metadata_prefix_list\n";
245
246 my @metadata_prefixes = split(/,\s*/,$metadata_prefix_list);
247 foreach my $ep (@metadata_prefixes) {
248 $self->saveas_dspace_metadata($doc_obj,$working_dir,
249 "metadata_$ep.xml","dublin_core",$ep);
250 }
251
252 $self->{'short_doc_file'} = &FileUtils::filenameConcatenate ($doc_dir, "dublin_core.xml");
253 $self->store_output_info_reference($doc_obj);
254}
255
256 sub process_assoc_files {
257 my $self = shift (@_);
258 my ($doc_obj, $doc_dir, $handle) = @_;
259
260 my $outhandler = $self->{'output_handle'};
261
262 my $output_dir = $self->get_output_dir();
263 return if (!defined $output_dir);
264
265 my $working_dir = &FileUtils::filenameConcatenate($output_dir, $doc_dir);
266
267 my @assoc_files = ();
268 my $filename;;
269
270 my $source_filename = $doc_obj->get_source_filename();
271
272 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
273
274 if (defined $collect_dir) {
275 my $dirsep_regexp = &util::get_os_dirsep();
276
277 if ($collect_dir !~ /$dirsep_regexp$/) {
278 $collect_dir .= &util::get_dirsep(); # ensure there is a slash at the end
279 }
280
281 # This test is never going to fail on Windows -- is this a problem?
282 if ($source_filename !~ /^$dirsep_regexp/) {
283 $source_filename = &FileUtils::filenameConcatenate($collect_dir, $source_filename);
284 }
285 }
286
287 my ($tail_filename) = ($source_filename =~ m/([^\/\\]*)$/);
288
289 print $handle "$tail_filename\n";
290
291 $filename = &FileUtils::filenameConcatenate($working_dir, $tail_filename);
292 &FileUtils::hardLink ($source_filename, $filename, $self->{'verbosity'});
293
294 # set the assocfile path (even if we have no assoc files - need this for lucene)
295 $doc_obj->set_utf8_metadata_element ($doc_obj->get_top_section(),
296 "assocfilepath",
297 "$doc_dir");
298 foreach my $assoc_file_rec (@{$doc_obj->get_assoc_files()}) {
299 my ($dir, $afile) = $assoc_file_rec->[1] =~ /^(.*?)([^\/\\]+)$/;
300 $dir = "" unless defined $dir;
301
302
303 my $real_filename = $assoc_file_rec->[0];
304 # for some reasons the image associate file has / before the full path
305 $real_filename =~ s/^\\(.*)/$1/i;
306 if (-e $real_filename) {
307 # escape backslashes and brackets in path for upcoming regex match
308 my $escaped_source_filename = &util::filename_to_regex($source_filename);
309 if ($real_filename =~ m/$escaped_source_filename$/) {
310 next;
311 }
312 else {
313 my $bundle = "bundle:ORIGINAL";
314
315 if ($afile =~ m/^thumbnail\./) {
316 $bundle = "bundle:THUMBNAIL";
317 }
318
319 # Store the associated file to the "contents" file. Cover.pdf not needed.
320 if ($afile ne "cover.jpg") {
321 print $handle "$assoc_file_rec->[1]\t$bundle\n";
322 }
323 }
324
325 $filename = &FileUtils::filenameConcatenate($working_dir, $afile);
326
327 if ($afile ne "cover.jpg") {
328 &FileUtils::hardLink ($real_filename, $filename, $self->{'verbosity'});
329 $doc_obj->add_utf8_metadata ($doc_obj->get_top_section(),
330 "gsdlassocfile",
331 "$afile:$assoc_file_rec->[2]:$dir");
332 }
333 } elsif ($self->{'verbosity'} > 2) {
334 print $outhandler "DSpacePlugout::process couldn't copy the associated file " .
335 "$real_filename to $afile\n";
336 }
337 }
338}
339
340
341sub get_new_doc_dir{
342 my $self = shift (@_);
343 my($working_info,$working_dir,$OID) = @_;
344
345 return $OID;
346
347}
Note: See TracBrowser for help on using the repository browser.