source: main/trunk/greenstone2/perllib/plugouts/DSpacePlugout.pm

Last change on this file was 37200, checked in by davidb, 15 months ago

New minus option added in to allow control over whether hardlinking or copying of files is used; also removed some deprecated functions from FileUtils.pm

  • Property svn:keywords set to Author Date Id Revision
File size: 11.0 KB
Line 
1###########################################################################
2#
3# DSpacePlugout.pm -- the plugout module for DSpace archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package DSpacePlugout;
27
28use strict;
29no strict 'refs';
30use utf8;
31eval {require bytes};
32use util;
33use FileUtils;
34use BasePlugout;
35
36sub BEGIN {
37 @DSpacePlugout::ISA = ('BasePlugout');
38}
39
40my $arguments = [
41 { 'name' => "metadata_prefix",
42 'desc' => "{DSpacePlugout.metadata_prefix}",
43 'type' => "string",
44 'reqd' => "no",
45 'hiddengli' => "no"} ];
46
47
48my $options = { 'name' => "DSpacePlugout",
49 'desc' => "{DSpacePlugout.desc}",
50 'abstract' => "no",
51 'inherits' => "yes",
52 'args' => $arguments };
53
54sub new {
55 my ($class) = shift (@_);
56 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
57 push(@$plugoutlist, $class);
58
59 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
60 push(@{$hashArgOptLists->{"OptList"}},$options);
61
62 my $self = new BasePlugout($plugoutlist,$inputargs,$hashArgOptLists);
63
64 if ($self->{'info_only'}) {
65 # don't worry about any options etc
66 return bless $self, $class;
67 }
68# print STDERR "***** metadata prefix = \"", $self->{'metadata_prefix'}, "\"\n";
69
70 return bless $self, $class;
71}
72
73sub saveas_dspace_metadata
74{
75 my $self = shift (@_);
76 my ($doc_obj,$working_dir,$metadata_file,$docroot,$metadata_prefix) = @_;
77
78 # my $docroot_attributes = ($metadata_prefix eq "dc") ? undef : "schema=\"$metadata_prefix\"";
79 my $docroot_attributes = "schema=\"$metadata_prefix\"";
80
81 my $doc_dc_file = &FileUtils::filenameConcatenate ($working_dir, $metadata_file);
82 $self->open_xslt_pipe($doc_dc_file,$self->{'xslt_file'});
83
84 my $outhandler;
85 if (defined $self->{'xslt_writer'}){
86 $outhandler = $self->{'xslt_writer'};
87 }
88 else{
89 $outhandler = $self->get_output_handler($doc_dc_file);
90 }
91
92 $self->output_general_xml_header($outhandler, $docroot, $docroot_attributes);
93
94 my $metadata_hashmap = $doc_obj->get_metadata_hashmap($doc_obj->get_top_section(),
95 $metadata_prefix);
96
97 if(defined $metadata_prefix && $metadata_prefix ne "") {
98 # merge dc with any ex.dc
99
100 my $ex_dc_metadata_hashmap = $doc_obj->get_metadata_hashmap($doc_obj->get_top_section(),
101 "ex.dc");
102
103 foreach my $metaname (keys %$ex_dc_metadata_hashmap) {
104 my $metaname_without_ex_prefix = $metaname;
105 $metaname_without_ex_prefix =~ s/^ex\.(.*)/$1/; # remove any ex from the ex.dc prefix
106
107 # if there's an ex.dc value for a metaname for which no dc
108 # value was assigned, put the ex.dc value into the hashmap
109 if(!defined $metadata_hashmap->{$metaname_without_ex_prefix}) {
110 $metadata_hashmap->{$metaname_without_ex_prefix} = [];
111 push(@{$metadata_hashmap->{$metaname_without_ex_prefix}},@{$ex_dc_metadata_hashmap->{$metaname}});
112 }
113 }
114
115 }
116
117 foreach my $metaname (keys %$metadata_hashmap) {
118 my $metavals = $metadata_hashmap->{$metaname};
119
120 my $qualifier = undef;
121 my $element;
122 if ($metaname =~ m/^(.*?)\^(.*?)$/) {
123 $element = $1;
124 $qualifier = $2;
125 $qualifier = lc($qualifier);
126 }
127 else {
128 $element = $metaname;
129 }
130 $element =~ s/^.*?\.//;
131 $element = lc($element);
132
133 foreach my $metaval (@$metavals) {
134
135 #if element is empty then no need to export it.
136
137 if ($metaval =~ /\S/) {
138 print $outhandler " <dcvalue element=\"$element\"";
139 #If no qualifier then add qualifier="none"
140 #print $outhandler " qualifier=\"$qualifier\"" if defined $qualifier;
141 if (defined $qualifier) {
142 print $outhandler " qualifier=\"$qualifier\"" ;
143 }
144 else {
145 print $outhandler " qualifier=\"none\" language=\"\"" ;
146 }
147 print $outhandler ">$metaval";
148 print $outhandler "</dcvalue>\n";
149 }
150
151
152
153 }
154 }
155
156 $self->output_general_xml_footer($outhandler,$docroot);
157
158 if (defined $self->{'xslt_writer'}){
159 $self->close_xslt_pipe();
160 }
161 else{
162 close($outhandler);
163 }
164
165}
166
167sub saveas {
168 my $self = shift (@_);
169 my ($doc_obj,$doc_dir) = @_;
170
171 my $output_dir = $self->get_output_dir();
172 &FileUtils::makeAllDirectories ($output_dir) unless -e $output_dir;
173
174 my $working_dir = &FileUtils::filenameConcatenate ($output_dir, $doc_dir);
175 &FileUtils::makeAllDirectories ($working_dir, $doc_dir);
176
177 #########################
178 # save the handle file
179 #########################
180 my $outhandle = $self->{'output_handle'};
181
182 my $generate_handle = 0;
183 if ($generate_handle) {
184 # Genereate handle file
185 # (Note: this section of code would benefit from being restructured)
186 my $doc_handle_file = &FileUtils::filenameConcatenate ($working_dir, "handle");
187
188 my $env_hp = $ENV{'DSPACE_HANDLE_PREFIX'};
189 my $handle_prefix = (defined $env_hp) ? $env_hp : "123456789";
190
191 my $outhandler = $self->get_output_handler($doc_handle_file);
192
193 my ($handle) = ($doc_dir =~ m/^(.*)(:?\.dir)?$/);
194
195 print $outhandler "$handle_prefix/$handle\n";
196
197 close ($outhandler);
198 }
199
200 #########################
201 # save the content file
202 #########################
203 my $doc_contents_file = &FileUtils::filenameConcatenate ($working_dir, "contents");
204
205 my $outhandler = $self->get_output_handler($doc_contents_file);
206
207 $self->process_assoc_files ($doc_obj, $doc_dir, $outhandler);
208
209 $self->process_metafiles_metadata ($doc_obj);
210
211 close($outhandler);
212
213 #############################
214 # save the dublin_core.xml file
215 ###############################
216# my $doc_dc_file = &FileUtils::filenameConcatenate ($working_dir, "dublin_core.xml");
217# $self->open_xslt_pipe($doc_dc_file,$self->{'xslt_file'});
218
219# if (defined $self->{'xslt_writer'}){
220# $outhandler = $self->{'xslt_writer'};
221# }
222# else{
223# $outhandler = $self->get_output_handler($doc_dc_file);
224# }
225
226# $self->output_general_xml_header($outhandler, "dublin_core");
227
228# my $all_text = $self->get_dc_metadata($doc_obj, $doc_obj->get_top_section());
229# print $outhandler $all_text;
230
231# $self->output_general_xml_footer($outhandler,"dublin_core");
232
233# if (defined $self->{'xslt_writer'}){
234# $self->close_xslt_pipe();
235# }
236# else{
237# close($outhandler);
238# }
239
240 $self->saveas_dspace_metadata($doc_obj,$working_dir,
241 "dublin_core.xml","dublin_core","dc");
242
243 my $metadata_prefix_list = $self->{'metadata_prefix'};
244# print STDERR "***!! md prefix = $metadata_prefix_list\n";
245
246 my @metadata_prefixes = split(/,\s*/,$metadata_prefix_list);
247 foreach my $ep (@metadata_prefixes) {
248 $self->saveas_dspace_metadata($doc_obj,$working_dir,
249 "metadata_$ep.xml","dublin_core",$ep);
250 }
251
252 $self->{'short_doc_file'} = &FileUtils::filenameConcatenate ($doc_dir, "dublin_core.xml");
253 $self->store_output_info_reference($doc_obj);
254}
255
256 sub process_assoc_files {
257 my $self = shift (@_);
258 my ($doc_obj, $doc_dir, $handle) = @_;
259
260 my $outhandler = $self->{'output_handle'};
261
262 my $output_dir = $self->get_output_dir();
263 return if (!defined $output_dir);
264
265 my $working_dir = &FileUtils::filenameConcatenate($output_dir, $doc_dir);
266
267 my @assoc_files = ();
268 my $filename;;
269
270 my $source_filename = $doc_obj->get_source_filename();
271
272 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
273
274 if (defined $collect_dir) {
275 my $dirsep_regexp = &util::get_os_dirsep();
276
277 if ($collect_dir !~ /$dirsep_regexp$/) {
278 $collect_dir .= &util::get_dirsep(); # ensure there is a slash at the end
279 }
280
281 # This test is never going to fail on Windows -- is this a problem?
282 if ($source_filename !~ /^$dirsep_regexp/) {
283 $source_filename = &FileUtils::filenameConcatenate($collect_dir, $source_filename);
284 }
285 }
286
287 my ($tail_filename) = ($source_filename =~ m/([^\/\\]*)$/);
288
289 print $handle "$tail_filename\n";
290
291 $filename = &FileUtils::filenameConcatenate($working_dir, $tail_filename);
292 if ($self->{'assocfile_copymode'} eq "hardlink") {
293 &FileUtils::hardLink ($source_filename, $filename, $self->{'verbosity'}); # Consider adding in 'strict' option??
294 }
295 else {
296 &FileUtils::copyFilesGeneral([$source_filename], $filename); # Consider adding in 'strict' option??
297 }
298
299 # set the assocfile path (even if we have no assoc files - need this for lucene)
300 $doc_obj->set_utf8_metadata_element ($doc_obj->get_top_section(),
301 "assocfilepath",
302 "$doc_dir");
303 foreach my $assoc_file_rec (@{$doc_obj->get_assoc_files()}) {
304 my ($dir, $afile) = $assoc_file_rec->[1] =~ /^(.*?)([^\/\\]+)$/;
305 $dir = "" unless defined $dir;
306
307
308 my $real_filename = $assoc_file_rec->[0];
309 # for some reasons the image associate file has / before the full path
310 $real_filename =~ s/^\\(.*)/$1/i;
311 if (-e $real_filename) {
312 # escape backslashes and brackets in path for upcoming regex match
313 my $escaped_source_filename = &util::filename_to_regex($source_filename);
314 if ($real_filename =~ m/$escaped_source_filename$/) {
315 next;
316 }
317 else {
318 my $bundle = "bundle:ORIGINAL";
319
320 if ($afile =~ m/^thumbnail\./) {
321 $bundle = "bundle:THUMBNAIL";
322 }
323
324 # Store the associated file to the "contents" file. Cover.pdf not needed.
325 if ($afile ne "cover.jpg") {
326 print $handle "$assoc_file_rec->[1]\t$bundle\n";
327 }
328 }
329
330 $filename = &FileUtils::filenameConcatenate($working_dir, $afile);
331
332 if ($afile ne "cover.jpg") {
333 if ($self->{'assocfile_copymode'} eq "hardlink") {
334 &FileUtils::hardLink($real_filename, $filename, $self->{'verbosity'}); # Consider adding in 'strict' option??
335 }
336 else {
337 &FileUtils::copyFilesGeneral([$real_filename], $filename); # Consider adding in 'strict' option??
338 }
339 $doc_obj->add_utf8_metadata ($doc_obj->get_top_section(),
340 "gsdlassocfile",
341 "$afile:$assoc_file_rec->[2]:$dir");
342 }
343 } elsif ($self->{'verbosity'} > 2) {
344 print $outhandler "DSpacePlugout::process couldn't copy the associated file " .
345 "$real_filename to $afile\n";
346 }
347 }
348}
349
350
351sub get_new_doc_dir{
352 my $self = shift (@_);
353 my($working_info,$working_dir,$OID) = @_;
354
355 return $OID;
356
357}
Note: See TracBrowser for help on using the repository browser.