source: main/trunk/greenstone2/perllib/plugouts/DSpacePlugout.pm@ 24829

Last change on this file since 24829 was 24829, checked in by ak19, 12 years ago

Changes to bat files and perl code to deal with brackets in (Windows) filepath. Also checked winmake.bat files to see if changes were needed there. These changes go together with the commits 24826 to 24828 for gems.bat, and commit 24820 on makegs2.bat.

  • Property svn:keywords set to Author Date Id Revision
File size: 10.3 KB
Line 
1###########################################################################
2#
3# DSpacePlugout.pm -- the plugout module for DSpace archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package DSpacePlugout;
27
28use strict;
29no strict 'refs';
30use utf8;
31eval {require bytes};
32use util;
33use BasePlugout;
34
35sub BEGIN {
36 @DSpacePlugout::ISA = ('BasePlugout');
37}
38
39my $arguments = [
40 { 'name' => "metadata_prefix",
41 'desc' => "{DSpacePlugout.metadata_prefix}",
42 'type' => "string",
43 'reqd' => "no",
44 'hiddengli' => "no"} ];
45
46
47my $options = { 'name' => "DSpacePlugout",
48 'desc' => "{DSpacePlugout.desc}",
49 'abstract' => "no",
50 'inherits' => "yes",
51 'args' => $arguments };
52
53sub new {
54 my ($class) = shift (@_);
55 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
56 push(@$plugoutlist, $class);
57
58 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
59 push(@{$hashArgOptLists->{"OptList"}},$options);
60
61 my $self = new BasePlugout($plugoutlist,$inputargs,$hashArgOptLists);
62
63# print STDERR "***** metadata prefix = \"", $self->{'metadata_prefix'}, "\"\n";
64
65 return bless $self, $class;
66}
67
68sub saveas_dspace_metadata
69{
70 my $self = shift (@_);
71 my ($doc_obj,$working_dir,$metadata_file,$docroot,$metadata_prefix) = @_;
72
73 # my $docroot_attributes = ($metadata_prefix eq "dc") ? undef : "schema=\"$metadata_prefix\"";
74 my $docroot_attributes = "schema=\"$metadata_prefix\"";
75
76 my $doc_dc_file = &util::filename_cat ($working_dir, $metadata_file);
77 $self->open_xslt_pipe($doc_dc_file,$self->{'xslt_file'});
78
79 my $outhandler;
80 if (defined $self->{'xslt_writer'}){
81 $outhandler = $self->{'xslt_writer'};
82 }
83 else{
84 $outhandler = $self->get_output_handler($doc_dc_file);
85 }
86
87 $self->output_general_xml_header($outhandler, $docroot, $docroot_attributes);
88
89 my $metadata_hashmap = $doc_obj->get_metadata_hashmap($doc_obj->get_top_section(),
90 $metadata_prefix);
91
92 if(defined $metadata_prefix && $metadata_prefix ne "") {
93 # merge dc with any ex.dc
94
95 my $ex_dc_metadata_hashmap = $doc_obj->get_metadata_hashmap($doc_obj->get_top_section(),
96 "ex.dc");
97
98 foreach my $metaname (keys %$ex_dc_metadata_hashmap) {
99 my $metaname_without_ex_prefix = $metaname;
100 $metaname_without_ex_prefix =~ s/^ex\.(.*)/$1/; # remove any ex from the ex.dc prefix
101
102 # if there's an ex.dc value for a metaname for which no dc
103 # value was assigned, put the ex.dc value into the hashmap
104 if(!defined $metadata_hashmap->{$metaname_without_ex_prefix}) {
105 $metadata_hashmap->{$metaname_without_ex_prefix} = [];
106 push(@{$metadata_hashmap->{$metaname_without_ex_prefix}},@{$ex_dc_metadata_hashmap->{$metaname}});
107 }
108 }
109
110 }
111
112 foreach my $metaname (keys %$metadata_hashmap) {
113 my $metavals = $metadata_hashmap->{$metaname};
114
115 my $qualifier = undef;
116 my $element;
117 if ($metaname =~ m/^(.*?)\^(.*?)$/) {
118 $element = $1;
119 $qualifier = $2;
120 $qualifier = lc($qualifier);
121 }
122 else {
123 $element = $metaname;
124 }
125 $element =~ s/^.*?\.//;
126 $element = lc($element);
127
128 foreach my $metaval (@$metavals) {
129
130 #if element is empty then no need to export it.
131
132 if ($metaval =~ /\S/) {
133 print $outhandler " <dcvalue element=\"$element\"";
134 #If no qualifier then add qualifier="none"
135 #print $outhandler " qualifier=\"$qualifier\"" if defined $qualifier;
136 if (defined $qualifier) {
137 print $outhandler " qualifier=\"$qualifier\"" ;
138 }
139 else {
140 print $outhandler " qualifier=\"none\" language=\"\"" ;
141 }
142 print $outhandler ">$metaval";
143 print $outhandler "</dcvalue>\n";
144 }
145
146
147
148 }
149 }
150
151 $self->output_general_xml_footer($outhandler,$docroot);
152
153 if (defined $self->{'xslt_writer'}){
154 $self->close_xslt_pipe();
155 }
156 else{
157 close($outhandler);
158 }
159
160}
161
162sub saveas {
163 my $self = shift (@_);
164 my ($doc_obj,$doc_dir) = @_;
165
166 my $output_dir = $self->get_output_dir();
167 &util::mk_all_dir ($output_dir) unless -e $output_dir;
168
169 my $working_dir = &util::filename_cat ($output_dir, $doc_dir);
170 &util::mk_all_dir ($working_dir, $doc_dir);
171
172 #########################
173 # save the handle file
174 #########################
175 my $outhandle = $self->{'output_handle'};
176
177 my $generate_handle = 0;
178 if ($generate_handle) {
179 # Genereate handle file
180 # (Note: this section of code would benefit from being restructured)
181 my $doc_handle_file = &util::filename_cat ($working_dir, "handle");
182
183 my $env_hp = $ENV{'DSPACE_HANDLE_PREFIX'};
184 my $handle_prefix = (defined $env_hp) ? $env_hp : "123456789";
185
186 my $outhandler = $self->get_output_handler($doc_handle_file);
187
188 my ($handle) = ($doc_dir =~ m/^(.*)(:?\.dir)?$/);
189
190 print $outhandler "$handle_prefix/$handle\n";
191
192 close ($outhandler);
193 }
194
195 #########################
196 # save the content file
197 #########################
198 my $doc_contents_file = &util::filename_cat ($working_dir, "contents");
199
200 my $outhandler = $self->get_output_handler($doc_contents_file);
201
202 $self->process_assoc_files ($doc_obj, $doc_dir, $outhandler);
203
204 $self->process_metafiles_metadata ($doc_obj);
205
206 close($outhandler);
207
208 #############################
209 # save the dublin_core.xml file
210 ###############################
211# my $doc_dc_file = &util::filename_cat ($working_dir, "dublin_core.xml");
212# $self->open_xslt_pipe($doc_dc_file,$self->{'xslt_file'});
213
214# if (defined $self->{'xslt_writer'}){
215# $outhandler = $self->{'xslt_writer'};
216# }
217# else{
218# $outhandler = $self->get_output_handler($doc_dc_file);
219# }
220
221# $self->output_general_xml_header($outhandler, "dublin_core");
222
223# my $all_text = $self->get_dc_metadata($doc_obj, $doc_obj->get_top_section());
224# print $outhandler $all_text;
225
226# $self->output_general_xml_footer($outhandler,"dublin_core");
227
228# if (defined $self->{'xslt_writer'}){
229# $self->close_xslt_pipe();
230# }
231# else{
232# close($outhandler);
233# }
234
235 $self->saveas_dspace_metadata($doc_obj,$working_dir,
236 "dublin_core.xml","dublin_core","dc");
237
238 my $metadata_prefix_list = $self->{'metadata_prefix'};
239# print STDERR "***!! md prefix = $metadata_prefix_list\n";
240
241 my @metadata_prefixes = split(/,\s*/,$metadata_prefix_list);
242 foreach my $ep (@metadata_prefixes) {
243 $self->saveas_dspace_metadata($doc_obj,$working_dir,
244 "metadata_$ep.xml","dublin_core",$ep);
245 }
246
247 $self->{'short_doc_file'} = &util::filename_cat ($doc_dir, "dublin_core.xml");
248 $self->store_output_info_reference($doc_obj);
249}
250
251 sub process_assoc_files {
252 my $self = shift (@_);
253 my ($doc_obj, $doc_dir, $handle) = @_;
254
255 my $outhandler = $self->{'output_handle'};
256
257 my $output_dir = $self->get_output_dir();
258 return if (!defined $output_dir);
259
260 my $working_dir = &util::filename_cat($output_dir, $doc_dir);
261
262 my @assoc_files = ();
263 my $filename;;
264
265 my $source_filename = $doc_obj->get_source_filename();
266
267 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
268
269 if (defined $collect_dir) {
270 my $dirsep_regexp = &util::get_os_dirsep();
271
272 if ($collect_dir !~ /$dirsep_regexp$/) {
273 $collect_dir .= &util::get_dirsep(); # ensure there is a slash at the end
274 }
275
276 # This test is never going to fail on Windows -- is this a problem?
277 if ($source_filename !~ /^$dirsep_regexp/) {
278 $source_filename = &util::filename_cat($collect_dir, $source_filename);
279 }
280 }
281
282 my ($tail_filename) = ($source_filename =~ m/([^\/\\]*)$/);
283
284 print $handle "$tail_filename\n";
285
286 $filename = &util::filename_cat($working_dir, $tail_filename);
287 &util::hard_link ($source_filename, $filename, $self->{'verbosity'});
288
289 # set the assocfile path (even if we have no assoc files - need this for lucene)
290 $doc_obj->set_utf8_metadata_element ($doc_obj->get_top_section(),
291 "assocfilepath",
292 "$doc_dir");
293 foreach my $assoc_file_rec (@{$doc_obj->get_assoc_files()}) {
294 my ($dir, $afile) = $assoc_file_rec->[1] =~ /^(.*?)([^\/\\]+)$/;
295 $dir = "" unless defined $dir;
296
297
298 my $real_filename = $assoc_file_rec->[0];
299 # for some reasons the image associate file has / before the full path
300 $real_filename =~ s/^\\(.*)/$1/i;
301 if (-e $real_filename) {
302 # escape backslashes and brackets in path for upcoming regex match
303 my $escaped_source_filename = &util::filename_to_regex($source_filename);
304 if ($real_filename =~ m/$escaped_source_filename$/) {
305 next;
306 }
307 else {
308 my $bundle = "bundle:ORIGINAL";
309
310 if ($afile =~ m/^thumbnail\./) {
311 $bundle = "bundle:THUMBNAIL";
312 }
313
314 # Store the associated file to the "contents" file. Cover.pdf not needed.
315 if ($afile ne "cover.jpg") {
316 print $handle "$assoc_file_rec->[1]\t$bundle\n";
317 }
318 }
319
320 $filename = &util::filename_cat($working_dir, $afile);
321
322 if ($afile ne "cover.jpg") {
323 &util::hard_link ($real_filename, $filename, $self->{'verbosity'});
324 $doc_obj->add_utf8_metadata ($doc_obj->get_top_section(),
325 "gsdlassocfile",
326 "$afile:$assoc_file_rec->[2]:$dir");
327 }
328 } elsif ($self->{'verbosity'} > 2) {
329 print $outhandler "DSpacePlugout::process couldn't copy the associated file " .
330 "$real_filename to $afile\n";
331 }
332 }
333}
334
335
336sub get_new_doc_dir{
337 my $self = shift (@_);
338 my($working_info,$working_dir,$OID) = @_;
339
340 return $OID;
341
342}
Note: See TracBrowser for help on using the repository browser.