source: main/trunk/greenstone2/perllib/plugouts/DSpacePlugout.pm@ 23824

Last change on this file since 23824 was 23824, checked in by sjm84, 13 years ago

Phase one of commiting the files changed to extend the DSpace exporting capabilities to include more than just dublin core metadata

  • Property svn:keywords set to Author Date Id Revision
File size: 9.5 KB
RevLine 
[12330]1###########################################################################
2#
3# DSpacePlugout.pm -- the plugout module for DSpace archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package DSpacePlugout;
27
28use strict;
29no strict 'refs';
[23824]30use utf8;
[12330]31eval {require bytes};
32use util;
[17203]33use BasePlugout;
[12330]34
35sub BEGIN {
[17203]36 @DSpacePlugout::ISA = ('BasePlugout');
[12330]37}
38
[23824]39my $arguments = [
40 { 'name' => "metadata_prefix",
41 'desc' => "{DSpacePlugout.metadata_prefix}",
42 'type' => "string",
43 'reqd' => "no",
44 'hiddengli' => "no"} ];
[12330]45
[23824]46
[12330]47my $options = { 'name' => "DSpacePlugout",
48 'desc' => "{DSpacePlugout.desc}",
49 'abstract' => "no",
[23824]50 'inherits' => "yes",
51 'args' => $arguments };
[12330]52
53sub new {
54 my ($class) = shift (@_);
55 my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
56 push(@$plugoutlist, $class);
57
[17203]58 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
59 push(@{$hashArgOptLists->{"OptList"}},$options);
[12330]60
[17203]61 my $self = new BasePlugout($plugoutlist,$inputargs,$hashArgOptLists);
[23824]62
63 print STDERR "***** metadata prefix = \"", $self->{'metadata_prefix'}, "\"\n";
64
[17203]65 return bless $self, $class;
[12330]66}
67
[23824]68sub saveas_dspace_metadata
69{
70 my $self = shift (@_);
71 my ($doc_obj,$working_dir,$metadata_file,$docroot,$metadata_prefix) = @_;
72
73 # my $docroot_attributes = ($metadata_prefix eq "dc") ? undef : "schema=\"$metadata_prefix\"";
74 my $docroot_attributes = "schema=\"$metadata_prefix\"";
75
76 my $doc_dc_file = &util::filename_cat ($working_dir, $metadata_file);
77 $self->open_xslt_pipe($doc_dc_file,$self->{'xslt_file'});
78
79 my $outhandler;
80 if (defined $self->{'xslt_writer'}){
81 $outhandler = $self->{'xslt_writer'};
82 }
83 else{
84 $outhandler = $self->get_output_handler($doc_dc_file);
85 }
86
87 $self->output_general_xml_header($outhandler, $docroot, $docroot_attributes);
88
89 my $metadata_hashmap = $doc_obj->get_metadata_hashmap($doc_obj->get_top_section(),
90 $metadata_prefix);
91
92 foreach my $metaname (keys %$metadata_hashmap) {
93 my $metavals = $metadata_hashmap->{$metaname};
94
95 my $qualifier = undef;
96 my $element;
97 if ($metaname =~ m/^(.*?)\^(.*?)$/) {
98 $element = $1;
99 $qualifier = $2;
100 $qualifier = lc($qualifier);
101 }
102 else {
103 $element = $metaname;
104 }
105 $element =~ s/^.*?\.//;
106 $element = lc($element);
107
108 foreach my $metaval (@$metavals) {
109
110 #if element is empty then no need to export it.
111
112 if ($metaval =~ /\S/) {
113 print $outhandler " <dcvalue element=\"$element\"";
114 #If no qualifier then add qualifier="none"
115 #print $outhandler " qualifier=\"$qualifier\"" if defined $qualifier;
116 if (defined $qualifier) {
117 print $outhandler " qualifier=\"$qualifier\"" ;
118 }
119 else {
120 print $outhandler " qualifier=\"none\" language=\"\"" ;
121 }
122 print $outhandler ">$metaval";
123 print $outhandler "</dcvalue>\n";
124 }
125
126
127
128 }
129 }
130
131 $self->output_general_xml_footer($outhandler,$docroot);
132
133 if (defined $self->{'xslt_writer'}){
134 $self->close_xslt_pipe();
135 }
136 else{
137 close($outhandler);
138 }
139
140}
141
[12330]142sub saveas {
143 my $self = shift (@_);
144 my ($doc_obj,$doc_dir) = @_;
145
146 my $output_dir = $self->get_output_dir();
147 &util::mk_all_dir ($output_dir) unless -e $output_dir;
148
149 my $working_dir = &util::filename_cat ($output_dir, $doc_dir);
150 &util::mk_all_dir ($working_dir, $doc_dir);
151
152 #########################
153 # save the handle file
154 #########################
155 my $outhandle = $self->{'output_handle'};
156
[23824]157 my $generate_handle = 0;
158 if ($generate_handle) {
159 # Genereate handle file
160 # (Note: this section of code would benefit from being restructured)
161 my $doc_handle_file = &util::filename_cat ($working_dir, "handle");
[12330]162
[23824]163 my $env_hp = $ENV{'DSPACE_HANDLE_PREFIX'};
164 my $handle_prefix = (defined $env_hp) ? $env_hp : "123456789";
[12330]165
[23824]166 my $outhandler = $self->get_output_handler($doc_handle_file);
[12330]167
[23824]168 my ($handle) = ($doc_dir =~ m/^(.*)(:?\.dir)?$/);
[12330]169
[23824]170 print $outhandler "$handle_prefix/$handle\n";
[12330]171
[23824]172 close ($outhandler);
173 }
174
[12330]175 #########################
176 # save the content file
177 #########################
178 my $doc_contents_file = &util::filename_cat ($working_dir, "contents");
179
[23824]180 my $outhandler = $self->get_output_handler($doc_contents_file);
[12330]181
182 $self->process_assoc_files ($doc_obj, $doc_dir, $outhandler);
183
[20642]184 $self->process_metafiles_metadata ($doc_obj);
185
[12330]186 close($outhandler);
187
[23824]188 #############################
[12330]189 # save the dublin_core.xml file
190 ###############################
[23824]191# my $doc_dc_file = &util::filename_cat ($working_dir, "dublin_core.xml");
192# $self->open_xslt_pipe($doc_dc_file,$self->{'xslt_file'});
[12330]193
[23824]194# if (defined $self->{'xslt_writer'}){
195# $outhandler = $self->{'xslt_writer'};
196# }
197# else{
198# $outhandler = $self->get_output_handler($doc_dc_file);
199# }
[12330]200
[23824]201# $self->output_general_xml_header($outhandler, "dublin_core");
[13172]202
[23824]203# my $all_text = $self->get_dc_metadata($doc_obj, $doc_obj->get_top_section());
204# print $outhandler $all_text;
[13172]205
[23824]206# $self->output_general_xml_footer($outhandler,"dublin_core");
[12330]207
[23824]208# if (defined $self->{'xslt_writer'}){
209# $self->close_xslt_pipe();
210# }
211# else{
212# close($outhandler);
213# }
214
215 $self->saveas_dspace_metadata($doc_obj,$working_dir,
216 "dublin_core.xml","dublin_core","dc");
217
218 my $metadata_prefix_list = $self->{'metadata_prefix'};
219 print STDERR "***!! md prefix = $metadata_prefix_list\n";
220
221 my @metadata_prefixes = split(/,\s*/,$metadata_prefix_list);
222 foreach my $ep (@metadata_prefixes) {
223 $self->saveas_dspace_metadata($doc_obj,$working_dir,
224 "metadata_$ep.xml","dublin_core",$ep);
[12330]225 }
[23824]226
[12330]227 $self->{'short_doc_file'} = &util::filename_cat ($doc_dir, "dublin_core.xml");
[12363]228 $self->store_output_info_reference($doc_obj);
[12330]229}
230
231 sub process_assoc_files {
232 my $self = shift (@_);
233 my ($doc_obj, $doc_dir, $handle) = @_;
234
235 my $outhandler = $self->{'output_handle'};
236
237 my $output_dir = $self->get_output_dir();
238 return if (!defined $output_dir);
239
240 my $working_dir = &util::filename_cat($output_dir, $doc_dir);
241
242 my @assoc_files = ();
243 my $filename;;
244
245 my $source_filename = $doc_obj->get_source_filename();
246
247 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
248
249 if (defined $collect_dir) {
250 my $dirsep_regexp = &util::get_os_dirsep();
251
252 if ($collect_dir !~ /$dirsep_regexp$/) {
253 $collect_dir .= &util::get_dirsep(); # ensure there is a slash at the end
254 }
255
256 # This test is never going to fail on Windows -- is this a problem?
257 if ($source_filename !~ /^$dirsep_regexp/) {
258 $source_filename = &util::filename_cat($collect_dir, $source_filename);
259 }
260 }
261
[20903]262 my ($tail_filename) = ($source_filename =~ m/([^\/\\]*)$/);
[12330]263
264 print $handle "$tail_filename\n";
265
266 $filename = &util::filename_cat($working_dir, $tail_filename);
[18463]267 &util::hard_link ($source_filename, $filename, $self->{'verbosity'});
[12330]268
269 # set the assocfile path (even if we have no assoc files - need this for lucene)
270 $doc_obj->set_utf8_metadata_element ($doc_obj->get_top_section(),
271 "assocfilepath",
272 "$doc_dir");
273 foreach my $assoc_file_rec (@{$doc_obj->get_assoc_files()}) {
274 my ($dir, $afile) = $assoc_file_rec->[1] =~ /^(.*?)([^\/\\]+)$/;
275 $dir = "" unless defined $dir;
276
277
278 my $real_filename = $assoc_file_rec->[0];
279 # for some reasons the image associate file has / before the full path
280 $real_filename =~ s/^\\(.*)/$1/i;
281 if (-e $real_filename) {
[22921]282 # escape backslashes in path for upcoming regex match
283 my $escaped_source_filename = $source_filename;
284 $escaped_source_filename =~ s/\\/\\\\/g;
285 if ($real_filename =~ m/$escaped_source_filename$/) {
[12330]286 next;
287 }
288 else {
289 my $bundle = "bundle:ORIGINAL";
290
291 if ($afile =~ m/^thumbnail\./) {
292 $bundle = "bundle:THUMBNAIL";
293 }
294
[23824]295 # Store the associated file to the "contents" file. Cover.pdf not needed.
296 if ($afile ne "cover.jpg") {
297 print $handle "$assoc_file_rec->[1]\t$bundle\n";
[12330]298 }
[23824]299 }
[12330]300
301 $filename = &util::filename_cat($working_dir, $afile);
302
[23824]303 if ($afile ne "cover.jpg") {
304 &util::hard_link ($real_filename, $filename, $self->{'verbosity'});
305 $doc_obj->add_utf8_metadata ($doc_obj->get_top_section(),
306 "gsdlassocfile",
307 "$afile:$assoc_file_rec->[2]:$dir");
308 }
[12330]309 } elsif ($self->{'verbosity'} > 2) {
310 print $outhandler "DSpacePlugout::process couldn't copy the associated file " .
311 "$real_filename to $afile\n";
312 }
313 }
314}
315
316
317sub get_new_doc_dir{
318 my $self = shift (@_);
319 my($working_info,$working_dir,$OID) = @_;
320
321 return $OID;
322
323}
Note: See TracBrowser for help on using the repository browser.