root/main/trunk/greenstone2/perllib/plugouts/DSpacePlugout.pm @ 32530

Revision 32511, 10.6 KB (checked in by ak19, 13 months ago)

Running plugoutinfo.pl with describeall or listall flag would break on FedoraMETSPlugout when either FEDORA_HOME or FEDORA_VERSION aren't set (as is often the case), as there's a die statement in the BEGIN of FedoraMETSPlugout. Needed to run die if either FEDORA env var is not set only if the plugout is NOT in info_only mode in plugout constructor. However, info_only mode was never set in any of the plugouts, so had to add set up the infrastructure for it in plugoutinfo.pl and plugout.pm. Then added the info_only test to all teh plugouts, even though it's redundant in most of them for making sure future changes to any plugout's constructors does not break plugoutinfo.pl.

  • Property svn:keywords set to Author Date Id Revision
Line 
1###########################################################################
2#
3# DSpacePlugout.pm -- the plugout module for DSpace archives
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2006 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package DSpacePlugout;
27
28use strict;
29no strict 'refs';
30use utf8;
31eval {require bytes};
32use util;
33use FileUtils;
34use BasePlugout;
35
36sub BEGIN {
37    @DSpacePlugout::ISA = ('BasePlugout');
38}
39
40my $arguments = [
41       { 'name' => "metadata_prefix",
42    'desc' => "{DSpacePlugout.metadata_prefix}",
43    'type' => "string",   
44    'reqd' => "no",
45    'hiddengli' => "no"} ];
46
47
48my $options = { 'name'     => "DSpacePlugout",
49        'desc'     => "{DSpacePlugout.desc}",
50        'abstract' => "no",
51        'inherits' => "yes",
52                'args'     => $arguments };
53
54sub new {
55    my ($class) = shift (@_);
56    my ($plugoutlist, $inputargs,$hashArgOptLists) = @_;
57    push(@$plugoutlist, $class);
58
59    push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
60    push(@{$hashArgOptLists->{"OptList"}},$options);
61
62    my $self = new BasePlugout($plugoutlist,$inputargs,$hashArgOptLists);
63   
64    if ($self->{'info_only'}) {
65        # don't worry about any options etc
66        return bless $self, $class;
67    }
68#    print STDERR "***** metadata prefix = \"", $self->{'metadata_prefix'}, "\"\n";
69
70    return bless $self, $class;
71}
72
73sub saveas_dspace_metadata
74{
75    my $self = shift (@_);
76    my ($doc_obj,$working_dir,$metadata_file,$docroot,$metadata_prefix) = @_;
77
78    # my $docroot_attributes = ($metadata_prefix eq "dc") ? undef : "schema=\"$metadata_prefix\"";
79    my $docroot_attributes = "schema=\"$metadata_prefix\"";
80 
81    my $doc_dc_file = &FileUtils::filenameConcatenate ($working_dir, $metadata_file);
82    $self->open_xslt_pipe($doc_dc_file,$self->{'xslt_file'});
83
84    my $outhandler;
85    if (defined $self->{'xslt_writer'}){
86    $outhandler = $self->{'xslt_writer'};
87    }
88    else{
89    $outhandler = $self->get_output_handler($doc_dc_file);
90     }
91   
92    $self->output_general_xml_header($outhandler, $docroot, $docroot_attributes);
93
94    my $metadata_hashmap = $doc_obj->get_metadata_hashmap($doc_obj->get_top_section(),
95                                                          $metadata_prefix);
96
97    if(defined $metadata_prefix && $metadata_prefix ne "") {
98    # merge dc with any ex.dc
99
100    my $ex_dc_metadata_hashmap = $doc_obj->get_metadata_hashmap($doc_obj->get_top_section(),
101                                                          "ex.dc");
102
103    foreach my $metaname (keys %$ex_dc_metadata_hashmap) {
104        my $metaname_without_ex_prefix = $metaname;
105        $metaname_without_ex_prefix =~ s/^ex\.(.*)/$1/; # remove any ex from the ex.dc prefix
106
107        # if there's an ex.dc value for a metaname for which no dc
108        # value was assigned, put the ex.dc value into the hashmap
109        if(!defined $metadata_hashmap->{$metaname_without_ex_prefix}) {
110        $metadata_hashmap->{$metaname_without_ex_prefix} = [];
111        push(@{$metadata_hashmap->{$metaname_without_ex_prefix}},@{$ex_dc_metadata_hashmap->{$metaname}});
112        }
113    }
114
115    }
116
117    foreach my $metaname (keys %$metadata_hashmap) {
118      my $metavals = $metadata_hashmap->{$metaname};
119
120      my $qualifier = undef;
121      my $element;
122      if ($metaname =~ m/^(.*?)\^(.*?)$/) {
123        $element = $1;
124        $qualifier = $2;
125        $qualifier = lc($qualifier);
126      }
127      else {
128        $element = $metaname;
129      }
130      $element =~ s/^.*?\.//;
131      $element = lc($element);
132
133      foreach my $metaval (@$metavals) {
134       
135        #if element is empty then no need to export it.
136       
137        if ($metaval =~ /\S/) {
138            print $outhandler "  <dcvalue element=\"$element\"";
139            #If no qualifier then add qualifier="none"
140            #print $outhandler " qualifier=\"$qualifier\"" if defined $qualifier;
141            if (defined $qualifier) {
142                print $outhandler " qualifier=\"$qualifier\"" ;
143            }
144            else {
145                print $outhandler " qualifier=\"none\" language=\"\"" ;
146            }
147            print $outhandler ">$metaval";
148            print $outhandler "</dcvalue>\n";
149        }
150       
151       
152       
153      }
154    }
155   
156    $self->output_general_xml_footer($outhandler,$docroot);
157   
158    if (defined $self->{'xslt_writer'}){     
159    $self->close_xslt_pipe();
160    }
161    else{
162    close($outhandler);
163    }
164
165}
166
167sub saveas {
168    my $self = shift (@_);
169    my ($doc_obj,$doc_dir) = @_;
170
171    my $output_dir = $self->get_output_dir();
172    &FileUtils::makeAllDirectories ($output_dir) unless -e $output_dir;
173
174    my $working_dir = &FileUtils::filenameConcatenate ($output_dir, $doc_dir);   
175    &FileUtils::makeAllDirectories ($working_dir, $doc_dir);
176
177    #########################
178    # save the handle file
179    #########################
180    my $outhandle = $self->{'output_handle'};
181 
182    my $generate_handle = 0;
183    if ($generate_handle) {
184        # Genereate handle file
185        # (Note: this section of code would benefit from being restructured)
186        my $doc_handle_file = &FileUtils::filenameConcatenate ($working_dir, "handle");
187   
188        my $env_hp = $ENV{'DSPACE_HANDLE_PREFIX'};
189        my $handle_prefix = (defined $env_hp) ? $env_hp : "123456789";
190
191        my $outhandler =  $self->get_output_handler($doc_handle_file);
192
193        my ($handle) = ($doc_dir =~ m/^(.*)(:?\.dir)?$/);
194
195        print $outhandler "$handle_prefix/$handle\n";
196   
197        close ($outhandler);
198    }
199   
200    #########################
201    # save the content file
202    #########################
203    my $doc_contents_file = &FileUtils::filenameConcatenate ($working_dir, "contents");
204   
205    my $outhandler =  $self->get_output_handler($doc_contents_file);
206
207    $self->process_assoc_files ($doc_obj, $doc_dir, $outhandler);
208   
209    $self->process_metafiles_metadata ($doc_obj);
210
211    close($outhandler);
212
213    #############################
214    # save the dublin_core.xml file
215    ###############################
216#      my $doc_dc_file = &FileUtils::filenameConcatenate ($working_dir, "dublin_core.xml");
217#      $self->open_xslt_pipe($doc_dc_file,$self->{'xslt_file'});
218
219#      if (defined $self->{'xslt_writer'}){
220#   $outhandler = $self->{'xslt_writer'};
221#      }
222#      else{
223#   $outhandler = $self->get_output_handler($doc_dc_file);
224#       }
225   
226#      $self->output_general_xml_header($outhandler, "dublin_core");
227
228#      my $all_text = $self->get_dc_metadata($doc_obj, $doc_obj->get_top_section());
229#      print $outhandler $all_text;
230
231#      $self->output_general_xml_footer($outhandler,"dublin_core");
232   
233#      if (defined $self->{'xslt_writer'}){     
234#   $self->close_xslt_pipe();
235#      }
236#      else{
237#   close($outhandler);
238#      }
239
240    $self->saveas_dspace_metadata($doc_obj,$working_dir,
241                                  "dublin_core.xml","dublin_core","dc");
242
243    my $metadata_prefix_list = $self->{'metadata_prefix'};
244#    print STDERR "***!! md prefix = $metadata_prefix_list\n";
245
246    my @metadata_prefixes = split(/,\s*/,$metadata_prefix_list);
247    foreach my $ep (@metadata_prefixes) {
248      $self->saveas_dspace_metadata($doc_obj,$working_dir,
249                                    "metadata_$ep.xml","dublin_core",$ep);
250    }
251
252    $self->{'short_doc_file'} =  &FileUtils::filenameConcatenate ($doc_dir, "dublin_core.xml");
253    $self->store_output_info_reference($doc_obj);
254}
255
256 sub process_assoc_files {
257    my $self = shift (@_);
258    my ($doc_obj, $doc_dir, $handle) = @_;
259
260    my $outhandler = $self->{'output_handle'};
261   
262    my $output_dir = $self->get_output_dir();
263    return if (!defined $output_dir);
264
265    my $working_dir = &FileUtils::filenameConcatenate($output_dir, $doc_dir);
266
267    my @assoc_files = ();
268    my $filename;;
269
270    my $source_filename = $doc_obj->get_source_filename();
271
272    my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
273
274    if (defined $collect_dir) {
275    my $dirsep_regexp = &util::get_os_dirsep();
276
277    if ($collect_dir !~ /$dirsep_regexp$/) {
278        $collect_dir .= &util::get_dirsep(); # ensure there is a slash at the end
279    }
280
281    # This test is never going to fail on Windows -- is this a problem?
282    if ($source_filename !~ /^$dirsep_regexp/) {
283        $source_filename = &FileUtils::filenameConcatenate($collect_dir, $source_filename);
284    }
285    }
286   
287    my ($tail_filename) = ($source_filename =~ m/([^\/\\]*)$/);
288   
289    print $handle "$tail_filename\n";
290   
291    $filename = &FileUtils::filenameConcatenate($working_dir, $tail_filename);
292    &FileUtils::hardLink ($source_filename, $filename, $self->{'verbosity'});
293             
294    # set the assocfile path (even if we have no assoc files - need this for lucene)
295    $doc_obj->set_utf8_metadata_element ($doc_obj->get_top_section(),
296                     "assocfilepath",
297                     "$doc_dir");
298    foreach my $assoc_file_rec (@{$doc_obj->get_assoc_files()}) {
299    my ($dir, $afile) = $assoc_file_rec->[1] =~ /^(.*?)([^\/\\]+)$/;
300    $dir = "" unless defined $dir;
301       
302   
303    my $real_filename = $assoc_file_rec->[0];
304    # for some reasons the image associate file has / before the full path
305    $real_filename =~ s/^\\(.*)/$1/i;
306    if (-e $real_filename) {
307        # escape backslashes and brackets in path for upcoming regex match
308        my $escaped_source_filename = &util::filename_to_regex($source_filename);
309        if ($real_filename =~ m/$escaped_source_filename$/) {
310        next;
311        }
312        else {
313        my $bundle = "bundle:ORIGINAL";
314       
315        if ($afile =~ m/^thumbnail\./) {
316            $bundle = "bundle:THUMBNAIL";
317        }
318
319        # Store the associated file to the "contents" file. Cover.pdf not needed.
320        if ($afile ne "cover.jpg") {
321            print $handle "$assoc_file_rec->[1]\t$bundle\n";
322        }
323        }
324   
325        $filename = &FileUtils::filenameConcatenate($working_dir, $afile);
326       
327        if ($afile ne "cover.jpg") {
328            &FileUtils::hardLink ($real_filename, $filename, $self->{'verbosity'});
329            $doc_obj->add_utf8_metadata ($doc_obj->get_top_section(),
330                 "gsdlassocfile",
331                 "$afile:$assoc_file_rec->[2]:$dir");
332        }
333    } elsif ($self->{'verbosity'} > 2) {
334        print $outhandler "DSpacePlugout::process couldn't copy the associated file " .
335        "$real_filename to $afile\n";
336    }
337    }
338}
339                         
340
341sub get_new_doc_dir{
342   my $self = shift (@_); 
343   my($working_info,$working_dir,$OID) = @_;
344 
345   return $OID;
346
347}
Note: See TracBrowser for help on using the browser.