source: main/trunk/greenstone2/bin/script/export.pl@ 21580

Last change on this file since 21580 was 21580, checked in by mdewsnip, 14 years ago

Changed import.pl and export.pl to pass the infodbtype read from the collect.cfg file into the arcinfo object. Part of making the code less GDBM-specific.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 23.6 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# export.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2004 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program exports a collection in a specific format (e.g. METS or DSpace) by importing it and then saving it in that format.
30
31package export;
32
BEGIN {
    # Refuse to start unless the Greenstone environment has been set up.
    if (!defined $ENV{'GSDLHOME'}) {
        die "GSDLHOME not set\n";
    }
    if (!defined $ENV{'GSDLOS'}) {
        die "GSDLOS not set\n";
    }

    # Core library directories. Each one is unshifted in turn, so the last
    # entry listed here ends up being searched first.
    my $gsdl_home = $ENV{'GSDLHOME'};
    foreach my $core_dir ("perllib", "perllib/cpan", "perllib/cpan/perl-5.8",
                          "perllib/plugins", "perllib/plugouts") {
        unshift(@INC, "$gsdl_home/$core_dir");
    }

    # Extension library directories. Each pair names the environment
    # variable holding a colon-separated list of extensions and the
    # environment variable holding the directory whose ext/ folder
    # contains them.
    my @ext_dirs    = ("perllib", "perllib/cpan", "perllib/plugins", "perllib/plugouts");
    my @ext_sources = (
        [ 'GSDLEXTS',  'GSDLHOME' ],
        [ 'GSDL3EXTS', 'GSDL3SRCHOME' ],
    );

    foreach my $source (@ext_sources) {
        my ($list_var, $home_var) = @$source;
        next unless defined $ENV{$list_var};

        foreach my $ext_name (split(/:/, $ENV{$list_var})) {
            my $ext_prefix = "$ENV{$home_var}/ext/$ext_name";

            foreach my $ext_dir (@ext_dirs) {
                unshift(@INC, "$ext_prefix/$ext_dir");
            }
        }
    }
}
66
67use strict;
68no strict 'refs'; # allow filehandles to be variables and vice versa
69no strict 'subs'; # allow barewords (eg STDERR) as function arguments
70
71use arcinfo;
72use colcfg;
73use plugin;
74use plugout;
75use manifest;
76use inexport;
77use util;
78use scriptutil;
79use FileHandle;
80use gsprintf 'gsprintf';
81use printusage;
82use parse2;
83
84
# Values accepted by -OIDtype. Every entry pairs the option value with the
# resource-bundle key of its description.
my $oidtype_list = [
    map { +{ 'name' => $_, 'desc' => "{import.OIDtype.$_}" } }
        qw(hash assigned incremental dirname)
];

# what format to export as
my $saveas_list = [
    map { +{ 'name' => $_, 'desc' => "{export.saveas.$_}" } }
        qw(GreenstoneMETS FedoraMETS MARCXML DSpace)
];
106
107
108# Possible attributes for each argument
109# name: The name of the argument
110# desc: A description (or more likely a reference to a description) for this argument
111# type: The type of control used to represent the argument. Options include: string, int, flag, regexp, metadata, language, enum etc
112# reqd: Is this argument required?
113# hiddengli: Is this argument hidden in GLI?
114# modegli: The lowest detail mode this argument is visible at in GLI
115
# The -saveas option. It is kept in its own variable because it appears both
# in the full argument list and in the reduced -listall option set.
my $saveas_argument = {
    name    => "saveas",
    desc    => "{export.saveas}",
    type    => "enum",
    list    => $saveas_list,
    deft    => "GreenstoneMETS",
    reqd    => "no",
    modegli => "3",
};
124
125
# Every command-line option understood by export.pl, in the order in which
# the usage output lists them.
my $arguments = [
    $saveas_argument,
    { name => "exportdir", desc => "{export.exportdir}",
      type => "string", reqd => "no", hiddengli => "yes" },
    { name => "importdir", desc => "{import.importdir}",
      type => "string", reqd => "no", hiddengli => "yes" },
    { name => "collectdir", desc => "{export.collectdir}",
      type => "string",
      # parsearg left "" as default
      #'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"),
      deft => "", reqd => "no", hiddengli => "yes" },
    { name => "site", desc => "{import.site}",
      type => "string", deft => "", reqd => "no", hiddengli => "yes" },
    { name => "manifest", desc => "{import.manifest}",
      type => "string", deft => "", reqd => "no", hiddengli => "yes" },
    { name => "debug", desc => "{export.debug}",
      type => "flag", reqd => "no", hiddengli => "yes" },
    { name => "faillog", desc => "{export.faillog}",
      type => "string", deft => "", reqd => "no", modegli => "3" },
    # does this make sense?
    { name => "incremental", desc => "{import.incremental}",
      type => "flag", hiddengli => "yes" },
    { name => "keepold", desc => "{export.keepold}",
      type => "flag", reqd => "no", hiddengli => "yes" },
    { name => "removeold", desc => "{export.removeold}",
      type => "flag", reqd => "no", hiddengli => "yes" },
    { name => "language", desc => "{scripts.language}",
      type => "string", reqd => "no", hiddengli => "yes" },
    { name => "maxdocs", desc => "{export.maxdocs}",
      type => "int", reqd => "no", range => "1,", modegli => "1" },
    { name => "OIDtype", desc => "{import.OIDtype}",
      type => "enum", list => $oidtype_list,
      # parsearg left "" as default
      #'deft' => "hash",
      reqd => "no", modegli => "2" },
    { name => "OIDmetadata", desc => "{import.OIDmetadata}",
      type => "string",
      #'type' => "metadata", #doesn't work properly in GLI
      deft => "dc.Identifier", reqd => "no", modegli => "2" },
    { name => "out", desc => "{export.out}",
      type => "string", deft => "STDERR", reqd => "no", hiddengli => "yes" },
    { name => "statsfile", desc => "{export.statsfile}",
      type => "string", deft => "STDERR", reqd => "no", hiddengli => "yes" },
    { name => "xsltfile", desc => "{BasPlugout.xslt_file}",
      type => "string", reqd => "no", hiddengli => "yes" },
    { name => "xslt_txt", desc => "{METSPlugout.xslt_txt}",
      type => "string", reqd => "no", hiddengli => "no" },
    { name => "xslt_mets", desc => "{METSPlugout.xslt_mets}",
      type => "string", reqd => "no", hiddengli => "no" },
    { name => "fedora_namespace",
      desc => "{FedoraMETSPlugout.fedora_namespace} (-saveas FedoraMETS)",
      type => "string", deft => "greenstone", reqd => "no", hiddengli => "no" },
    { name => "mapping_file",
      desc => "{MARCXMLPlugout.mapping_file} (-saveas MARCXML)",
      type => "string", reqd => "no", hiddengli => "no" },
    { name => "group_marc",
      desc => "{MARCXMLPlugout.group} (-saveas MARCXML)",
      type => "flag", reqd => "no", hiddengli => "no" },
    { name => "verbosity", desc => "{export.verbosity}",
      type => "int", range => "0,3", deft => "2", reqd => "no", modegli => "3" },
    { name => "gli", desc => "{scripts.gli}",
      type => "flag", reqd => "no", hiddengli => "yes" },
    { name => "listall", desc => "{export.listall}",
      type => "flag", reqd => "no" },
    { name => "xml", desc => "{scripts.xml}",
      type => "flag", reqd => "no", hiddengli => "yes" },
];
276
# Full option description, used for the normal usage output.
my $options = {
    name => "export.pl",
    desc => "{export.desc}",
    args => $arguments,
};

# Reduced option description printed for -listall: only the -saveas choices.
my $listall_options = {
    name => "export.pl",
    desc => "{export.desc}",
    args => [ $saveas_argument ],
};


# Run the script.
&main();
287
# main -- entry point of export.pl.
#
# Parses the command line against $arguments, reads the collection
# configuration, loads the plugins named there together with one plugout
# (taken from the collection configuration or derived from -saveas), and
# then passes the import directory -- or the files named in a manifest --
# through the plugins so that the plugout writes them to the export
# directory. Afterwards the export information and the statistics are
# written out.
#
# Takes no parameters; all input comes from @ARGV and %ENV. Usage problems
# and fatal errors are reported and then raised with die.
sub main {
    # params
    # NOTE: these lexicals are assigned by name from the parse result in the
    # string eval below, so their names must match the option names.
    my ($language, $verbosity, $debug,
        $collectdir, $importdir, $exportdir, $site, $manifest,
        $incremental, $incremental_mode, $keepold, $removeold,
        $saveas,
        $OIDtype, $OIDmetadata,
        $maxdocs, $statsfile,
        $gzip,
        $out, $faillog, $gli, $listall,
        # plugout specific ones
        $mapping_file, $xsltfile,
        $xslt_mets, $xslt_txt, $fedora_namespace, $group_marc);

    my $xml = 0;

    # other vars
    my ($configfilename, $collection, $collectcfg,
        $expinfo_doc_filename, $expinfo_src_filename, $export_info,
        $gs_mode,
        $processor, $pluginfo);

    my $service = "export";

    my $hashParsingResult = {};
    # general options available to all plugins
    my $intArgLeftinAfterParsing = parse2::parse(\@ARGV, $arguments, $hashParsingResult, "allow_extra_options");

    # If parse returns -1 then something has gone wrong
    if ($intArgLeftinAfterParsing == -1) {
        &PrintUsage::print_txt_usage($options, "{export.params}");
        die "\n";
    }

    # Copy each parsed option into the lexical variable of the same name.
    # This has to be a string eval because the target is chosen by name; the
    # eval result is not checked, so a key without a matching variable is
    # skipped without any message.
    foreach my $strVariable (keys %$hashParsingResult) {
        eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}";
    }


    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language && $language =~ /\S/) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($listall) {
        if ($xml) {
            &PrintUsage::print_xml_usage($listall_options);
        }
        else {
            &PrintUsage::print_txt_usage($listall_options, "{export.params}");
        }
        die "\n";
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        die "\n";
    }

    if ($gli) { # the gli wants strings to be in UTF-8
        # Explicit parentheses: a bare "&name;" call would forward our @_.
        &gsprintf::output_strings_in_UTF8();
    }

    # now check that we had exactly one leftover arg, which should be
    # the collection name. We don't want to do this earlier, cos
    # -xml arg doesn't need a collection name
    # Or if the user specified -h, then we output the usage also
    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) {
        &PrintUsage::print_txt_usage($options, "{export.params}");
        die "\n";
    }

    # Redirect general output to a file unless STDERR/STDOUT was requested.
    # The handle is passed around by its package-qualified name.
    my $close_out = 0;
    if ($out !~ /^(STDERR|STDOUT)$/i) {
        unless (open(OUT, '>', $out)) {
            &gsprintf(STDERR, "{common.cannot_open_output_file}\n", $out);
            die;
        }
        $out = 'export::OUT';
        $close_out = 1;
    }
    $out->autoflush(1);

    # get and check the collection name
    if (($collection = &colcfg::use_collection($site, @ARGV, $collectdir)) eq "") {
        &PrintUsage::print_txt_usage($options, "{export.params}");
        die "\n";
    }
    # add collection's perllib dir into include path in
    # case we have collection specific modules
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");

    # check that we can open the faillog
    if ($faillog eq "") {
        $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    unless (open(FAILLOG, '>', $faillog)) {
        &gsprintf(STDERR, "{export.cannot_open_fail_log}\n", $faillog);
        die;
    }
    # Keep the file name for the statistics; $faillog becomes the handle name.
    my $faillogname = $faillog;
    $faillog = 'export::FAILLOG';
    $faillog->autoflush(1);

    # Read in the collection configuration file.
    ($configfilename, $gs_mode) = &colcfg::get_collect_cfg_name($out);
    $collectcfg = &colcfg::read_collection_cfg ($configfilename, $gs_mode);

    # Values from the collection configuration only apply where the command
    # line left the option empty.
    if (defined $collectcfg->{'importdir'} && $importdir eq "") {
        $importdir = $collectcfg->{'importdir'};
    }
    if (defined $collectcfg->{'exportdir'} && $exportdir eq "") {
        $exportdir = $collectcfg->{'exportdir'};
    }

    # fill in the default import and export directories if none
    # were supplied, turn all \ into / and remove trailing /
    $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq "";
    $importdir =~ s/[\\\/]+/\//g;
    $importdir =~ s/\/$//;
    if (!-e $importdir) {
        &gsprintf($out, "{import.no_import_dir}\n\n", $importdir);
        die "\n";
    }

    $exportdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "export") if $exportdir eq "";
    $exportdir =~ s/[\\\/]+/\//g;
    $exportdir =~ s/\/$//;

    my $plugins = [];
    if (defined $collectcfg->{'plugin'}) {
        $plugins = $collectcfg->{'plugin'};
    }
    # some global options for the plugins
    my @global_opts = ();

    if ($verbosity !~ /\d+/) {
        if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
            $verbosity = $collectcfg->{'verbosity'};
        }
        else {
            $verbosity = 2; # the default
        }
    }

    if (defined $collectcfg->{'manifest'} && $manifest eq "") {
        $manifest = $collectcfg->{'manifest'};
    }
    if (defined $collectcfg->{'gzip'} && !$gzip) {
        if ($collectcfg->{'gzip'} =~ /^true$/i) {
            $gzip = 1;
        }
    }
    if ($maxdocs !~ /\-?\d+/) {
        if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
            $maxdocs = $collectcfg->{'maxdocs'};
        }
        else {
            $maxdocs = -1; # the default
        }
    }

    # groupsize is in import - does it make sense here??

    if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental|assigned|dirname)$/)) {
        if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental|assigned|dirname)$/) {
            $OIDtype = $collectcfg->{'OIDtype'};
        }
        else {
            $OIDtype = "hash"; # the default
        }
    }

    if ((!defined $OIDmetadata) || ($OIDmetadata eq "")) {
        if (defined $collectcfg->{'OIDmetadata'}) {
            $OIDmetadata = $collectcfg->{'OIDmetadata'};
        }
        else {
            $OIDmetadata = "dc.Identifier"; # the default
        }
    }

    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
        $debug = 1;
    }
    if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) {
        $gli = 1;
    }
    $gli = 0 unless defined $gli;

    # check keepold and removeold
    ($removeold, $keepold, $incremental, $incremental_mode)
        = &scriptutil::check_removeold_and_keepold($removeold, $keepold,
                                                   $incremental, "export",
                                                   $collectcfg);

    print STDERR "<export>\n" if $gli;

    my $manifest_lookup = manifest->new();
    if ($manifest ne "") {
        my $manifest_filename = $manifest;

        # A manifest name that does not start with a slash or backslash is
        # taken to be relative to the collection directory.
        if ($manifest_filename !~ m/^[\\\/]/) {
            $manifest_filename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, $manifest_filename);
        }

        $manifest =~ s/[\\\/]+/\//g;
        $manifest =~ s/\/$//;

        $manifest_lookup->parse($manifest_filename);
    }

    # load all the plugins
    $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts, $incremental_mode);

    if (scalar(@$pluginfo) == 0) {
        &gsprintf($out, "{import.no_plugins_loaded}\n");
        die "\n";
    }

    # remove the old contents of the export directory if needed
    if ($removeold) {
        if (-e $exportdir) {
            &gsprintf($out, "{export.removing_export}\n");
            &util::rm_r ($exportdir);
        }
        my $tmpdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "tmp");
        $tmpdir =~ s/[\\\/]+/\//g;
        $tmpdir =~ s/\/$//;
        if (-e $tmpdir) {
            &gsprintf($out, "{import.removing_tmpdir}\n");
            &util::rm_r ($tmpdir);
        }
    }

    # create the export dir if needed
    &util::mk_all_dir($exportdir);

    # read the export information file

    # the plugouts should be doing this!!
##    $expinfo_doc_filename = &util::filename_cat ($exportdir, "export.inf");
    $expinfo_doc_filename = &util::filename_cat ($exportdir, "archiveinf-doc");
    &util::rename_gdbm_file($expinfo_doc_filename); # ensures gdb in case we have an existing legacy ldb one - can this happen?
    $expinfo_doc_filename .= ".gdb";

    $expinfo_src_filename = &util::filename_cat ($exportdir, "archiveinf-src");
    &util::rename_gdbm_file($expinfo_src_filename); # ensures gdb in case we have an existing legacy ldb one - can this happen?
    $expinfo_src_filename .= ".gdb";


    $export_info = arcinfo->new($collectcfg->{'infodbtype'});
    $export_info->load_info ($expinfo_doc_filename);

    if ($manifest eq "") {
        # Load in list of files in export folder from last export (if present)
        $export_info->load_prev_import_filelist ($expinfo_src_filename);
    }

    # Choose the plugout: a matching 'plugout' entry in the collection
    # configuration wins, otherwise it is derived from -saveas.
    # NOTE(review): the configured value is matched as a string here but
    # indexed as an array reference below -- confirm what colcfg returns.
    my ($plugout);
    if (defined $collectcfg->{'plugout'} && $collectcfg->{'plugout'} =~ /^(.*METS|DSpace|MARCXML)Plugout/) {
        $plugout = $collectcfg->{'plugout'};
    }
    else {
        if ($saveas !~ /^(GreenstoneMETS|FedoraMETS|DSpace|MARCXML)$/) {
            push @$plugout, "GreenstoneMETSPlugout";
        }
        else {
            push @$plugout, $saveas."Plugout";
        }
    }

    my $plugout_name = $plugout->[0];

    # Append the plugout options; the format-specific ones are only passed
    # to the plugout they belong to.
    push @$plugout, ("-output_info", $export_info) if (defined $export_info);
    push @$plugout, ("-verbosity", $verbosity) if (defined $verbosity);
    push @$plugout, ("-debug") if ($debug);
    push @$plugout, ("-gzip_output") if ($gzip);
    push @$plugout, ("-output_handle", $out) if (defined $out);
    push @$plugout, ("-xslt_file", $xsltfile) if (defined $xsltfile && $xsltfile ne "");
    push @$plugout, ("-group") if ($group_marc && $plugout_name =~ m/^MARCXMLPlugout$/);
    push @$plugout, ("-mapping_file", $mapping_file) if (defined $mapping_file && $mapping_file ne "" && $plugout_name =~ m/^MARCXMLPlugout$/);
    push @$plugout, ("-xslt_mets", $xslt_mets) if (defined $xslt_mets && $xslt_mets ne "" && $plugout_name =~ m/^.*METSPlugout$/);
    push @$plugout, ("-xslt_txt", $xslt_txt) if (defined $xslt_txt && $xslt_txt ne "" && $plugout_name =~ m/^.*METSPlugout$/);
    push @$plugout, ("-fedora_namespace", $fedora_namespace) if (defined $fedora_namespace && $fedora_namespace ne "" && $plugout_name eq "FedoraMETSPlugout");

    $processor = &plugout::load_plugout($plugout);
    $processor->setoutputdir ($exportdir);

    $processor->set_OIDtype ($OIDtype, $OIDmetadata);

    &plugin::begin($pluginfo, $importdir, $processor, $maxdocs, $gli);

    if ($manifest eq "") {
        # process the import directory
        my $block_hash = {};
        my $metadata = {};
        # global blocking pass may set up some metadata
        &plugin::file_block_read($pluginfo, $importdir, "", $block_hash, $metadata, $gli);
        #&plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
        ### section below copied from import.pl
        if ($incremental) {
            # equivalent to saying ($keepold && ($incremental_mode eq "all"))

            &inexport::prime_doc_oid_count($exportdir);


            # Can now work out which files were new, already existed, and have
            # been deleted

            &inexport::new_vs_old_import_diff($export_info, $block_hash, $importdir,
                                              $exportdir, $verbosity, $incremental_mode);

            my @deleted_files = sort keys %{$block_hash->{'deleted_files'}};
            # Filter out any in gsdl/tmp area
            my @filtered_deleted_files = ();
            my $gsdl_tmp_area = &util::filename_cat($ENV{'GSDLHOME'}, "tmp");
            my $collect_tmp_area = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "tmp");
            $gsdl_tmp_area = &util::filename_to_regex($gsdl_tmp_area);
            $collect_tmp_area = &util::filename_to_regex($collect_tmp_area);


            foreach my $df (@deleted_files) {
                next if ($df =~ m/^$gsdl_tmp_area/);
                next if ($df =~ m/^$collect_tmp_area/);

                push(@filtered_deleted_files, $df);
            }


            @deleted_files = @filtered_deleted_files;

            if (scalar(@deleted_files) > 0) {
                print STDERR "Files deleted since last import:\n ";
                print STDERR join("\n ", @deleted_files), "\n";
            }

            my @new_files = sort keys %{$block_hash->{'new_files'}};
            if (scalar(@new_files) > 0) {
                print STDERR "New files since last import:\n ";
                print STDERR join("\n ", @new_files), "\n";
            }

            &inexport::mark_docs_for_deletion($export_info, $block_hash, \@deleted_files,
                                              $exportdir, $verbosity);

            &inexport::mark_docs_for_reindex($export_info, $block_hash,
                                             $exportdir, $verbosity);

            my @reindex_files = sort keys %{$block_hash->{'reindex_files'}};

            if (scalar(@reindex_files) > 0) {
                print STDERR "Files to reindex since last import:\n ";
                print STDERR join("\n ", @reindex_files), "\n";
            }


            # not sure if the following will work -- will the metadata data-structure be correctly initialized
            # in the right order?
#           foreach my $file (@new_files, @reindex_files) {
#               &plugin::read ($pluginfo, $importdir, $file, $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
#           }


            # Play it safe, and run through the entire folder, only processing new or edited files
            &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);

        }
        else {
            &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli);
        }

        ### end copy
    }
    else {
        # process any files marked for exporting
        foreach my $file (keys %{$manifest_lookup->{'index'}}) {
            &plugin::read ($pluginfo, $importdir, $file, {}, {}, $processor, $maxdocs, 0, $gli);
        }

        my @deleted_files = keys %{$manifest_lookup->{'delete'}};

        # NOTE(review): unlike the incremental branch above, no verbosity is
        # passed here -- confirm that is what mark_docs_for_deletion expects.
        &inexport::mark_docs_for_deletion($export_info, {}, \@deleted_files, $exportdir);

    }

    if ($saveas eq "FedoraMETS") {
        # create collection "doc obj" for Fedora that contains
        # collection-level metadata

        my $doc_obj = doc->new($configfilename, "nonindexed_doc", "none");
        $doc_obj->set_OID("collection");

        my $col_meta = $collectcfg->{'collectionmeta'};

        if (defined $col_meta) {

            store_collectionmeta($col_meta, "collectionname", $doc_obj); # in GS3 this is a collection's name
            store_collectionmeta($col_meta, "collectionextra", $doc_obj); # in GS3 this is a collection's description

        }
        $processor->process($doc_obj);
    }

    &plugin::end($pluginfo, $processor);

    &plugin::deinit($pluginfo, $processor);

    # Store the value of OIDCount (used in doc.pm) so it can be
    # restored correctly to this value on an incremental build
    &inexport::store_doc_oid_count($exportdir);

    # write out the export information file
    #$processor->close_file_output() if $groupsize > 1;
    $processor->close_group_output() if $processor->is_group();

#    if (($saveas =~ m/^.*METS$/) || ($saveas eq "MARCXML")) {
#       # Not all export types need this,

##      $export_info->save_info($expinfo_doc_filename);
#    }


    # for backwards compatibility with archives.inf file
    if ($expinfo_doc_filename =~ m/(contents)|(\.inf)$/) {
        $export_info->save_info($expinfo_doc_filename);
    }
    else {
        $export_info->save_revinfo_db($expinfo_src_filename);
    }


    # write out export stats
    my $close_stats = 0;
    if ($statsfile !~ /^(STDERR|STDOUT)$/i) {
        if (open(STATS, '>', $statsfile)) {
            # STATS is opened in this script's package (export), so the
            # handle name must carry that package qualifier.
            $statsfile = 'export::STATS';
            $close_stats = 1;
        }
        else {
            &gsprintf($out, "{import.cannot_open_stats_file}", $statsfile);
            &gsprintf($out, "{import.stats_backup}\n");
            $statsfile = 'STDERR';
        }
    }

    &gsprintf($out, "\n");
    &gsprintf($out, "*********************************************\n");
    &gsprintf($out, "{export.complete}\n");
    &gsprintf($out, "*********************************************\n");

    &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli);
    if ($close_stats) {
        close STATS;
    }

    close OUT if $close_out;
    close FAILLOG;
}
744
745
# store_collectionmeta -- copy one collection-level metadata field onto the
# top section of a document object.
#
#   $collectionmeta  hash ref: field name => { key => value, ... }
#   $field           name of the field to copy (e.g. "collectionname")
#   $doc_obj         object providing get_top_section() and
#                    add_utf8_metadata()
#
# Each value is stored as "ex.<field>"; a key of the form "[l=xx]" adds the
# suffix "^xx" to that label. A value stored as "ex.collectionname" or
# "ex.collectionname^en" is stored a second time as "dc.Title".
sub store_collectionmeta
{
    my ($collectionmeta, $field, $doc_obj) = @_;

    my $top_section = $doc_obj->get_top_section();
    my $values_for  = $collectionmeta->{$field};

    foreach my $key (keys %$values_for) {
        my $value = $values_for->{$key};

        # A key written as "[l=xx]" carries a suffix for the label.
        my $label = "ex.$field";
        if ($key =~ m/^\[l=(.*?)\]$/) {
            $label = $label . "^" . $1;
        }

        $doc_obj->add_utf8_metadata($top_section, $label, $value);

        # see collConfigxml.pm: GS2's "collectionextra" is called "description" in GS3,
        # while "collectionname" in GS2 is called "name" in GS3.
        # The $nameMap variable in collConfigxml.pm maps between GS2 and GS3
        my $is_title = ($label eq "ex.collectionname")
                    || ($label eq "ex.collectionname^en");
        if ($is_title) {
            $doc_obj->add_utf8_metadata($top_section, "dc.Title", $value);
        }
    }
}
783
784
785
786
Note: See TracBrowser for help on using the repository browser.