Changeset 26932
- Timestamp:
- 2013-02-26T09:52:47+13:00 (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/parallel-building/trunk/src/perllib/inexport.pm
r25401 r26932 234 234 # fill in the default import and archives directories if none 235 235 # were supplied, turn all \ into / and remove trailing / 236 $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq ""; 237 $importdir =~ s/[\\\/]+/\//g; 238 $importdir =~ s/\/$//; 239 if (!-e $importdir) { 236 if ($importdir eq "") 237 { 238 $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import"); 239 } 240 else 241 { 242 # hijack filename_cat to sanitize the user provided importdir [jmt12] 243 $importdir = &util::filename_cat ($importdir); 244 } 245 if (!&util::dir_exists($importdir)) { 240 246 &gsprintf($out, "{import.no_import_dir}\n\n", $importdir); 241 247 die "\n"; … … 256 262 } 257 263 } 258 259 $archivedir =~ s/[\\\/]+/\//g; 260 $archivedir =~ s/\/$//; 264 else 265 { 266 # use filename_cat() to sanitize the user provided archive directory as 267 # it is more aware of protocols etc 268 $archivedir = &util::filename_cat($archivedir); 269 } 261 270 $self->{'archivedir'} = $archivedir; 262 271 … … 273 282 $self->{'manifest'} = $collectcfg->{'manifest'}; 274 283 } 284 # Default value 285 $self->{'manifest_version'} = 0; 275 286 276 287 if (defined $collectcfg->{'gzip'} && !$self->{'gzip'}) { … … 427 438 428 439 $manifest_lookup->parse($manifest_filename); 440 441 # Manifests may now include a version number 442 $self->{'manifest_version'} = $manifest_lookup->get_version(); 429 443 } 430 444 … … 455 469 456 470 if ($removeold) { 457 if ( -e $archivedir) {471 if (&util::dir_exists($archivedir)) { 458 472 &gsprintf($out, "{import.removing_archives}\n"); 459 473 &util::rm_r ($archivedir); … … 462 476 $tmpdir =~ s/[\\\/]+/\//g; 463 477 $tmpdir =~ s/\/$//; 464 if ( -e $tmpdir) {465 #&gsprintf($out, "{import.removing_tmpdir}\n");466 #&util::rm_r ($tmpdir);478 if (&util::dir_exists($tmpdir)) { 479 &gsprintf($out, "{import.removing_tmpdir}\n"); 480 &util::rm_r ($tmpdir); 467 481 } 468 482 } … … 481 495 # and attach themselves as a listener (even though they don't do anything) 482 496 # This is done so that, in parallel importing, the server will persist 483 # until the top level import.pl (which will be the first th is that calls484 # thisfunction) completes. [jmt12]497 # until the top level import.pl (which will be the first that calls this 498 # function) completes. [jmt12] 485 499 my $create_server = 0; 486 500 # - infodb's of type *server need to be started on the same machine that … … 562 576 } 563 577 564 my $processor = &plugout::load_plugout($plugout); 578 my $processor = &plugout::load_plugout($plugout); 565 579 $processor->setoutputdir ($archivedir); 566 580 $processor->set_sortmeta ($sortmeta, $removeprefix, $removesuffix) if defined $sortmeta; … … 714 728 715 729 # If we are not using complex inherited metadata (and thus have skipped 716 # the global file scan) we need to at least scan the directory of the 717 # files being indexed/reindexed. [jmt12] 718 if (!defined $collectcfg->{'complexmeta'} || $collectcfg->{'complexmeta'} ne 'true') 730 # the global file scan) we need to at least check for a matching 731 # metadata.xml for the files being indexed/reindexed. [jmt12] 732 # - unless we are using the newer version of Manifests, which are treated 733 # verbatim, and should have a metadata element for metadata files (so 734 # we can explicitly process metadata files other than metadata.xml) 735 if ($self->{'manifest_version'} < 1 && (!defined $collectcfg->{'complexmeta'} || $collectcfg->{'complexmeta'} ne 'true')) 719 736 { 720 737 my @all_files_to_import = (keys %{$block_hash->{'reindex_files'}}, keys %{$block_hash->{'new_files'}}); 721 738 foreach my $file_to_import (@all_files_to_import) 722 739 { 723 my $dir_to_import = $file_to_import; 724 $dir_to_import =~ s/[^\\\/]*$//; 725 # - one day we may need to manually scan this directory for child 726 # directories and somehow explicitly block them from being 727 # recursed. 728 if (-d $dir_to_import) 740 my $metadata_xml_path = $file_to_import; 741 $metadata_xml_path =~ s/[^\\\/]*$/metadata.xml/; 742 if (&util::file_exists($metadata_xml_path)) 729 743 { 730 &plugin::file_block_read($pluginfo, $dir_to_import, '', $block_hash, $metadata, $gli);744 &plugin::file_block_read($pluginfo, '', $metadata_xml_path, $block_hash, $metadata, $gli); 731 745 } 746 } 747 } 748 749 if ($self->{'manifest_version'} > 0) 750 { 751 # Process metadata files (?) 752 # Process files 753 foreach my $file_to_import (keys %{$block_hash->{'reindex_files'}}, keys %{$block_hash->{'new_files'}}) 754 { 755 &plugin::read ($pluginfo, '', $file_to_import, $block_hash, $metadata, $processor, $maxdocs, 0, $gli); 732 756 } 733 757 } … … 823 847 $self->{'collection'}, $self->{'site'}); 824 848 } 825 else 849 # only do this if we aren't using the newer paradigm for manifest files 850 elsif ($self->{'manifest_version'} < 1) 826 851 { 827 852 &plugin::read ($pluginfo, $importdir, "", $block_hash, $metadata, $processor, $maxdocs, 0, $gli); … … 852 877 # Store the value of OIDCount (used in doc.pm) so it can be 853 878 # restored correctly to this value on an incremental build 854 store_doc_oid_count($archivedir); 879 # - this OIDcount file should only be generated for numerical oids [jmt12] 880 if ($self->{'OIDtype'} eq 'incremental') 881 { 882 store_doc_oid_count($archivedir); 883 } 855 884 856 885 # write out the archive information file … … 992 1021 993 1022 994 if (open(OIDOUT, ">$oid_count_filename")) {1023 if (open(OIDOUT,&util::file_openfdcommand($oid_count_filename, '>'))) { 995 1024 print OIDOUT $doc::OIDcount, "\n"; 996 1025
Note:
See TracChangeset
for help on using the changeset viewer.