Changeset 12003
- Timestamp:
- 2006-07-04T15:36:20+12:00 (18 years ago)
- Location:
- trunk/gsdl/bin/script
- Files:
-
- 3 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/build
r2892 r12003 155 155 print STDOUT " -optionfile file Get options from file, useful on systems where\n"; 156 156 print STDOUT " long command lines may cause problems\n"; 157 print STDOUT " -indextype mg|mgpp|lucene \n"; 158 print STDERR " Specify the type of indexer used in this collection\n"; 159 print STDERR " If -append is used then -indextype is needed to \n"; 160 print STDERR " determine how to run buildcol.pl as well as update\n"; 161 print STDERR " 'building' and 'index' according.\n"; 157 162 print STDOUT " -append Add new files to existing collection\n"; 163 print STDOUT " -manifest Use manifest.xml file to determine which files to process.\n"; 158 164 print STDOUT " -remove_archives Remove archives directory after successfully\n"; 159 165 print STDOUT " building the collection.\n"; … … 386 392 my $import_cmd = "perl -S import.pl"; 387 393 $import_cmd .= " -out \"$outfile.import\"" if $use_out; 388 $import_cmd .= " -removeold" unless $append; 394 if ($append) { 395 $import_cmd .= " -keepold"; 396 } else { 397 $import_cmd .= " -removeold"; 398 } 399 400 $import_cmd .= " -manifest manifest.xml" if ($manifest); 389 401 $import_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/; 390 402 $import_cmd .= " -statsfile \"$statsfile\"" if $statsfile =~ /\w/; … … 415 427 416 428 my $build_cmd = "perl -S buildcol.pl"; 429 430 my $removeold = 1; 431 if ($append) { 432 if ($indextype eq "lucene") { 433 $build_cmd .= " -keepold"; 434 $removeold = 0; 435 } 436 else { 437 $build_cmd .= " -removeold"; 438 } 439 } 440 else { 441 $build_cmd .= " -removeold"; 442 } 443 417 444 $build_cmd .= " -out \"$outfile.build\"" if $use_out; 418 445 $build_cmd .= " -collectdir \"$collectdir\"" if $collectdir =~ /\w/; … … 437 464 } 438 465 439 # replace old indexes with new ones 440 if (&has_content ($indexdir)) { 441 print $out "removing old indexes\n"; 442 &util::rm_r ($indexdir); 443 } 444 rmdir ($indexdir) if -d $indexdir; 445 &File::Copy::move ($buildingdir, $indexdir); 466 
if ($removeold) { 467 # replace old indexes with new ones 468 if (&has_content ($indexdir)) { 469 print $out "removing old indexes\n"; 470 &util::rm_r ($indexdir); 471 } 472 rmdir ($indexdir) if -d $indexdir; 473 &File::Copy::move ($buildingdir, $indexdir); 474 } 475 else { 476 # Do nothing. Assume index is symbolic link to building 477 } 446 478 447 479 # remove the cached arhives … … 560 592 if (!parsargv::parse($argref, 561 593 'optionfile/.*/', \$optionfile, 594 'indextype/^(mg|mgpp|lucene)$/mg', \$indextype, 562 595 'append', \$append, 596 'manifest', \$manifest, 563 597 'remove_archives', \$remove_archives, 564 598 'remove_import', \$remove_import, -
trunk/gsdl/bin/script/import.pl
r11746 r12003 44 44 use plugin; 45 45 use docprint; 46 use manifest; 46 47 use util; 47 48 use scriptutil; … … 50 51 use printusage; 51 52 use parse2; 53 54 52 55 53 56 use strict; … … 92 95 # parsearg left "" as default 93 96 #'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"), 97 'deft' => "", 98 'reqd' => "no", 99 'hiddengli' => "yes" }, 100 { 'name' => "manifest", 101 'desc' => "{import.manifest}", 102 'type' => "string", 94 103 'deft' => "", 95 104 'reqd' => "no", … … 219 228 220 229 sub main { 221 my ($verbosity, $importdir, $archivedir, $ keepold,230 my ($verbosity, $importdir, $archivedir, $manifest, $keepold, 222 231 $removeold, $saveas, $version, 223 232 $gzip, $groupsize, $OIDtype, $debug, … … 331 340 $archivedir = $collectcfg->{'archivedir'}; 332 341 } 342 if (defined $collectcfg->{'manifest'} && $manifest eq "") { 343 $manifest = $collectcfg->{'manifest'}; 344 } 333 345 334 346 if (defined $collectcfg->{'gzip'} && !$gzip) { … … 403 415 $archivedir =~ s/\/$//; 404 416 417 my $manifest_lookup = new manifest(); 418 if ($manifest ne "") { 419 my $manifest_filename = $manifest; 420 421 if ($manifest_filename !~ m/^[\\\/]/) { 422 $manifest_filename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, $manifest_filename); 423 } 424 425 $manifest =~ s/[\\\/]+/\//g; 426 $manifest =~ s/\/$//; 427 428 $manifest_lookup->parse($manifest_filename); 429 } 430 431 405 432 # load all the plugins 406 433 $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts); … … 445 472 &plugin::begin($pluginfo, $importdir, $processor, $maxdocs, $gli); 446 473 447 # process the import directory 448 &plugin::read ($pluginfo, $importdir, "", {}, $processor, $maxdocs, 0, $gli); 449 474 if ($manifest eq "") { 475 # process the import directory 476 &plugin::read ($pluginfo, $importdir, "", {}, $processor, $maxdocs, 0, $gli); 477 } 478 else { 479 480 # process any new files 481 foreach my $file (keys %{$manifest_lookup->{'index'}}) { 482 &plugin::read 
($pluginfo, $importdir, $file, {}, $processor, $maxdocs, 0, $gli); 483 } 484 485 # record files marked for deletion in arcinfo 486 foreach my $file (keys %{$manifest_lookup->{'delete'}}) { 487 # consider finding it? 488 # $archive_info->add_info($OID,$doc_xml_file,"D"); 489 } 490 } 491 450 492 &plugin::end($pluginfo, $processor); 451 493 -
trunk/gsdl/bin/script/lucene_passes.pl
r10165 r12003 127 127 } elsif ($mode eq "index") { 128 128 # notify lucene indexer 129 # print STDERR $doc_xml; 129 130 # SAX parser seems to be sensitive to blank lines 131 # => remove them 132 $doc_xml =~ s/\n+/\n/g; 133 134 # print STDERR $doc_xml; 135 130 136 ## print PIPEOUT "$output_filename\n"; 137 131 138 print PIPEOUT "$doc_xml"; 139 140 132 141 #save_xml_doc($full_textdir, "$output_filename.txt", $doc_xml); 133 142 }
Note: See TracChangeset for help on using the changeset viewer.