Changeset 9233
- Timestamp:
- 2005-03-01T15:32:42+13:00 (19 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/export.pl
r8879 r9233 73 73 # modegli: The lowest detail mode this argument is visible at in GLI 74 74 75 my $arguments = 76 [ { 'name' => "exportdir", 77 'desc' => "{export.exportdir}", 78 'type' => "string", 79 'reqd' => "no", 80 'hiddengli' => "yes" }, 81 { 'name' => "collectdir", 82 'desc' => "{export.collectdir}", 83 'type' => "string", 84 'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"), 85 'reqd' => "no", 86 'hiddengli' => "yes" }, 87 { 'name' => "debug", 88 'desc' => "{export.debug}", 89 'type' => "flag", 90 'reqd' => "no", 91 'hiddengli' => "yes" }, 92 { 'name' => "faillog", 93 'desc' => "{export.faillog}", 94 'type' => "string", 95 'deft' => &util::filename_cat("<collectdir>", "colname", "etc", "fail.log"), 96 'reqd' => "no", 97 'modegli' => "4" }, 98 { 'name' => "groupsize", 99 'desc' => "{import.groupsize}", 100 'type' => "int", 101 'deft' => "1", 102 'reqd' => "no", 103 'modegli' => "3" }, 104 { 'name' => "gzip", 105 'desc' => "{import.gzip}", 106 'type' => "flag", 107 'reqd' => "no", 108 'modegli' => "4" }, 109 { 'name' => "importdir", 110 'desc' => "{import.importdir}", 111 'type' => "string", 112 'reqd' => "no", 113 'hiddengli' => "yes" }, 114 { 'name' => "keepold", 115 'desc' => "{export.keepold}", 116 'type' => "flag", 117 'reqd' => "no", 118 'hiddengli' => "yes" }, 119 { 'name' => "language", 120 'desc' => "{scripts.language}", 121 'type' => "string", 122 'reqd' => "no", 123 'modegli' => "4" }, 124 { 'name' => "maxdocs", 125 'desc' => "{export.maxdocs}", 126 'type' => "int", 127 'reqd' => "no", 128 'range' => "1,", 129 'modegli' => "1" }, 130 { 'name' => "OIDtype", 131 'desc' => "{export.OIDtype}", 132 'type' => "enum", 133 'list' => $oidtype_list, 134 'deft' => "hash", 135 'reqd' => "no", 136 'modegli' => "3" }, 137 { 'name' => "out", 138 'desc' => "{export.out}", 139 'type' => "string", 140 'deft' => "STDERR", 141 'reqd' => "no", 142 'hiddengli' => "yes" }, 143 { 'name' => "removeold", 144 'desc' => "{export.removeold}", 145 'type' => "flag", 146 'reqd' => "no", 147 'modegli' => "3" }, 75 my $saveas_argument = 148 76 { 'name' => "saveas", 149 77 'desc' => "{export.saveas}", … … 152 80 'deft' => "METS", 153 81 'reqd' => "no", 154 'modegli' => "3" }, 155 { 'name' => "sortmeta", 156 'desc' => "{export.sortmeta}", 157 'type' => "metadata", 158 'reqd' => "no", 159 'modegli' => "2" }, 82 'modegli' => "3" }; 83 84 85 my $arguments = 86 [ { 'name' => "exportdir", 87 'desc' => "{export.exportdir}", 88 'type' => "string", 89 'reqd' => "no", 90 'hiddengli' => "yes" }, 91 { 'name' => "collectdir", 92 'desc' => "{export.collectdir}", 93 'type' => "string", 94 'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"), 95 'reqd' => "no", 96 'hiddengli' => "yes" }, 97 { 'name' => "listall", 98 'desc' => "{scripts.listall}", 99 'type' => "flag", 100 'reqd' => "no" }, 101 { 'name' => "debug", 102 'desc' => "{export.debug}", 103 'type' => "flag", 104 'reqd' => "no", 105 'hiddengli' => "yes" }, 106 { 'name' => "faillog", 107 'desc' => "{export.faillog}", 108 'type' => "string", 109 'deft' => &util::filename_cat("<collectdir>", "colname", "etc", "fail.log"), 110 'reqd' => "no", 111 'modegli' => "4" }, 112 { 'name' => "importdir", 113 'desc' => "{import.importdir}", 114 'type' => "string", 115 'reqd' => "no", 116 'hiddengli' => "yes" }, 117 { 'name' => "keepold", 118 'desc' => "{export.keepold}", 119 'type' => "flag", 120 'reqd' => "no", 121 'hiddengli' => "yes" }, 122 { 'name' => "language", 123 'desc' => "{scripts.language}", 124 'type' => "string", 125 'reqd' => "no", 126 'modegli' => "4" }, 127 { 'name' => "maxdocs", 128 'desc' => "{export.maxdocs}", 129 'type' => "int", 130 'reqd' => "no", 131 'range' => "1,", 132 'modegli' => "1" }, 133 { 'name' => "out", 134 'desc' => "{export.out}", 135 'type' => "string", 136 'deft' => "STDERR", 137 'reqd' => "no", 138 'hiddengli' => "yes" }, 139 { 'name' => "removeold", 140 'desc' => "{export.removeold}", 141 'type' => "flag", 142 'reqd' => "no", 143 'modegli' => "3" }, 144 $saveas_argument, 160 145 # { 'name' => "statsfile", 161 146 # 'desc' => "{export.statsfile}", … … 176 161 'args' => $arguments }; 177 162 163 my $listall_options = { 'name' => "export.pl", 164 'desc' => "{export.desc}", 165 'args' => [ $saveas_argument ] }; 166 178 167 sub gsprintf 179 168 { … … 185 174 186 175 sub main { 187 my ($verbosity, $importdir, $archivedir, $keepold, 176 my ($verbosity, $importdir, $archivedir, $keepold, $listall, 188 177 $removeold, $saveas, $gzip, $groupsize, $OIDtype, $debug, 189 $maxdocs, $collection, $configfilename, $collectcfg,178 $maxdocs, $collection, $configfilename, 190 179 $pluginfo, $sortmeta, $export_info_filename, 191 180 $export_info, $processor, $out, $faillog, $collectdir, $gli); … … 202 191 'verbosity/\d+/', \$verbosity, 203 192 'importdir/.*/', \$importdir, 204 'exportdir/.*/', \$exportdir, 193 'exportdir/.*/', \$exportdir, 194 'listall', \$listall, 205 195 'keepold', \$keepold, 206 196 'removeold', \$removeold, 207 197 'saveas/^(DSpace|METS)$/METS', \$saveas, 208 'gzip', \$gzip,209 'groupsize/\d+/1', \$groupsize,210 'OIDtype/^(hash|incremental)$/', \$OIDtype,211 'sortmeta/.*/', \$sortmeta,212 198 'debug', \$debug, 213 199 'maxdocs/^\-?\d+/', \$maxdocs, … … 222 208 } 223 209 210 $gzip = undef; 211 $groupsize = 1; 212 $OIDtype = undef; 213 $sortmeta = undef; 214 224 215 # If $language has been specified, load the appropriate resource bundle 225 216 # (Otherwise, the default resource bundle will be loaded automatically) … … 228 219 } 229 220 230 if ($xml) { 221 if ($listall) { 222 if ($xml) { 223 &PrintUsage::print_xml_usage($listall_options); 224 } 225 else 226 { 227 &PrintUsage::print_txt_usage($listall_options,"{export.params}"); 228 } 229 die "\n"; 230 } 231 elsif ($xml) { 231 232 &PrintUsage::print_xml_usage($options); 232 233 die "\n"; … … 248 249 $removeold = 0 if ($keepold); 249 250 250 # get and check the collection name 251 if (($collection = &util::use_collection(@ARGV, $collectdir)) eq "") { 252 &PrintUsage::print_txt_usage($options, "{export.params}"); 253 die "\n"; 254 } 255 256 if ($faillog eq "") { 257 $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log"); 258 } 259 open (FAILLOG, ">$faillog") || 260 (&gsprintf(STDERR, "{export.cannot_open_fail_log}\n", $faillog) && die); 261 my $faillogname = $faillog; 262 $faillog = 'export::FAILLOG'; 263 $faillog->autoflush(1); 264 265 # check sortmeta 266 $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/; 267 if (defined $sortmeta && $groupsize > 1) { 268 &gsprintf($out, "{export.cannot_sort}\n\n"); 269 $sortmeta = undef; 270 } 271 272 # dynamically load 'docsave' module so it can pick up on a collection 273 # specific docsave.pm is specified. 274 275 unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib"); 276 require docsave; 277 278 # get the list of plugins for this collection and set any options that 279 # were specified in the collect.cfg (all export.pl options except 280 # -collectdir, -out and -faillog may be specified in the collect.cfg (these 281 # options must be known before we read the collect.cfg)) 282 my $plugins = []; 283 my @global_opts = (); 284 285 $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg"); 286 if (-e $configfilename) { 287 $collectcfg = &colcfg::read_collect_cfg ($configfilename); 288 if (defined $collectcfg->{'plugin'}) { 289 $plugins = $collectcfg->{'plugin'}; 290 } 291 292 if ($verbosity !~ /\d+/) { 293 if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) { 294 $verbosity = $collectcfg->{'verbosity'}; 251 while (scalar(@ARGV)>0) { 252 my $collect_name = shift @ARGV; 253 254 $ENV{'GSDLCOLLECTION'} = $collect_name; 255 256 eval { 257 # get and check the collection name 258 if (($collection = &util::use_collection($collect_name, $collectdir)) eq "") { 259 &PrintUsage::print_txt_usage($options, "{export.params}"); 260 die "\n"; 261 } 262 263 if ($faillog eq "") { 264 $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log"); 265 } 266 open (FAILLOG, ">$faillog") || 267 (&gsprintf(STDERR, "{export.cannot_open_fail_log}\n", $faillog) && die); 268 my $faillogname = $faillog; 269 $faillog = 'export::FAILLOG'; 270 $faillog->autoflush(1); 271 272 # check sortmeta 273 $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/; 274 if (defined $sortmeta && $groupsize > 1) { 275 &gsprintf($out, "{export.cannot_sort}\n\n"); 276 $sortmeta = undef; 277 } 278 279 # dynamically load 'docsave' module so it can pick up on a collection 280 # specific docsave.pm is specified. 281 282 unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib"); 283 require docsave; 284 285 # get the list of plugins for this collection and set any options that 286 # were specified in the collect.cfg (all export.pl options except 287 # -collectdir, -out and -faillog may be specified in the collect.cfg (these 288 # options must be known before we read the collect.cfg)) 289 my $plugins = []; 290 my @global_opts = (); 291 292 $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg"); 293 if (-e $configfilename) { 294 my $collectcfg = &colcfg::read_collect_cfg ($configfilename); 295 if (defined $collectcfg->{'plugin'}) { 296 $plugins = $collectcfg->{'plugin'}; 297 } 298 299 if ($verbosity !~ /\d+/) { 300 if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) { 301 $verbosity = $collectcfg->{'verbosity'}; 302 } else { 303 $verbosity = 2; # the default 304 } 305 } 306 if (defined $collectcfg->{'importdir'} && $importdir eq "") { 307 $importdir = $collectcfg->{'importdir'}; 308 } 309 if (defined $collectcfg->{'exportdir'} && $exportdir eq "") { 310 $exportdir = $collectcfg->{'exportdir'}; 311 } 312 if (defined $collectcfg->{'removeold'}) { 313 if ($collectcfg->{'removeold'} =~ /^true$/i && !$keepold) { 314 $removeold = 1; 315 } 316 if ($collectcfg->{'removeold'} =~ /^false$/i && !$removeold) { 317 $removeold = 0; 318 } 319 } 320 if (defined $collectcfg->{'keepold'}) { 321 if ($collectcfg->{'keepold'} =~ /^false$/i && !$keepold) { 322 $removeold = 1; 323 } 324 } 325 if (defined $collectcfg->{'gzip'} && !$gzip) { 326 if ($collectcfg->{'gzip'} =~ /^true$/i) { 327 $gzip = 1; 328 } 329 } 330 if ($maxdocs !~ /\-?\d+/) { 331 if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) { 332 $maxdocs = $collectcfg->{'maxdocs'}; 333 } else { 334 $maxdocs = -1; # the default 335 } 336 } 337 if ($groupsize == 1) { 338 if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) { 339 $groupsize = $collectcfg->{'groupsize'}; 340 } 341 } 342 if (!defined $OIDtype || ($OIDtype !~ /^(hash|incremental)$/)) { 343 if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental)$/) { 344 $OIDtype = $collectcfg->{'OIDtype'}; 345 } else { 346 $OIDtype = "hash"; # the default 347 } 348 } 349 if (defined $collectcfg->{'sortmeta'} && $sortmeta eq "") { 350 $sortmeta = $collectcfg->{'sortmeta'}; 351 } 352 if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) { 353 $debug = 1; 354 } 355 if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) { 356 $gli = 1; 357 } 358 359 # global plugin stuff 360 if (defined $collectcfg->{'separate_cjk'}&& $collectcfg->{'separate_cjk'} =~ /^true$/i) { 361 push @global_opts, "-separate_cjk"; 362 } 295 363 } else { 296 $verbosity = 2; # the default 297 } 298 } 299 if (defined $collectcfg->{'importdir'} && $importdir eq "") { 300 $importdir = $collectcfg->{'importdir'}; 301 } 302 if (defined $collectcfg->{'exportdir'} && $exportdir eq "") { 303 $exportdir = $collectcfg->{'exportdir'}; 304 } 305 if (defined $collectcfg->{'removeold'}) { 306 if ($collectcfg->{'removeold'} =~ /^true$/i && !$keepold) { 307 $removeold = 1; 308 } 309 if ($collectcfg->{'removeold'} =~ /^false$/i && !$removeold) { 310 $removeold = 0; 311 } 312 } 313 if (defined $collectcfg->{'keepold'}) { 314 if ($collectcfg->{'keepold'} =~ /^false$/i && !$keepold) { 315 $removeold = 1; 316 } 317 } 318 if (defined $collectcfg->{'gzip'} && !$gzip) { 319 if ($collectcfg->{'gzip'} =~ /^true$/i) { 320 $gzip = 1; 321 } 322 } 323 if ($maxdocs !~ /\-?\d+/) { 324 if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) { 325 $maxdocs = $collectcfg->{'maxdocs'}; 364 (&gsprintf($out, "{common.cannot_find_cfg_file}\n", $configfilename) && die); 365 } 366 367 $gli = 0 unless defined $gli; 368 369 print STDERR "<export>\n" if $gli; 370 371 # fill in the default import and export directories if none 372 # were supplied, turn all \ into / and remove trailing / 373 $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq ""; 374 $importdir =~ s/[\\\/]+/\//g; 375 $importdir =~ s/\/$//; 376 $exportdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "export") if $exportdir eq ""; 377 $exportdir =~ s/[\\\/]+/\//g; 378 $exportdir =~ s/\/$//; 379 380 # load all the plugins 381 $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts); 382 383 if (scalar(@$pluginfo) == 0) { 384 &gsprintf($out, "{import.no_plugins_loaded}\n"); 385 die "\n"; 386 } 387 388 # remove the old contents of the export directory if needed 389 if ($removeold && -e $exportdir) { 390 &gsprintf($out, "{export.removing_export}\n"); 391 &util::rm_r ($exportdir); 392 } 393 394 # read the export information file 395 if (!$debug) { 396 # Export to DSpace Arhive format or METs format 397 # If saveas=DSpace, a "contents" file will be created, otherwise "export.inf" 398 399 if ($saveas eq "DSpace"){ 400 $export_info_filename = &util::filename_cat ($exportdir, "contents"); 401 } elsif ($saveas eq "METS") { 402 $export_info_filename = &util::filename_cat ($exportdir, "export.inf"); 403 } 404 405 $export_info = new expinfo(); 406 $export_info -> load_info ($export_info_filename); 407 408 $processor = new docsave ($collection, $export_info, $verbosity, $gzip, $groupsize, $out, $service, $saveas); 409 410 $processor->setexportdir ($exportdir); 411 412 $processor->set_sortmeta ($sortmeta) if defined $sortmeta; 413 $processor->set_OIDtype ($OIDtype); 414 $processor->set_saveas ($saveas); 326 415 } else { 327 $maxdocs = -1; # the default 328 } 329 } 330 if ($groupsize == 1) { 331 if (defined $collectcfg->{'groupsize'} && $collectcfg->{'groupsize'} =~ /\d+/) { 332 $groupsize = $collectcfg->{'groupsize'}; 333 } 334 } 335 if ($OIDtype !~ /^(hash|incremental)$/) { 336 if (defined $collectcfg->{'OIDtype'} && $collectcfg->{'OIDtype'} =~ /^(hash|incremental)$/) { 337 $OIDtype = $collectcfg->{'OIDtype'}; 338 } else { 339 $OIDtype = "hash"; # the default 340 } 341 } 342 if (defined $collectcfg->{'sortmeta'} && $sortmeta eq "") { 343 $sortmeta = $collectcfg->{'sortmeta'}; 344 } 345 if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) { 346 $debug = 1; 347 } 348 if (defined $collectcfg->{'gli'} && $collectcfg->{'gli'} =~ /^true$/i) { 349 $gli = 1; 350 } 351 352 # global plugin stuff 353 if (defined $collectcfg->{'separate_cjk'}&& $collectcfg->{'separate_cjk'} =~ /^true$/i) { 354 push @global_opts, "-separate_cjk"; 355 } 356 } else { 357 (&gsprintf($out, "{common.cannot_find_cfg_file}\n", $configfilename) && die); 358 } 359 360 $gli = 0 unless defined $gli; 361 362 print STDERR "<export>\n" if $gli; 363 364 # fill in the default import and export directories if none 365 # were supplied, turn all \ into / and remove trailing / 366 $importdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "import") if $importdir eq ""; 367 $importdir =~ s/[\\\/]+/\//g; 368 $importdir =~ s/\/$//; 369 $exportdir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "export") if $exportdir eq ""; 370 $exportdir =~ s/[\\\/]+/\//g; 371 $exportdir =~ s/\/$//; 372 373 # load all the plugins 374 $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out, $faillog, \@global_opts); 375 376 if (scalar(@$pluginfo) == 0) { 377 &gsprintf($out, "{import.no_plugins_loaded}\n"); 378 die "\n"; 379 } 380 381 # remove the old contents of the export directory if needed 382 if ($removeold && -e $exportdir) { 383 &gsprintf($out, "{export.removing_export}\n"); 384 &util::rm_r ($exportdir); 385 } 386 387 # read the export information file 388 if (!$debug) { 389 # Export to DSpace Arhive format or METs format 390 # If saveas=DSpace, a "contents" file will be created, otherwise "export.inf" 391 392 if ($saveas eq "DSpace"){ 393 $export_info_filename = &util::filename_cat ($exportdir, "contents"); 394 } elsif ($saveas eq "METS") { 395 $export_info_filename = &util::filename_cat ($exportdir, "export.inf"); 396 } 397 398 $export_info = new expinfo(); 399 $export_info -> load_info ($export_info_filename); 400 401 $processor = new docsave ($collection, $export_info, $verbosity, $gzip, $groupsize, $out, $service, $saveas); 402 403 $processor->setexportdir ($exportdir); 404 405 # if ($saveas eq "DSpace"){ 406 # if (!open(OUTDOC_EXPORT_CONTENTS,">$export_info_filename")){ 407 # print STDERR "Process could not write collection contents to file $export_info_filename\n"; 408 # return; 409 # } 410 # } 411 412 $processor->set_sortmeta ($sortmeta) if defined $sortmeta; 413 $processor->set_OIDtype ($OIDtype); 414 $processor->set_saveas ($saveas); 415 } else { 416 $processor = new docprint (); 417 } 418 419 &plugin::begin($pluginfo, $importdir, $processor, $maxdocs); 420 421 # process the import directory 422 # if ($saveas eq "DSpace"){ 423 # print STDERR "###ImportDir=$importdir\n"; 424 # &plugin::read ($pluginfo, $importdir, "", {}, $processor, $maxdocs, $saveas,'export::OUTDOC_EXPORT_CONTENTS',$gli); 425 # } else { 426 &plugin::read ($pluginfo, $importdir, "", {}, $processor, $maxdocs, $gli); 427 # } 428 429 &plugin::end($pluginfo, $processor); 430 431 # write out the export information file 432 if (!$debug) { 433 $processor->close_file_output() if $groupsize > 1; 434 # $processor->close_file_output(); 435 print STDERR "##What is saveas =$saveas\n"; 436 if ($saveas eq "METS") { 437 $export_info->save_info($export_info_filename); 438 # } elsif ($saveas es "DSpace"){ 439 # $export_info->content_info($export_info_filename); 440 } 441 #close OUTDOC_EXPORT_CONTENTS; 442 } 443 416 $processor = new docprint (); 417 } 418 419 &plugin::begin($pluginfo, $importdir, $processor, $maxdocs); 420 421 # process the import directory 422 &plugin::read ($pluginfo, $importdir, "", {}, $processor, $maxdocs, $gli); 423 424 &plugin::end($pluginfo, $processor); 425 426 # write out the export information file 427 if (!$debug) { 428 $processor->close_file_output() if $groupsize > 1; 429 if ($saveas eq "METS") { 430 $export_info->save_info($export_info_filename); 431 } 432 } 433 444 434 # # write out export stats 445 435 # my $close_stats = 0; … … 454 444 # } 455 445 # } 456 446 close FAILLOG; 447 }; 448 449 ## $ENV{'GSDLCOLLECTION'} = undef; 450 $importdir = ""; 451 $removeold = 0; 452 453 } 454 457 455 &gsprintf($out, "\n"); 458 456 &gsprintf($out, "*********************************************\n"); 459 457 &gsprintf($out, "{export.complete}\n"); 460 458 &gsprintf($out, "*********************************************\n"); 461 459 462 460 # &plugin::write_stats($pluginfo, $statsfile, $faillogname, $gli); 463 461 # if ($close_stats) { … … 466 464 467 465 close OUT if $close_out; 468 close FAILLOG; 466 467 469 468 }
Note:
See TracChangeset
for help on using the changeset viewer.