- Timestamp: 2012-09-13T10:12:59+12:00 (12 years ago)
- Location: main/trunk/greenstone2/bin/script
- Files: 2 edited
Legend:
- Unmodified
- Added
- Removed
main/trunk/greenstone2/bin/script/g2f-buildcol.pl
r21687 r26183 21 21 $ENV{'FEDORA_PROTOCOL'} = "http" if (!defined $ENV{'FEDORA_PROTOCOL'}); 22 22 $ENV{'FEDORA_PID_NAMESPACE'} = "greenstone" if (!defined $ENV{'FEDORA_PID_NAMESPACE'}); 23 $ENV{'FEDORA_PREFIX'} = "/fedora" if (!defined $ENV{'FEDORA_PREFIX'}); 23 24 24 25 unshift (@INC, "$ENV{'GSDLHOME'}/perllib/"); … … 35 36 use printusage; 36 37 use parse2; 38 use cfgread; 39 use colcfg; 37 40 38 41 use g2futil; 39 42 43 use dbutil; 40 44 41 45 my $arguments = … … 213 217 foreach my $hd (@hash_dirs) { 214 218 219 my $hash_id = &g2futil::get_hash_id($hd); 220 221 if (defined $hash_id) { 222 223 my $pid = "$pid_namespace:$gs_col-$hash_id"; 224 225 226 my $dsinfo_status = &g2futil::run_datastore_info($pid,$options); 227 228 if ($dsinfo_status == 0) { 229 print " $pid being updated.\n"; 230 &g2futil::run_purge($pid,$options); 231 } 232 else { 233 print " $pid not present.\n"; 234 } 235 } 236 215 237 my $docmets_filename 216 238 = &util::filename_cat($hd,"docmets.xml"); … … 227 249 else { 228 250 print STDERR "Error: Unable to open directory $export_dir: $!\n"; 229 exit; 230 } 231 232 251 exit 1; 252 } 253 254 255 # can possibly use inexport instead of running buildcol.pl through system() 256 print STDERR "**** Just for now, also run Greenstone's buildcol.pl\n"; 257 258 my $gs_opts = " -verbosity $verbosity"; 259 $gs_opts .= " -gli" if ($gli); 260 $gs_opts .= " -collectdir \"$collectdir\"" if ($collectdir); 261 $gs_opts .= " -mode infodb"; 262 263 my $gs_buildcol_arguments = "$gs_opts $gs_col"; 264 265 &g2futil::run_cmd("buildcol.pl", $gs_buildcol_arguments, $options); 266 267 # read in collect cfg file to work out db type 268 my $collectcfg = &util::filename_cat ($collectdir, $gs_col, "etc", "collectionConfig.xml"); 269 #print STDERR "**** collectcfg file: $collectcfg\n"; 270 unless(open(FIN, "<$collectcfg")) { 271 print STDERR "g2f-buildcol.pl: Unable to open $collectcfg...ERROR: $!\n"; 272 exit 1; 273 } 274 close(FIN); 275 276 # for now we assume GS3, since that's 
what the following gets implemented for 277 my $collect_cfg = &colcfg::read_collection_cfg ($collectcfg, "gs3"); 278 # get the database type for this collection from its configuration file (may be undefined) 279 my $infodbtype = $collect_cfg->{'infodbtype'} || &dbutil::get_default_infodb_type(); 280 281 # open .gdbm database file in building/text/$colname.gdb, using dbutil 282 my $colname = $gs_col; 283 $colname =~ s/(:?\\|\/)(.*)$/$1/; # remove any collect group from collection name to get tailname 284 285 my $building_txt_dir = &util::filename_cat ($collectdir, $gs_col, "building", "text"); 286 my $building_txt_db = &dbutil::get_infodb_file_path($infodbtype, "$colname", $building_txt_dir); 287 288 # foreach key that matches http://dir1/dir2/....file.xxx 289 my $db_keys = {}; 290 &dbutil::read_infodb_keys($infodbtype,$building_txt_db, $db_keys); 291 292 foreach my $key (keys %$db_keys) { 293 if($key =~ m@^http://@) { 294 295 # get value for the key 296 my $src_rec_string = &dbutil::read_infodb_entry($infodbtype,$building_txt_db, $key); 297 my $src_rec = &dbutil::convert_infodb_string_to_hash($src_rec_string); 298 my $OID_hash_value = $src_rec->{'section'}->[0]; 299 $OID_hash_value = "$pid_namespace:$gs_col-".$OID_hash_value; # convert to fedoraPID 300 301 # its fedora pid = "greenstone-http:$colname-http:||dir|file.xxx" 302 # except that fedorapids don't like extra colons and don't like | 303 my $fedora_identifier = "$pid_namespace-http:$gs_col-$key"; 304 # CAN'T HAVE | OR : (as in "http:||one|two.html") in fedoraPID 305 $key =~ s@/@_@g; 306 $key =~ s@:@-@g; 307 my $fedora_pid = "$pid_namespace-http:$gs_col-$key"; 308 309 # To run fedora ingest on the new file need to have sensible 310 # filenames that won't offend windows 311 my $fedora_key_file_name = "$fedora_pid"; 312 $fedora_key_file_name =~ s@\.@-@g; 313 $fedora_key_file_name =~ s/\:/=/g; 314 $fedora_key_file_name .= ".xml"; 315 print STDERR "+++++ fpid: $fedora_pid, fedora-key filename: 
$fedora_key_file_name\n"; 316 317 # write out a FedoraMets File for this key (in /tmp) 318 # -> it has one metadata value, which is 'dc:title' = HASHxxxxxx 319 320 # The HASHID shouldn't be the title: then will have 321 # duplicate titles and it will be hard to search for 322 # unique ones. What about making the filename the 323 # dc.title and the HASHID the dc.identifier 324 325 my $contents = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"no\"?>\n"; 326 $contents .= "<mets:mets xmlns:mets=\"http://www.loc.gov/METS/\"\n"; 327 $contents .= " xmlns:xsi=\"http://www.w3.org/2001/XMLSchema-instance\"\n"; 328 $contents .= " xmlns:gsdl3=\"http://www.greenstone.org/namespace/gsdlmetadata/1.0/\"\n"; 329 $contents .= " xmlns:xlink=\"http://www.w3.org/1999/xlink\"\n"; 330 $contents .= " xsi:schemaLocation=\"http://www.loc.gov/METS/\n"; 331 $contents .= " http://www.loc.gov/standards/mets/mets.xsd\n"; 332 $contents .= " http://www.greenstone.org/namespace/gsdlmetadata/1.0/\n"; 333 $contents .= " http://www.greenstone.org/namespace/gsdlmetadata/1.0/gsdl_metadata.xsd\"\n"; 334 $contents .= " OBJID=\"$fedora_pid\"\n"; 335 # $contents .= " OBJID=\"greenstone:$gs_col-HASH1f814d07252c354039ee11\"\n"; 336 $contents .= " TYPE=\"FedoraObject\" LABEL=\"$fedora_pid\" EXT_VERSION=\"1.1\">\n"; 337 $contents .= "<mets:metsHdr RECORDSTATUS=\"A\"/>\n"; 338 $contents .= " <mets:amdSec ID=\"DC\" >\n"; 339 $contents .= " <mets:techMD ID=\"DC.0\">\n"; 340 $contents .= " <mets:mdWrap LABEL=\"Metadata\" MDTYPE=\"OTHER\" OTHERMDTYPE=\"gsdl3\" ID=\"DCgsdl1\">\n"; 341 $contents .= " <mets:xmlData>\n"; 342 $contents .= " <oai_dc:dc xmlns:dc=\"http://purl.org/dc/elements/1.1/\" xmlns:oai_dc=\"http://www.openarchives.org/OAI/2.0/oai_dc/\" >\n"; 343 $contents .= " <dc:title>$OID_hash_value</dc:title>\n"; 344 # $contents .= " <dc:identifier>$fedora_identifier</dc:identifier>\n"; 345 $contents .= " </oai_dc:dc>\n"; 346 $contents .= " </mets:xmlData>\n"; 347 $contents .= " </mets:mdWrap>\n"; 348 
$contents .= " </mets:techMD>\n"; 349 $contents .= " </mets:amdSec>\n"; 350 $contents .= "</mets:mets>\n"; 351 352 353 # write out the file and then run fedora ingest on that file 354 # The file gets purged in g2f-import.pl, so don't remove it from export dir now 355 my $fedora_key_file_path = &util::filename_cat($export_dir, $fedora_key_file_name); 356 unless(open(FOUT, ">$fedora_key_file_path")) { 357 print STDERR "g2f-buildcol.pl: Unable to open $fedora_key_file_path...ERROR: $!\n"; 358 exit 1; 359 } 360 print FOUT $contents; 361 close(FOUT); 362 363 print STDERR "<Build>\n" if $gli; 364 print STDERR "Ingesting $fedora_key_file_name\n"; 365 print STDERR "#### ".join(",", %$options)."\n"; 366 367 &g2futil::run_ingest($fedora_key_file_path,$options); 368 print STDERR "</Build>\n" if $gli; 369 } 370 371 } 372 373 374 # If successful!!! Then need to think about: 375 # [CLX] nodes 376 # Doing this with FedoraMETSPlugin 377 378 233 379 } 234 380 -
main/trunk/greenstone2/bin/script/g2f-import.pl
r22338 r26183 212 212 # readdir 213 213 if (opendir(DIR, $export_dir)) { 214 214 my @xml_files = grep { $_ =~ m/^greenstone-http.*\.xml$/ } readdir(DIR); 215 215 closedir DIR; 216 217 # purge all the (URL,hashID) metadata files that we inserted 218 # into fedora at the end of g2f-buildcol.pl 219 # convert the filenames into fedora-pids 220 # filename = greenstone-http=tmpcol-http-__test1-html.xml -> fpid = greenstone-http:tmpcol-http-__test1.html 221 foreach my $file (@xml_files) { 222 my $fedora_pid = $file; 223 $fedora_pid =~ s/\.xml$//; 224 $fedora_pid =~ s/\=/:/; 225 $fedora_pid =~ s/(.*)-(.*)$/$1.$2/; 226 227 print STDERR "#### fedora_pid: $fedora_pid\n"; 228 &g2futil::run_purge($fedora_pid,$options); # displays error message if first time (nothing to purge) 229 } 230 216 231 my @hash_dirs = &g2futil::get_all_hash_dirs($export_dir,$maxdocs); 217 232 … … 246 261 print "***\n"; 247 262 248 my $gs_export_opts = "-saveas FedoraMETS -fedora_namespace $pid_namespace -verbosity $verbosity"; 249 250 $gs_export_opts .= " -gli" if ($gli); 251 252 $gs_export_opts .= " -language $language" if ($language); 253 $gs_export_opts .= " -collectdir \"$collectdir\"" if ($collectdir); 254 $gs_export_opts .= " -removeold" if ($removeold); 255 $gs_export_opts .= " -maxdocs $maxdocs" if ($maxdocs); 256 257 $gs_export_opts .= " -exportdir \"$export_dir\""; 263 my $gs_export_opts = "-saveas FedoraMETS -fedora_namespace $pid_namespace"; 264 265 my $gs_opts = " -verbosity $verbosity"; 266 $gs_opts .= " -gli" if ($gli); 267 268 $gs_opts .= " -language $language" if ($language); 269 $gs_opts .= " -collectdir \"$collectdir\"" if ($collectdir); 270 $gs_opts .= " -removeold" if ($removeold); 271 $gs_opts .= " -maxdocs $maxdocs" if ($maxdocs); 272 273 $gs_export_opts .= " $gs_opts -exportdir \"$export_dir\""; 258 274 259 275 my $gs_export_arguments = "$gs_export_opts $gs_col"; 260 276 261 277 &g2futil::run_cmd("export.pl", $gs_export_arguments, $options); 278 279 print STDERR "**** Just for 
now, also run Greenstone's import.pl\n"; 280 # if we have the FedoraMETSPlugIN then we wouldn't have to run import anymore 281 my $gs_import_arguments = "$gs_opts $gs_col"; 282 283 &g2futil::run_cmd("import.pl", $gs_import_arguments, $options); 262 284 } 263 285
Note: See TracChangeset for help on using the changeset viewer.