Changeset 17318 for gsdl/trunk/bin
- Timestamp: 2008-09-18T10:02:41+12:00 (16 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gsdl/trunk/bin/script/explode_metadata_database.pl
r17301 r17318 227 227 228 228 my @metadata_records = split(/$split_exp/, $text); 229 print STDERR "Number of records: " . scalar(@metadata_records) . "\n"; 229 my $total_num_records = scalar(@metadata_records); 230 print STDERR "Number of records: $total_num_records\n"; 230 231 231 232 # Write the metadata from each record to the metadata.xml file … … 235 236 236 237 # Check if we need to start a new directory for these records 237 check_need_new_directory($exploded_base_dir,$record_number,$records_per_folder, 238 \@metadata_records,\$documents_directory); 238 check_need_new_directory($exploded_base_dir,$record_number, 239 $records_per_folder,$total_num_records, 240 \$documents_directory); 239 241 # Use the plugin's process function to avoid duplicating code 240 242 my $doc_obj = new doc($filename, "nonindexed_doc"); … … 249 251 250 252 251 check_close_directory($record_number,$records_per_folder, \@metadata_records);253 check_close_directory($record_number,$records_per_folder,$total_num_records); 252 254 253 255 $record_number = $record_number + 1; … … 255 257 } 256 258 else { 257 # Call metadata_read to set sup associated metadata259 # Call metadata_read to set up associated metadata 258 260 259 261 my $pluginfo = undef; … … 264 266 my $gli = undef; 265 267 266 my $extrametakeys = {};268 my $extrametakeys = []; 267 269 my $extrametadata = {}; 268 270 … … 271 273 $extrametakeys, $extrametadata, $processor, $maxdocs, $gli); 272 274 273 274 my $documents_directory = need_new_directory($exploded_base_dir); 275 276 # Attach metadata to object 277 # => use the plugin's extra_metadata function to avoid duplicating code 278 my $doc_obj = new doc($filename, "nonindexed_doc"); 279 280 $plugobj->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $extrametadata); 281 282 # Try to get a doc to attach the metadata to 283 # If no match found, create a dummy .nul file 284 attach_metadata_or_make_nul_doc($document_field, $doc_obj, undef, 285 $documents_directory, 
$orig_base_dir, 286 $document_prefix, $document_suffix, $metadata_set, $verbosity); 287 288 289 close_directory(); 290 } 291 275 my $total_num_records = scalar (@$extrametakeys); 276 print STDERR "Number of records: $total_num_records\n"; 277 my $record_number = 1; 278 my $documents_directory; 279 foreach my $record (@$extrametakeys) { 280 &check_need_new_directory($exploded_base_dir, $record_number, $records_per_folder, $total_num_records, \$documents_directory); 281 282 # Attach metadata to object 283 # => use the plugin's extra_metadata function to avoid duplicating code 284 my $doc_obj = new doc($filename, "nonindexed_doc"); 285 # all the metadata has been extracted into extrametadata 286 $plugobj->extra_metadata ($doc_obj, $doc_obj->get_top_section(), $extrametadata->{$record}); 287 288 # Try to get a doc to attach the metadata to 289 # If no match found, create a dummy .nul file 290 attach_metadata_or_make_nul_doc($document_field, $doc_obj, $record_number, $documents_directory, $orig_base_dir, $document_prefix, $document_suffix, $metadata_set, $verbosity); 291 292 &check_close_directory($record_number,$records_per_folder,$total_num_records); 293 294 $record_number = $record_number + 1; 295 296 } 297 } 292 298 293 299 # Explode means just that: the original file is deleted … … 326 332 sub check_need_new_directory 327 333 { 328 my ($exploded_base_dir,$record_number, $records_per_folder, $metadata_records,329 $ documents_dir_ref) = @_;334 my ($exploded_base_dir,$record_number, $records_per_folder, 335 $total_num_records, $documents_dir_ref) = @_; 330 336 331 337 … … 334 340 my $documents_directory = $exploded_base_dir; 335 341 336 if ( scalar(@$metadata_records)> $records_per_folder) {342 if ($total_num_records > $records_per_folder) { 337 343 $documents_directory .= "." . sprintf("%8.8d", $record_number); 338 344 } … … 366 372 my $document_file_full = $document_prefix . $value . 
$document_suffix; 367 373 374 # this either downloads/copies the document, or creates a nul file for it. 368 375 $document_file = &obtain_document($document_file_full, $documents_directory, $orig_base_dir, $verbosity); 369 376 &write_metadata_xml_file_entry(METADATA_XML_FILE, $document_file, $record_metadata, $metadata_set); 370 377 } 371 378 } 372 373 374 # Create a dummy .nul file if we haven't obtained a ny documentsfor this record379 } 380 381 # Create a dummy .nul file if we haven't obtained a document (or null file) for this record 375 382 if (not defined $document_file) { 376 383 … … 399 406 sub check_close_directory 400 407 { 401 my ($record_number,$records_per_folder,$ metadata_records) = @_;402 403 if (($record_number % $records_per_folder) == 0 || $record_number == scalar(@$metadata_records)) {408 my ($record_number,$records_per_folder,$total_num_records) = @_; 409 410 if (($record_number % $records_per_folder) == 0 || $record_number == $total_num_records) { 404 411 # Finish and close the metadata.xml file 405 412 close_directory(); … … 511 518 $local_document_file = &util::filename_cat($documents_directory, $document_file_name); 512 519 513 &util::cp($document_file_full, $documents_directory); 514 520 if (-e $document_file_full) { 521 &util::cp($document_file_full, $documents_directory); 522 } 523 515 524 # Check the document was obtained successfully 516 525 if (!-e $local_document_file) { … … 531 540 open(NULL_FILE, ">$local_document_file.nul"); 532 541 close(NULL_FILE); 542 print STDERR "Creating a nul document $document_file_name\n"; 533 543 } 534 544
Note: See TracChangeset for help on using the changeset viewer.