Changeset 9041 for trunk/gsdl/bin/script/explode_metadata_database.pl
- Timestamp: 2005-02-15T12:27:03+13:00 (19 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/explode_metadata_database.pl
r8858 r9041 10 10 11 11 use parsargv; 12 13 12 use printusage; 13 14 my $arguments = 15 [ { 'name' => "encoding", 16 'desc' => "{explode.encoding}", 17 'type' => "string", 18 'deft' => "iso_8859_1", 19 'reqd' => "no" } , 20 { 'name' => "metadata_set", 21 'desc' => "{explode.metadata_set}", 22 'type' => "string", 23 'reqd' => "no" } , 24 { 'name' => "plugin", 25 'desc' => "{explode.plugin}", 26 'type' => "string", 27 'reqd' => "yes" }, 28 { 'name' => "filename_field", 29 'desc' => "{explode.filename_field}", 30 'type' => "string", 31 'reqd' => "no"} 32 ]; 33 34 my $options = { 'name' => "explode_metadata_database.pl", 35 'desc' => "{explode.desc}", 36 'args' => $arguments }; 37 38 14 39 sub main 15 40 { 41 my ($encoding, $metadata_set, $plugin, $filename_field); 42 43 16 44 # Parse command line arguments 17 45 if (!parsargv::parse(\@ARGV, 18 46 'encoding/.*/iso_8859_1', \$encoding, 19 'plugin/.*/', \$plugin)) { 47 'metadata_set/.*/', \$metadata_set, 48 'plugin/.*/', \$plugin, 49 'filename_field/.*/', \$filename_field)) { 50 &PrintUsage::print_txt_usage($options, "{explode.params}"); 20 51 die "\n"; 21 52 } … … 23 54 # The metadata database filename is the first value that remains after the options have been parsed out 24 55 my $filename = $ARGV[0]; 25 26 # Create a new instance of the plugin specified... 
56 if (!defined $filename || $filename !~ /\w/) { 57 &PrintUsage::print_txt_usage($options, "{explode.params}"); 58 print STDERR "You need to specify a filename"; 59 die "\n"; 60 } 61 # check that file exists 62 if (!-e $filename) { 63 print STDERR "File $filename doesn't exist...\n"; 64 die "\n"; 65 } 66 # check required options 67 if (!defined $plugin || $plugin !~ /\w/) { 68 &PrintUsage::print_txt_usage($options, "{explode.params}"); 69 print STDERR "You need to specify a plugin"; 70 die "\n"; 71 } 72 73 # check metadata set 74 if (defined $metadata_set && $metadata_set =~ /\w/) { 75 $metadata_set .= "."; 76 } else { 77 $metadata_set = ""; 78 } 79 80 #check filename field 81 27 82 my $plugobj; 28 83 require "$plugin.pm"; … … 64 119 my $record_number = 0; 65 120 foreach $record_text (@metadata_records) { 66 $record_number = $record_number + 1; 67 121 68 122 # Use the plugin's process function to avoid duplicating code 69 123 my $doc_obj = new doc($filename, "nonindexed_doc"); 70 124 $plugobj->process(\$record_text, undef, undef, $filename, undef, $doc_obj, 0); 71 72 my $document_file = sprintf("%4.4d", $record_number) . ".nul"; 125 # try to get a file name 126 my $document_file; 127 if (defined $filename_field) { 128 my $meta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $filename_field); 129 if (defined $meta) { 130 $meta =~ s/&\w{1,10};//g; # remove entities 131 $document_file = "$meta.nul"; 132 my $num = 0; 133 while (-e "$documents_directory/$document_file") { 134 $num++; 135 $document_file = "$meta$num.nul"; 136 } 137 } else { 138 $record_number = $record_number + 1; 139 $document_file = sprintf("%4.4d", $record_number) . ".nul"; 140 } 141 } else { 142 $record_number = $record_number + 1; 143 $document_file = sprintf("%4.4d", $record_number) . 
".nul"; 144 } 145 73 146 open(DUMMY_FILE, ">$documents_directory/$document_file"); 74 147 close(DUMMY_FILE); … … 76 149 # Look at all the metadata assigned to this record 77 150 my $record_metadata = $doc_obj->get_all_metadata($cursection); 78 &write_metadata_xml_file_entry(METADATA_XML_FILE, $document_file, $record_metadata );151 &write_metadata_xml_file_entry(METADATA_XML_FILE, $document_file, $record_metadata, $metadata_set); 79 152 } 80 153 … … 84 157 85 158 # Explode means just that: the original file is deleted 86 &util::rm($filename);159 #&util::rm($filename); 87 160 } 88 161 … … 93 166 my $file_name = shift(@_); 94 167 my $record_metadata = shift(@_); 95 168 my $meta_prefix = shift(@_); 169 96 170 # Make $file_name XML-safe 97 171 $file_name =~ s/</</g; … … 129 203 $value =~ s/>/>/g; 130 204 131 print $metadata_xml_file " <Metadata mode=\"accumulate\" name=\"$ field\">$value</Metadata>\n";205 print $metadata_xml_file " <Metadata mode=\"accumulate\" name=\"$meta_prefix$field\">$value</Metadata>\n"; 132 206 } 133 207
Note: See TracChangeset for help on using the changeset viewer.