Ignore:
Timestamp:
2005-02-15T12:27:03+13:00 (19 years ago)
Author:
kjdon
Message:

Made the script declare its arguments in an options table and print usage via PrintUsage; added the metadata_set and filename_field arguments.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/explode_metadata_database.pl

    r8858 r9041  
    1010
    1111use parsargv;
    12 
    13 
     12use printusage;
     13
     14my $arguments =
     15    [ { 'name' => "encoding",
     16    'desc' => "{explode.encoding}",
     17    'type' => "string",
     18    'deft' => "iso_8859_1",
     19    'reqd' => "no" } ,
     20      { 'name' => "metadata_set",
     21    'desc' => "{explode.metadata_set}",
     22    'type' => "string",
     23    'reqd' => "no" } ,
     24      { 'name' => "plugin",
     25    'desc' => "{explode.plugin}",
     26    'type' => "string",
     27    'reqd' => "yes" },
     28      { 'name' => "filename_field",
     29    'desc' => "{explode.filename_field}",
     30    'type' => "string",
     31    'reqd' => "no"}
     32      ];
     33   
     34my $options = { 'name' => "explode_metadata_database.pl",
     35        'desc' => "{explode.desc}",
     36        'args' => $arguments };
     37
     38       
    1439sub main
    1540{
     41    my ($encoding, $metadata_set, $plugin, $filename_field);
     42
     43   
    1644    # Parse command line arguments
    1745    if (!parsargv::parse(\@ARGV,
    1846             'encoding/.*/iso_8859_1', \$encoding,
    19              'plugin/.*/', \$plugin)) {
     47             'metadata_set/.*/', \$metadata_set,
     48             'plugin/.*/', \$plugin,
     49             'filename_field/.*/', \$filename_field)) {
     50    &PrintUsage::print_txt_usage($options, "{explode.params}");
    2051    die "\n";
    2152    }
     
    2354    # The metadata database filename is the first value that remains after the options have been parsed out
    2455    my $filename = $ARGV[0];
    25 
    26     # Create a new instance of the plugin specified...
     56    if (!defined $filename || $filename !~ /\w/) {
     57    &PrintUsage::print_txt_usage($options, "{explode.params}");
     58    print STDERR "You need to specify a filename";
     59    die "\n";
     60    }
     61    # check that file exists
     62    if (!-e $filename) {
     63    print STDERR "File $filename doesn't exist...\n";
     64    die "\n";
     65    }
     66    # check required options
     67    if (!defined $plugin || $plugin !~ /\w/) {
     68    &PrintUsage::print_txt_usage($options, "{explode.params}");
     69    print STDERR "You need to specify a plugin";
     70    die "\n";
     71    }
     72   
     73    # check metadata set
     74    if (defined $metadata_set && $metadata_set =~ /\w/) {
     75    $metadata_set .= ".";
     76    } else {
     77    $metadata_set = "";
     78    }
     79
     80    #check filename field
     81   
    2782    my $plugobj;
    2883    require "$plugin.pm";
     
    64119    my $record_number = 0;
    65120    foreach $record_text (@metadata_records) {
    66     $record_number = $record_number + 1;
    67 
     121   
    68122    # Use the plugin's process function to avoid duplicating code
    69123    my $doc_obj = new doc($filename, "nonindexed_doc");
    70124    $plugobj->process(\$record_text, undef, undef, $filename, undef, $doc_obj, 0);
    71 
    72     my $document_file = sprintf("%4.4d", $record_number) . ".nul";
     125    # try to get a file name
     126    my $document_file;
     127    if (defined $filename_field) {
     128        my $meta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $filename_field);
     129        if (defined $meta) {
     130        $meta =~ s/&\w{1,10};//g; # remove entities
     131        $document_file = "$meta.nul";
     132        my $num = 0;
     133        while (-e "$documents_directory/$document_file") {
     134            $num++;
     135            $document_file = "$meta$num.nul";
     136        }
     137        } else {
     138        $record_number = $record_number + 1;
     139        $document_file = sprintf("%4.4d", $record_number) . ".nul";
     140        }
     141    } else {
     142        $record_number = $record_number + 1;
     143        $document_file = sprintf("%4.4d", $record_number) . ".nul";
     144    }
     145   
    73146    open(DUMMY_FILE, ">$documents_directory/$document_file");
    74147    close(DUMMY_FILE);
     
    76149    # Look at all the metadata assigned to this record
    77150    my $record_metadata = $doc_obj->get_all_metadata($cursection);
    78     &write_metadata_xml_file_entry(METADATA_XML_FILE, $document_file, $record_metadata);
     151    &write_metadata_xml_file_entry(METADATA_XML_FILE, $document_file, $record_metadata, $metadata_set);
    79152    }
    80153
     
    84157
    85158    # Explode means just that: the original file is deleted
    86     &util::rm($filename);
     159    #&util::rm($filename);
    87160}
    88161
     
    93166    my $file_name = shift(@_);
    94167    my $record_metadata = shift(@_);
    95 
     168    my $meta_prefix = shift(@_);
     169   
    96170    # Make $file_name XML-safe
    97171    $file_name =~ s/</&lt;/g;
     
    129203    $value =~ s/>/&gt;/g;
    130204
    131     print $metadata_xml_file "      <Metadata mode=\"accumulate\" name=\"$field\">$value</Metadata>\n";
     205    print $metadata_xml_file "      <Metadata mode=\"accumulate\" name=\"$meta_prefix$field\">$value</Metadata>\n";
    132206    }
    133207
Note: See TracChangeset for help on using the changeset viewer.