Changeset 10338


Ignore:
Timestamp:
2005-07-28T11:31:20+12:00 (19 years ago)
Author:
kjdon
Message:

tidied this up: uses new parse2, use strict

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/bin/script/explode_metadata_database.pl

    r9147 r10338  
    88}
    99
    10 
    11 use parsargv;
     10use strict;
     11no strict 'subs'; # allow barewords (eg STDERR) as function arguments
     12no strict 'refs'; # allow filehandles to be variables and vice versa
    1213use printusage;
     14use parse2;
     15
    1316my $unicode_list =
    1417    [ { 'name' => "auto",
     
    2225
    2326my $arguments =
    24     [ { 'name' => "input_encoding",
     27    [
     28      { 'name' => "plugin",
     29    'desc' => "{explode.plugin}",
     30    'type' => "string",
     31    'reqd' => "yes",
     32    'hiddengli' => "yes"},
     33      { 'name' => "input_encoding",
    2534    'desc' => "{explode.encoding}",
    2635    'type' => "enum",
     
    3342    'reqd' => "no",
    3443    'hiddengli' => "yes"},
    35       { 'name' => "plugin",
    36     'desc' => "{explode.plugin}",
    37     'type' => "string",
    38     'reqd' => "yes",
    39     'hiddengli' => "yes"},
    4044      { 'name' => "document_field",
    4145    'desc' => "{explode.document_field}",
     
    5357    'desc' => "{explode.filename_field}",
    5458    'type' => "string",
    55     'reqd' => "no"}     
     59    'reqd' => "no"},
     60      { 'name' => "verbosity",
     61    'desc' => "{import.verbosity}",
     62    'type' => "int",
     63    'range' => "0,",
     64    'deft' => "1",
     65    'reqd' => "no",
     66    'modegli' => "4" },
     67      { 'name' => "xml",
     68    'desc' => "",
     69    'type' => "flag",
     70    'reqd' => "no",
     71    'hiddengli' => "yes" }
    5672      ];
    5773   
     
    6379sub main
    6480{
    65     my ($encoding, $metadata_set, $plugin, $filename_field,
    66     $document_field, $document_prefix, $document_suffix);
     81    my ($language, $input_encoding, $metadata_set, $plugin, $filename_field,
     82    $document_field, $document_prefix, $document_suffix, $verbosity);
    6783
    6884    my $xml = 0;
    69     # Parse command line arguments
    70     if (!parsargv::parse(\@ARGV,
    71              'language/.*/', \$language,
    72              'input_encoding/.*/auto', \$encoding,
    73              'metadata_set/.*/', \$metadata_set,
    74              'plugin/.*/', \$plugin,
    75              'filename_field/.*/', \$filename_field,
    76              'document_field/.*/', \$document_field,
    77              'document_prefix/.*/', \$document_prefix,
    78              'document_suffix/.*/', \$document_suffix,
    79              q^xml^, \$xml)) {
     85
     86    my $hashParsingResult = {};
     87    my $blnParseFailed = "false";
     88    # parse the options
     89    my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options");
     90    # There should be one arg left after parsing
     91    if($intArgLeftinAfterParsing > 1)
     92    {
    8093    &PrintUsage::print_txt_usage($options, "{explode.params}");
    8194    die "\n";
    8295    }
    83    
     96
     97    foreach my $strVariable (keys %$hashParsingResult)
     98    {
     99    eval "\$$strVariable = \$hashParsingResult->{\"\$strVariable\"}";
     100    }
     101       
    84102    # If $language has been specified, load the appropriate resource bundle
    85103    # (Otherwise, the default resource bundle will be loaded automatically)
    86     if ($language) {
     104    if ($language && $language =~ /\S/) {
    87105    &gsprintf::load_language_specific_resource_bundle($language);
    88106    }
     
    121139
    122140    #check filename field
    123     if (defined $fileanme_field && $filenmae_field eq "") {
     141    if (defined $filename_field && $filename_field eq "") {
    124142    undef $filename_field;
    125143    }
     
    132150    $plugobj->init(1, "STDERR", "STDERR");
    133151   
    134     if ($encoding eq "auto") {
    135     $plugobj->{'input_encoding'} = $encoding;   
    136     (my $language, $encoding) = $plugobj->textcat_get_language_encoding ($filename);
    137     }
     152    if ($input_encoding eq "auto") {
     153    $plugobj->{'input_encoding'} = $input_encoding;   
     154    ($language, $input_encoding) = $plugobj->textcat_get_language_encoding ($filename);
     155    }
     156    my $text = "";
    138157    # Use the plugin's read_file function to avoid duplicating code
    139     $plugobj->read_file($filename, $encoding, undef, \$text);
     158    $plugobj->read_file($filename, $input_encoding, undef, \$text);
    140159
    141160    # Create a directory to store the document files...
     
    147166
    148167    # ...and a metadata.xml file for the document metadata (extracted from the database)
    149     $documents_metadata_xml_file = &util::filename_cat($documents_directory, "metadata.xml");
     168    my $documents_metadata_xml_file = &util::filename_cat($documents_directory, "metadata.xml");
    150169    if (-e $documents_metadata_xml_file) {
    151170    die "Error: document metadata.xml file $documents_metadata_xml_file already exists (bailing).\n";
     
    166185    # Write the metadata from each record to the metadata.xml file
    167186    my $record_number = 0;
    168     foreach $record_text (@metadata_records) {
     187    foreach my $record_text (@metadata_records) {
    169188   
    170189    # Use the plugin's process function to avoid duplicating code
     
    183202        if ($field eq $document_field) {
    184203            my $document_file_full = $document_prefix . $value . $document_suffix;
    185             $document_file = &obtain_document($self, $document_file_full, $documents_directory);
     204            $document_file = &obtain_document($document_file_full, $documents_directory, $verbosity);
    186205        }
    187206        }
     
    191210        # try to get a file name
    192211        if (defined $filename_field) {
     212       
    193213        my $meta = $doc_obj->get_metadata_element($doc_obj->get_top_section(), $filename_field);
    194214        if (defined $meta) {
     
    284304sub obtain_document
    285305{
    286     my $self = shift(@_);
    287306    my $document_file_full = shift(@_);
    288307    my $documents_directory = shift(@_);
    289 
    290     my $outhandle = $self->{'outhandle'};
    291     print $outhandle "Obtaining document file $document_file_full...\n"
    292     if ($self->{'verbosity'} > 1);
     308    my $verbosity = shift(@_);
     309   
     310    print STDERR "Obtaining document file $document_file_full...\n" if ($verbosity > 1);
    293311
    294312    my $document_file_name;
     
    302320
    303321    my $wget_options = "--quiet";
    304     $wget_options = "--verbose" if ($self->{'verbosity'} > 2);
     322    $wget_options = "--verbose" if ($verbosity > 2);
    305323    $wget_options .= " --timestamping";  # Only re-download files if they're newer
    306324    `wget $wget_options $document_file_full --output-document $local_document_file`;
Note: See TracChangeset for help on using the changeset viewer.