Changeset 10428


Ignore:
Timestamp:
2005-08-05T15:21:41+12:00 (19 years ago)
Author:
chi
Message:

Modified the way argument and option lists are passed to the secondary plugin. Also added an
option (extracted_word_metadata_fields) to retrieve metadata based on user-defined fields from the HTML document
converted by VB scripting.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/WordPlug.pm

    r10405 r10428  
    9494             'type' => "regexp",
    9595             'reqd' => "no",
    96              'deft' => "" }];
     96             'deft' => "" },
     97               { 'name' => "extracted_word_metadata_fields",
     98             'type' => "string",
     99             'deft' => "Title"}];
    97100   
    98101    push(@$arguments,@$ws_arg);
     
    101104    if(defined $arguments){ push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});}
    102105    if(defined $options) { push(@{$hashArgOptLists->{"OptList"}},$options)};
    103 
    104     #foreach my $arg (@{$hashArgOptLists->{"ArgList"}})
    105     #{
    106     #   print STDERR "**** Word Argument =$arg->{'name'}\n";
    107     #}
    108    
     106    my @arg_array = @$inputargs;
    109107    my $self = (defined $hashArgOptLists)? new ConvertToPlug($pluginlist,$inputargs,$hashArgOptLists): new ConvertToPlug($pluginlist,$inputargs);
    110108
     
    115113    if (defined $self->{'windows_scripting'}) {
    116114    if (!defined $secondary_plugin_options->{'StructuredHTMLPlug'}){
    117         $secondary_plugin_options->{'StructuredHTMLPlug'} =[];
     115        $secondary_plugin_options->{'StructuredHTMLPlug'} = [];
    118116        my $structhtml_options = $secondary_plugin_options->{'StructuredHTMLPlug'};
     117        if ($self->{'input_encoding'} eq "auto") {
     118        $self->{'input_encoding'} = "utf8";
     119        $self->{'extract_language'} = 1;
     120        push(@$structhtml_options,"-input_encoding", "utf8");
     121        push(@$structhtml_options,"-extract_language");
     122               
     123        # Instruct HTMLPlug (when eventually accessed through read_into_doc_obj)
     124        # to extract these metadata fields from the HEAD META fields
     125        push(@$structhtml_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
     126        push(@$structhtml_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
     127        }   
    119128    }
    120129    }
     
    139148    # to extract these metadata fields from the HEAD META fields
    140149    push(@$html_options,"-metadata_fields","Title,GENERATOR,date,author<Creator>");
    141     #push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
     150    push(@$html_options , "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?');
    142151    }
    143152   
    144153    $self = bless $self, $class;
    145     $self->load_secondary_plugins($class,$secondary_plugin_options);
     154    $self->load_secondary_plugins($class,$secondary_plugin_options,$hashArgOptLists);
    146155
    147156    return bless $self;
     
    171180
    172181    # Write it out again!
     182    $self->utf8_write_file (\$text, $conv_filename);
    173183}
    174184
     
    206216    my $self = shift (@_);
    207217    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
    208    
     218    
    209219    return $self->process_type("doc", $base_dir, $file, $doc_obj);
    210220}
Note: See TracChangeset for help on using the changeset viewer.