Ignore:
Timestamp:
2003-06-20T14:22:34+12:00 (21 years ago)
Author:
mdewsnip
Message:

Tidied up the data structures (representing the options of the plugin) in preparation for removing the print_usage() routines.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/BasPlug.pm

    r3834 r4744  
    4545
    4646my $unicode_list =
    47 [ { 'name' => "auto",
    48     'desc' => "Use text categorization algorithm to automatically identify the encoding of each source document. This will be slower than explicitly setting the encoding but will work where more than one encoding is used within the same collection." } ,
    49   { 'name' => "ascii",
    50     'desc' => "Plain 7 bit ascii. This may be a bit faster than using iso_8859_1. Beware of using this on a collection of documents that may contain characters outside the plain 7 bit ascii set though (e.g. German or French documents containing accents), use iso_8859_1 instead." },
    51   { 'name' => "utf8",
    52     'desc' => "either utf8 or unicode -- automatically detected." },
    53   { 'name' => "unicode",
    54     'desc' => "just unicode" } ];
     47    [ { 'name' => "auto",
     48    'desc' => "Use text categorization algorithm to automatically identify the encoding of each source document. This will be slower than explicitly setting the encoding but will work where more than one encoding is used within the same collection." } ,
     49      { 'name' => "ascii",
     50    'desc' => "Plain 7 bit ascii. This may be a bit faster than using iso_8859_1. Beware of using this on a collection of documents that may contain characters outside the plain 7 bit ascii set though (e.g. German or French documents containing accents), use iso_8859_1 instead." },
     51      { 'name' => "utf8",
     52    'desc' => "either utf8 or unicode -- automatically detected." },
     53      { 'name' => "unicode",
     54    'desc' => "just unicode" } ];
    5555
    5656my $arguments =
    5757    [ { 'name' => "process_exp",
    58     'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. Each plugin has its own default process_exp. e.g HTMLPlug defaults to '(?i)\.html?\$' i.e. all documents ending in .htm or .html (case-insensitive).",
     58    'desc' => "A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",
    5959    'type' => "string",
    6060    'deft' => "",
     
    6666    'reqd' => "no" },
    6767      { 'name' => "input_encoding",
    68     'desc' => "The encoding of the source documents. Documents will be converted from these encodings and stored internally as utf8. The default input_encoding is 'auto'.",
     68    'desc' => "The encoding of the source documents. Documents will be converted from these encodings and stored internally as utf8.",
    6969    'type' => "enum",
    7070    'list' => $unicode_list,
     
    7272    'deft' => "auto" } ,
    7373      { 'name' => "default_encoding",
    74     'desc' => "Use this encoding if -input_encoding is set to 'auto' and the text categorization algorithm fails to extract the encoding or extracts an encoding unsupported by Greenstone.  The default is iso_8859_1.",
    75     'type' => "flag",
    76     'reqd' => "no" },
     74    'desc' => "Use this encoding if -input_encoding is set to 'auto' and the text categorization algorithm fails to extract the encoding or extracts an encoding unsupported by Greenstone.",
     75    'type' => "enum",
     76    'reqd' => "no",
     77        'deft' => "utf8" },
    7778      { 'name' => "extract_language",
    7879    'desc' => "Identify the language of each document and set 'Language' metadata. Note that this will be done automatically if -input_encoding is 'auto'.",
     
    107108    'desc' => "The maximum historical date to be used as metadata (in a Common Era date, such as 1950).",
    108109    'type' => "int",
     110    'deft' => (localtime)[5]+1900,
    109111    'reqd' => "no"},
    110112      { 'name' => "maximum_century",
    111113    'desc' => "The maximum named century to be extracted as historical metadata (e.g. 14 will extract all references up to the 14th century).",
    112114    'type' => "int",
     115    'deft' => "-1",
    113116    'reqd' => "no" },
    114117      { 'name' => "no_bibliography",
     
    196199}
    197200
     201
     202# sub print_usage_new
     203# {
     204# }
     205
     206
     207sub print_usage_new
     208{
     209    local $self = shift(@_);
     210    local $optionlist = $self->{'option_list'};
     211    local $pluginoptions = pop(@$optionlist);
     212    return if (!defined($pluginoptions));
     213
     214    local $pluginname = $pluginoptions->{'name'};
     215    local $pluginargs = $pluginoptions->{'args'};
     216
     217    # Produce the usage information using the data structure above
     218    print STDERR " usage: plugin $pluginname";
     219    if (defined($pluginargs)) {
     220    print STDERR " [options]";
     221    }
     222    print STDERR "\n\n";
     223
     224    # Display the plugin options, if there are some
     225    if (defined($pluginargs)) {
     226    # Find the length of the longest option string
     227    local $maxlength = 0;
     228    foreach $option (@$pluginargs) {
     229        local $optionname = $option->{'name'};
     230        local $optiontype = $option->{'type'};
     231
     232        local $optionstringlength = length($optionname);
     233        if ($optiontype ne "flag") {
     234        $optionstringlength = $optionstringlength + 3 + length($optiontype);
     235        }
     236
     237        # Remember the longest
     238        if ($optionstringlength > $maxlength) {
     239        $maxlength = $optionstringlength;
     240        }
     241    }
     242
     243    # Calculate the column offset of the option descriptions
     244    local $optiondescoffset = 3 + $maxlength + 2;
     245
     246    # Display the plugin options
     247    print STDERR " options:\n";
     248    foreach $option (@$pluginargs) {
     249        # Display option name
     250        local $optionname = $option->{'name'};
     251        print STDERR "  -$optionname";
     252        local $optionstringlength = 3 + length($optionname);
     253 
     254        # Display option type, if the option is not a flag
     255        local $optiontype = $option->{'type'};
     256        if ($optiontype ne "flag") {
     257        print STDERR " <$optiontype>";
     258        $optionstringlength = $optionstringlength + (2 + length($optiontype) + 1);
     259        }
     260
     261        # Display the option description
     262        local $optiondesc = $option->{'desc'};
     263        &display_text_in_column($optiondesc, $optiondescoffset, $optionstringlength, 80);
     264
     265        # Show the default value for the option, if there is one
     266        local $optiondefault = $option->{'deft'};
     267        if (defined($optiondefault)) {
     268        print STDERR " " x $optiondescoffset;
     269        print STDERR "Default: " . $optiondefault . "\n";
     270        }
     271
     272        # If the option has a list of possible values, display these
     273        local $optionvalueslist = $option->{'list'};
     274        if (defined($optionvalueslist)) {
     275        print STDERR "\n";
     276        foreach $optionvalue (@$optionvalueslist) {
     277            local $optionvaluename = $optionvalue->{'name'};
     278            print STDERR " " x $optiondescoffset;
     279            print STDERR "$optionvaluename:";
     280
     281            local $optionvaluedesc = $optionvalue->{'desc'};
     282            &display_text_in_column($optionvaluedesc, ($optiondescoffset + 2),
     283                        $optiondescoffset + length($optionvaluename), 80);
     284        }
     285        }
     286
     287        # Special case for 'input_encoding'
     288        if ($optionname =~ m/^input_encoding$/i) {
     289        my $e = $encodings::encodings;
     290        foreach $enc (sort {$e->{$a}->{'name'} cmp $e->{$b}->{'name'}} keys (%$e)) {
     291            local $encodingname = $enc;
     292            print STDERR " " x $optiondescoffset;
     293            print STDERR "$enc:";
     294
     295            local $encodingdesc = $e->{$enc}->{'name'};
     296            &display_text_in_column($encodingdesc, ($optiondescoffset + 2),
     297                        $optiondescoffset + length($encodingname), 80);
     298        }
     299        }
     300
     301        # Add a blank line to separate options
     302        print STDERR "\n";
     303    }
     304    }
     305
     306    # If the plugin inherits from another, do the parent now
     307    if (defined($optionlist)) {
     308    $self->print_usage_new();
     309    }
     310}
     311
     312
     313sub display_text_in_column
     314{
     315    local ($text, $columnbeg, $firstlineoffset, $columnend) = @_;
     316
     317    # Spaces are put *before* words, so treat the column beginning as 1 smaller than it is
     318    $columnbeg = $columnbeg - 1;
     319
     320    # Add some padding (if needed) for the first line
     321    local $linelength = $columnbeg;
     322    if ($firstlineoffset < $columnbeg) {
     323    print STDERR " " x ($columnbeg - $firstlineoffset);
     324    }
     325    else {
     326    $linelength = $firstlineoffset;
     327    }
     328
     329    # Break the text into words, and display one at a time
     330    local @words = split(/ /, $text);
     331
     332    foreach $word (@words) {
     333    # Unescape '<' and '>' characters
     334    $word =~ s/&lt;/</g;
     335    $word =~ s/&gt;/>/g;
     336
     337    # If printing this word would except the column end, start a new line
     338    if (($linelength + length($word)) >= $columnend) {
     339        print STDERR "\n";
     340        print STDERR " " x $columnbeg;
     341        $linelength = $columnbeg;
     342    }
     343
     344    # Write the word
     345    print STDERR " " . $word;
     346    $linelength = $linelength + (length($word) + 1);
     347    }
     348
     349    print STDERR "\n";
     350}
     351
     352
    198353sub print_general_usage {
    199354    my ($plugin_name) = @_;
     
    316471             q^process_exp/.*/^, \$self->{'process_exp'},
    317472             q^block_exp/.*/^, \$self->{'block_exp'},
     473             q^extract_language^, \$self->{'extract_language'},
    318474             q^extract_acronyms^, \$self->{'extract_acronyms'},
    319              q^extract_keyphrases^, \$self->{'kea'}, #with extra options
    320              q^extract_keyphrase_options/.*/^, \$self->{'kea_options'}, #no extra options
     475             q^extract_keyphrases^, \$self->{'kea'}, #with extra options (UNDOCUMENTED)
     476             q^extract_keyphrase_options/.*/^, \$self->{'kea_options'}, #no extra options (UNDOCUMENTED)
    321477             qq^input_encoding/$enc/auto^, \$self->{'input_encoding'},
    322478             qq^default_encoding/$denc/utf8^, \$self->{'default_encoding'},
Note: See TracChangeset for help on using the changeset viewer.