Changeset 4750


Ignore:
Timestamp:
2003-06-23T11:52:31+12:00 (21 years ago)
Author:
mdewsnip
Message:

Improved formatting of usage texts automatically generated from John's option data structures.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/BasPlug.pm

    r4746 r4750  
    127127        'desc'     => "Base class for all the import plugins.",
    128128        'inherits' => "No",
    129         'args'     => $arguments,
    130         'process_exp' => "",
    131         'block_exp' => "" };
     129        'args'     => $arguments };
    132130
    133131sub print_xml_usage {
     
    200198
    201199
    202 sub print_usage_new
     200sub new_print_usage
    203201{
    204202    local $self = shift(@_);
    205     local $optionlist = $self->{'option_list'};
    206     local $pluginoptions = pop(@$optionlist);
    207     return if (!defined($pluginoptions));
    208 
    209     local $pluginname = $pluginoptions->{'name'};
     203
     204    # Print the usage message for a plugin (recursively)
     205    local $descoffset = $self->determine_description_offset(0);
     206    $self->print_plugin_usage($descoffset, 1);
     207}
     208
     209
     210sub determine_description_offset
     211{
     212    local $self = shift(@_);
     213    local $maxoffset = shift(@_);
     214
     215    local $optionlistref = $self->{'option_list'};
     216    local @optionlist = @$optionlistref;
     217    local $pluginoptions = pop(@$optionlistref);
     218    return $maxoffset if (!defined($pluginoptions));
     219
     220    # Find the length of the longest option string of this plugin
    210221    local $pluginargs = $pluginoptions->{'args'};
    211 
    212     # Produce the usage information using the data structure above
    213     print STDERR " usage: plugin $pluginname";
    214222    if (defined($pluginargs)) {
    215     print STDERR " [options]";
    216     }
    217     print STDERR "\n\n";
    218 
    219     # Display the plugin options, if there are some
    220     if (defined($pluginargs)) {
    221     # Find the length of the longest option string
    222     local $maxlength = 0;
    223223    foreach $option (@$pluginargs) {
    224224        local $optionname = $option->{'name'};
    225225        local $optiontype = $option->{'type'};
    226226
    227         local $optionstringlength = length($optionname);
     227        local $optiondescoffset = 3 + length($optionname);
    228228        if ($optiontype ne "flag") {
    229         $optionstringlength = $optionstringlength + 3 + length($optiontype);
     229        $optiondescoffset = $optiondescoffset + 2 + length($optiontype) + 1;
    230230        }
    231231
    232232        # Remember the longest
    233         if ($optionstringlength > $maxlength) {
    234         $maxlength = $optionstringlength;
     233        if ($optiondescoffset > $maxoffset) {
     234        $maxoffset = $optiondescoffset;
    235235        }
    236236    }
    237 
     237    }
     238
     239    # Recurse up the plugin hierarchy
     240    $maxoffset = $self->determine_description_offset($maxoffset);
     241    $self->{'option_list'} = \@optionlist;
     242    return $maxoffset;
     243}
     244
     245
     246sub print_plugin_usage
     247{
     248    local $self = shift(@_);
     249    local $descoffset = shift(@_);
     250    local $isleafclass = shift(@_);
     251
     252    local $optionlistref = $self->{'option_list'};
     253    local @optionlist = @$optionlistref;
     254    local $pluginoptions = pop(@$optionlistref);
     255    return if (!defined($pluginoptions));
     256
     257    local $pluginname = $pluginoptions->{'name'};
     258    local $pluginargs = $pluginoptions->{'args'};
     259
     260    # Produce the usage information using the data structure above
     261    if ($isleafclass) {
     262    print STDERR " usage: plugin $pluginname [options]\n\n";
     263    }
     264
     265    # Display the plugin options, if there are some
     266    if (defined($pluginargs)) {
    238267    # Calculate the column offset of the option descriptions
    239     local $optiondescoffset = 3 + $maxlength + 2;
     268    local $optiondescoffset = $descoffset + 2;  # 2 spaces between options & descriptions
     269
     270    if ($isleafclass) {
     271        print STDERR " specific options:\n";
     272    }
     273    else {
     274        print STDERR " general options (from $pluginname):\n";
     275    }
    240276
    241277    # Display the plugin options
    242     print STDERR " options:\n";
    243278    foreach $option (@$pluginargs) {
    244279        # Display option name
    245280        local $optionname = $option->{'name'};
    246281        print STDERR "  -$optionname";
    247         local $optionstringlength = 3 + length($optionname);
    248  
     282        local $optionstringlength = length("  -$optionname");
     283
    249284        # Display option type, if the option is not a flag
    250285        local $optiontype = $option->{'type'};
    251286        if ($optiontype ne "flag") {
    252287        print STDERR " <$optiontype>";
    253         $optionstringlength = $optionstringlength + (2 + length($optiontype) + 1);
     288        $optionstringlength = $optionstringlength + length(" <$optiontype>");
    254289        }
    255290
     
    286321            local $encodingname = $enc;
    287322            print STDERR " " x $optiondescoffset;
    288             print STDERR "$enc:";
     323            print STDERR "$encodingname:";
    289324
    290325            local $encodingdesc = $e->{$enc}->{'name'};
     
    299334    }
    300335
    301     # If the plugin inherits from another, do the parent now
    302     if (defined($optionlist)) {
    303     $self->print_usage_new();
    304     }
     336    # Recurse up the plugin hierarchy
     337    $self->print_plugin_usage($descoffset, 0);
     338    $self->{'option_list'} = \@optionlist;
    305339}
    306340
     
    338372
    339373    # Write the word
    340     print STDERR " " . $word;
    341     $linelength = $linelength + (length($word) + 1);
     374    print STDERR " $word";
     375    $linelength = $linelength + length(" $word");
    342376    }
    343377
     
    346380
    347381
    348 sub print_general_usage {
    349     my ($plugin_name) = @_;
    350 
    351     print STDERR "\n  usage: plugin $plugin_name [options]\n\n";
    352 
    353     print STDERR "   -process_exp      A perl regular expression to match against filenames.\n";
    354     print STDERR "                     Matching filenames will be processed by this plugin.\n";
    355     print STDERR "                     Each plugin has its own default process_exp. e.g HTMLPlug\n";
    356     print STDERR "                     defaults to '(?i)\.html?\$' i.e. all documents ending in\n";
    357     print STDERR "                     .htm or .html (case-insensitive).\n\n";
    358 
    359     print STDERR "   -block_exp        Files matching this regular expression will be blocked from\n";
    360     print STDERR "                     being passed to any later plugins in the list. This has no\n";
    361     print STDERR "                     real effect other than to prevent lots of warning messages\n";
    362     print STDERR "                     about input files you don't care about. Each plugin might\n";
    363     print STDERR "                     have a default block_exp. e.g. by default HTMLPlug blocks\n";
    364     print STDERR "                     any files with .gif, .jpg, .jpeg, .png or .css\n";
    365     print STDERR "                     file extensions.\n\n";
    366 
    367 
    368     print STDERR "   -input_encoding   The encoding of the source documents. Documents will be\n";
    369     print STDERR "                     converted from these encodings and stored internally as\n";
    370     print STDERR "                     utf8. The default input_encoding is 'auto'. Accepted values\n";
    371     print STDERR "                     are:\n";
    372 
    373     print STDERR "                       auto: Use text categorization algorithm to automatically\n";
    374     print STDERR "                         identify the encoding of each source document. This\n";
    375     print STDERR "                         will be slower than explicitly setting the encoding\n";
    376     print STDERR "                         but will work where more than one encoding is used\n";
    377     print STDERR "                         within the same collection.\n";
    378 
    379     print STDERR "                       ascii: Plain 7 bit ascii. This may be a bit faster than\n";
    380     print STDERR "                         using iso_8859_1. Beware of using this on a collection\n";
    381     print STDERR "                         of documents that may contain characters outside the\n";
    382     print STDERR "                         plain 7 bit ascii set though (e.g. German or French\n";
    383     print STDERR "                         documents containing accents), use iso_8859_1 instead.\n";
    384 
    385     print STDERR "                       utf8: either utf8 or unicode -- automatically detected\n";
    386     print STDERR "                       unicode: just unicode\n";
    387 
    388     my $e = $encodings::encodings;
    389     foreach my $enc (sort {$e->{$a}->{'name'} cmp $e->{$b}->{'name'}} keys (%$e)) {
    390     print STDERR "                       $enc: $e->{$enc}->{'name'}\n";
    391     }
    392     print STDERR "\n";
    393     print STDERR "   -default_encoding Use this encoding if -input_encoding is set to 'auto' and\n";
    394     print STDERR "                     the text categorization algorithm fails to extract the\n";
    395     print STDERR "                     encoding or extracts an encoding unsupported by Greenstone.\n";
    396     print STDERR "                     The default is iso_8859_1.\n\n";
    397 
    398     print STDERR "   -extract_language Identify the language of each document and set 'Language'\n";
    399     print STDERR "                      metadata. Note that this will be done automatically if\n";
    400     print STDERR "                     -input_encoding is 'auto'.\n\n";
    401     print STDERR "   -default_language If Greenstone fails to work out what language a document is\n";
    402     print STDERR "                     the 'Language' metadata element will be set to this value.\n";
    403     print STDERR "                     The default is 'en' (ISO 639 language symbols are used:\n";
    404     print STDERR "                     en = English). Note that if -input_encoding is not set to\n";
    405     print STDERR "                     'auto' and -extract_language is not set, all documents will\n";
    406     print STDERR "                     have their 'Language' metadata set to this value.\n\n";
    407 
    408     print STDERR "   -extract_acronyms Extract acronyms from within text and set as metadata\n";
    409 
    410     print STDERR "   -markup_acronyms  Add acronym metadata into document text\n\n";
    411 
    412     print STDERR "   -first            Comma separated list of first sizes to extract from the\n";
    413     print STDERR "                     text into a metadata field. The field is called 'FirstNNN'.\n\n";
    414 
    415     print STDERR "   -extract_email    Extract email addresses as metadata\n\n";
    416 
    417     print STDERR "   -extract_historical_years Extract time-period information from historical\n";
    418     print STDERR "                     documents.  This is stored as metadata with the document.\n";
    419     print STDERR "                     There is a search interface for this metadata, which you \n";
    420     print STDERR "                     can include in your collection by adding the statement:\n";
    421     print STDERR "                           format QueryInterface DateSearch\n";
    422     print STDERR "                     to your collection configuration file\n";
    423     print STDERR "   -maximum_year     The maximum historical date to be used as metadata (in a\n";
    424     print STDERR "                     Common Era date, such as 1950)\n";
    425     print STDERR "   -maximum_century  The maximum named century to be extracted as historical\n";
    426     print STDERR "                     metadata (e.g. 14 will extract all references up to the\n";
    427     print STDERR "                     14th century)\n";
    428     print STDERR "   -no_bibliography  Do not try and block bibliographic dates when extracting\n";
    429     print STDERR "                     historical dates.\n";
    430     print STDERR "   -cover_image      Will look for a prefix.jpg file (where prefix is the same\n";
    431     print STDERR "                     prefix as the file being processed) and associate it as a\n";
    432     print STDERR "                     cover image\n\n";
    433 }
     382sub print_general_usage {
     383    my ($plugin_name) = @_;
     384
     385    print STDERR "\n  usage: plugin $plugin_name [options]\n\n";
     386
     387    print STDERR "   -process_exp      A perl regular expression to match against filenames.\n";
     388    print STDERR "                     Matching filenames will be processed by this plugin.\n";
     389    print STDERR "                     Each plugin has its own default process_exp. e.g HTMLPlug\n";
     390    print STDERR "                     defaults to '(?i)\.html?\$' i.e. all documents ending in\n";
     391    print STDERR "                     .htm or .html (case-insensitive).\n\n";
     392
     393    print STDERR "   -block_exp        Files matching this regular expression will be blocked from\n";
     394    print STDERR "                     being passed to any later plugins in the list. This has no\n";
     395    print STDERR "                     real effect other than to prevent lots of warning messages\n";
     396    print STDERR "                     about input files you don't care about. Each plugin might\n";
     397    print STDERR "                     have a default block_exp. e.g. by default HTMLPlug blocks\n";
     398    print STDERR "                     any files with .gif, .jpg, .jpeg, .png or .css\n";
     399    print STDERR "                     file extensions.\n\n";
     400
     401
     402    print STDERR "   -input_encoding   The encoding of the source documents. Documents will be\n";
     403    print STDERR "                     converted from these encodings and stored internally as\n";
     404    print STDERR "                     utf8. The default input_encoding is 'auto'. Accepted values\n";
     405    print STDERR "                     are:\n";
     406
     407    print STDERR "                       auto: Use text categorization algorithm to automatically\n";
     408    print STDERR "                         identify the encoding of each source document. This\n";
     409    print STDERR "                         will be slower than explicitly setting the encoding\n";
     410    print STDERR "                         but will work where more than one encoding is used\n";
     411    print STDERR "                         within the same collection.\n";
     412
     413    print STDERR "                       ascii: Plain 7 bit ascii. This may be a bit faster than\n";
     414    print STDERR "                         using iso_8859_1. Beware of using this on a collection\n";
     415    print STDERR "                         of documents that may contain characters outside the\n";
     416    print STDERR "                         plain 7 bit ascii set though (e.g. German or French\n";
     417    print STDERR "                         documents containing accents), use iso_8859_1 instead.\n";
     418
     419    print STDERR "                       utf8: either utf8 or unicode -- automatically detected\n";
     420    print STDERR "                       unicode: just unicode\n";
     421
     422    my $e = $encodings::encodings;
     423    foreach my $enc (sort {$e->{$a}->{'name'} cmp $e->{$b}->{'name'}} keys (%$e)) {
     424 print STDERR "                       $enc: $e->{$enc}->{'name'}\n";
     425    }
     426    print STDERR "\n";
     427    print STDERR "   -default_encoding Use this encoding if -input_encoding is set to 'auto' and\n";
     428    print STDERR "                     the text categorization algorithm fails to extract the\n";
     429    print STDERR "                     encoding or extracts an encoding unsupported by Greenstone.\n";
     430    print STDERR "                     The default is iso_8859_1.\n\n";
     431
     432    print STDERR "   -extract_language Identify the language of each document and set 'Language'\n";
     433    print STDERR "                      metadata. Note that this will be done automatically if\n";
     434    print STDERR "                     -input_encoding is 'auto'.\n\n";
     435    print STDERR "   -default_language If Greenstone fails to work out what language a document is\n";
     436    print STDERR "                     the 'Language' metadata element will be set to this value.\n";
     437    print STDERR "                     The default is 'en' (ISO 639 language symbols are used:\n";
     438    print STDERR "                     en = English). Note that if -input_encoding is not set to\n";
     439    print STDERR "                     'auto' and -extract_language is not set, all documents will\n";
     440    print STDERR "                     have their 'Language' metadata set to this value.\n\n";
     441
     442    print STDERR "   -extract_acronyms Extract acronyms from within text and set as metadata\n";
     443
     444    print STDERR "   -markup_acronyms  Add acronym metadata into document text\n\n";
     445
     446    print STDERR "   -first            Comma separated list of first sizes to extract from the\n";
     447    print STDERR "                     text into a metadata field. The field is called 'FirstNNN'.\n\n";
     448
     449    print STDERR "   -extract_email    Extract email addresses as metadata\n\n";
     450
     451    print STDERR "   -extract_historical_years Extract time-period information from historical\n";
     452    print STDERR "                     documents.  This is stored as metadata with the document.\n";
     453    print STDERR "                     There is a search interface for this metadata, which you \n";
     454    print STDERR "                     can include in your collection by adding the statement:\n";
     455    print STDERR "                           format QueryInterface DateSearch\n";
     456    print STDERR "                     to your collection configuration file\n";
     457    print STDERR "   -maximum_year     The maximum historical date to be used as metadata (in a\n";
     458    print STDERR "                     Common Era date, such as 1950)\n";
     459    print STDERR "   -maximum_century  The maximum named century to be extracted as historical\n";
     460    print STDERR "                     metadata (e.g. 14 will extract all references up to the\n";
     461    print STDERR "                     14th century)\n";
     462    print STDERR "   -no_bibliography  Do not try and block bibliographic dates when extracting\n";
     463    print STDERR "                     historical dates.\n";
     464    print STDERR "   -cover_image      Will look for a prefix.jpg file (where prefix is the same\n";
     465    print STDERR "                     prefix as the file being processed) and associate it as a\n";
     466    print STDERR "                     cover image\n\n";
     467}
    434468
    435469# print_usage should be overridden for any sub-classes having
    436470# their own plugin specific options
    437 sub print_usage {
    438     print STDERR "\nThis plugin has no plugin specific options\n\n";
    439 }
     471# sub print_usage {
     472#     print STDERR "\nThis plugin has no plugin specific options\n\n";
     473# }
    440474
    441475sub new {
Note: See TracChangeset for help on using the changeset viewer.