Context Navigation

← Previous Changeset
Next Changeset →

Changeset 4750

Timestamp:

2003-06-23T11:52:31+12:00 (21 years ago)

Author:

mdewsnip

Message:

Improved formatting of usage texts automatically generated from John's option data structures.

File:

1 edited

trunk/gsdl/perllib/plugins/BasPlug.pm (modified) (6 diffs)

Legend:

Unmodified
Added
Removed

trunk/gsdl/perllib/plugins/BasPlug.pm

-              r4746
+              r4750
         'desc'     => "Base class for all the import plugins.",
         'inherits' => "No",
+        'args'     => $arguments,
+        'process_exp' => "",
+        'block_exp' => "" };
+        'args'     => $arguments };
 sub print_xml_usage {
 …
 sub print_usage_new
+sub new_print_usage
+{
     local $self = shift(@_);
+    local $optionlist = $self->{'option_list'};
+    local $pluginoptions = pop(@$optionlist);
+    return if (!defined($pluginoptions));
+    local $pluginname = $pluginoptions->{'name'};
+    # Print the usage message for a plugin (recursively)
+    local $descoffset = $self->determine_description_offset(0);
+    $self->print_plugin_usage($descoffset, 1);
+}
+sub determine_description_offset
+{
+    local $self = shift(@_);
+    local $maxoffset = shift(@_);
+    local $optionlistref = $self->{'option_list'};
+    local @optionlist = @$optionlistref;
+    local $pluginoptions = pop(@$optionlistref);
+    return $maxoffset if (!defined($pluginoptions));
+    # Find the length of the longest option string of this plugin
     local $pluginargs = $pluginoptions->{'args'};
-    # Produce the usage information using the data structure above
-    print STDERR " usage: plugin $pluginname";
     if (defined($pluginargs)) {
-    print STDERR " [options]";
+    }
-    print STDERR "\n\n";
-    # Display the plugin options, if there are some
-    if (defined($pluginargs)) {
-    # Find the length of the longest option string
-    local $maxlength = 0;
     foreach $option (@$pluginargs) {
         local $optionname = $option->{'name'};
         local $optiontype = $option->{'type'};
         local $optionstringlength = length($optionname);
+        local $optiondescoffset = 3 + length($optionname);
         if ($optiontype ne "flag") {
         $optionstringlength = $optionstringlength + 3 + length($optiontype);
+        $optiondescoffset = $optiondescoffset + 2 + length($optiontype) + 1;
+        }
         # Remember the longest
         if ($optionstringlength > $maxlength) {
         $maxlength = $optionstringlength;
+        if ($optiondescoffset > $maxoffset) {
+        $maxoffset = $optiondescoffset;
+        }
+    }
+    }
+    # Recurse up the plugin hierarchy
+    $maxoffset = $self->determine_description_offset($maxoffset);
+    $self->{'option_list'} = \@optionlist;
+    return $maxoffset;
+}
+sub print_plugin_usage
+{
+    local $self = shift(@_);
+    local $descoffset = shift(@_);
+    local $isleafclass = shift(@_);
+    local $optionlistref = $self->{'option_list'};
+    local @optionlist = @$optionlistref;
+    local $pluginoptions = pop(@$optionlistref);
+    return if (!defined($pluginoptions));
+    local $pluginname = $pluginoptions->{'name'};
+    local $pluginargs = $pluginoptions->{'args'};
+    # Produce the usage information using the data structure above
+    if ($isleafclass) {
+    print STDERR " usage: plugin $pluginname [options]\n\n";
+    }
+    # Display the plugin options, if there are some
+    if (defined($pluginargs)) {
     # Calculate the column offset of the option descriptions
+    local $optiondescoffset = 3 + $maxlength + 2;
+    local $optiondescoffset = $descoffset + 2;  # 2 spaces between options & descriptions
+    if ($isleafclass) {
+        print STDERR " specific options:\n";
+    }
+    else {
+        print STDERR " general options (from $pluginname):\n";
+    }
     # Display the plugin options
-    print STDERR " options:\n";
     foreach $option (@$pluginargs) {
         # Display option name
         local $optionname = $option->{'name'};
         print STDERR "  -$optionname";
         local $optionstringlength = 3 + length($optionname);
+        local $optionstringlength = length("  -$optionname");
         # Display option type, if the option is not a flag
         local $optiontype = $option->{'type'};
         if ($optiontype ne "flag") {
         print STDERR " <$optiontype>";
         $optionstringlength = $optionstringlength + (2 + length($optiontype) + 1);
+        $optionstringlength = $optionstringlength + length(" <$optiontype>");
+        }
 …
             local $encodingname = $enc;
             print STDERR " " x $optiondescoffset;
             print STDERR "$enc:";
+            print STDERR "$encodingname:";
             local $encodingdesc = $e->{$enc}->{'name'};
 …
+    }
+    # If the plugin inherits from another, do the parent now
+    if (defined($optionlist)) {
+    $self->print_usage_new();
+    }
+    # Recurse up the plugin hierarchy
+    $self->print_plugin_usage($descoffset, 0);
+    $self->{'option_list'} = \@optionlist;
+}
 …
     # Write the word
     print STDERR " " . $word;
     $linelength = $linelength + (length($word) + 1);
+    print STDERR " $word";
+    $linelength = $linelength + length(" $word");
+    }
 …
 sub print_general_usage {
     my ($plugin_name) = @_;
     print STDERR "\n  usage: plugin $plugin_name [options]\n\n";
     print STDERR "   -process_exp      A perl regular expression to match against filenames.\n";
     print STDERR "                     Matching filenames will be processed by this plugin.\n";
     print STDERR "                     Each plugin has its own default process_exp. e.g HTMLPlug\n";
     print STDERR "                     defaults to '(?i)\.html?\$' i.e. all documents ending in\n";
     print STDERR "                     .htm or .html (case-insensitive).\n\n";
     print STDERR "   -block_exp        Files matching this regular expression will be blocked from\n";
     print STDERR "                     being passed to any later plugins in the list. This has no\n";
     print STDERR "                     real effect other than to prevent lots of warning messages\n";
     print STDERR "                     about input files you don't care about. Each plugin might\n";
     print STDERR "                     have a default block_exp. e.g. by default HTMLPlug blocks\n";
     print STDERR "                     any files with .gif, .jpg, .jpeg, .png or .css\n";
     print STDERR "                     file extensions.\n\n";
     print STDERR "   -input_encoding   The encoding of the source documents. Documents will be\n";
     print STDERR "                     converted from these encodings and stored internally as\n";
     print STDERR "                     utf8. The default input_encoding is 'auto'. Accepted values\n";
     print STDERR "                     are:\n";
     print STDERR "                       auto: Use text categorization algorithm to automatically\n";
     print STDERR "                         identify the encoding of each source document. This\n";
     print STDERR "                         will be slower than explicitly setting the encoding\n";
     print STDERR "                         but will work where more than one encoding is used\n";
     print STDERR "                         within the same collection.\n";
     print STDERR "                       ascii: Plain 7 bit ascii. This may be a bit faster than\n";
     print STDERR "                         using iso_8859_1. Beware of using this on a collection\n";
     print STDERR "                         of documents that may contain characters outside the\n";
     print STDERR "                         plain 7 bit ascii set though (e.g. German or French\n";
     print STDERR "                         documents containing accents), use iso_8859_1 instead.\n";
     print STDERR "                       utf8: either utf8 or unicode -- automatically detected\n";
     print STDERR "                       unicode: just unicode\n";
     my $e = $encodings::encodings;
     foreach my $enc (sort {$e->{$a}->{'name'} cmp $e->{$b}->{'name'}} keys (%$e)) {
     print STDERR "                       $enc: $e->{$enc}->{'name'}\n";
+    }
     print STDERR "\n";
     print STDERR "   -default_encoding Use this encoding if -input_encoding is set to 'auto' and\n";
     print STDERR "                     the text categorization algorithm fails to extract the\n";
     print STDERR "                     encoding or extracts an encoding unsupported by Greenstone.\n";
     print STDERR "                     The default is iso_8859_1.\n\n";
     print STDERR "   -extract_language Identify the language of each document and set 'Language'\n";
     print STDERR "                      metadata. Note that this will be done automatically if\n";
     print STDERR "                     -input_encoding is 'auto'.\n\n";
     print STDERR "   -default_language If Greenstone fails to work out what language a document is\n";
     print STDERR "                     the 'Language' metadata element will be set to this value.\n";
     print STDERR "                     The default is 'en' (ISO 639 language symbols are used:\n";
     print STDERR "                     en = English). Note that if -input_encoding is not set to\n";
     print STDERR "                     'auto' and -extract_language is not set, all documents will\n";
     print STDERR "                     have their 'Language' metadata set to this value.\n\n";
     print STDERR "   -extract_acronyms Extract acronyms from within text and set as metadata\n";
     print STDERR "   -markup_acronyms  Add acronym metadata into document text\n\n";
     print STDERR "   -first            Comma separated list of first sizes to extract from the\n";
     print STDERR "                     text into a metadata field. The field is called 'FirstNNN'.\n\n";
     print STDERR "   -extract_email    Extract email addresses as metadata\n\n";
     print STDERR "   -extract_historical_years Extract time-period information from historical\n";
     print STDERR "                     documents.  This is stored as metadata with the document.\n";
     print STDERR "                     There is a search interface for this metadata, which you \n";
     print STDERR "                     can include in your collection by adding the statement:\n";
     print STDERR "                           format QueryInterface DateSearch\n";
     print STDERR "                     to your collection configuration file\n";
     print STDERR "   -maximum_year     The maximum historical date to be used as metadata (in a\n";
     print STDERR "                     Common Era date, such as 1950)\n";
     print STDERR "   -maximum_century  The maximum named century to be extracted as historical\n";
     print STDERR "                     metadata (e.g. 14 will extract all references up to the\n";
     print STDERR "                     14th century)\n";
     print STDERR "   -no_bibliography  Do not try and block bibliographic dates when extracting\n";
     print STDERR "                     historical dates.\n";
     print STDERR "   -cover_image      Will look for a prefix.jpg file (where prefix is the same\n";
     print STDERR "                     prefix as the file being processed) and associate it as a\n";
     print STDERR "                     cover image\n\n";
+}
+#  sub print_general_usage {
+#      my ($plugin_name) = @_;
+#      print STDERR "\n  usage: plugin $plugin_name [options]\n\n";
+#      print STDERR "   -process_exp      A perl regular expression to match against filenames.\n";
+#      print STDERR "                     Matching filenames will be processed by this plugin.\n";
+#      print STDERR "                     Each plugin has its own default process_exp. e.g HTMLPlug\n";
+#      print STDERR "                     defaults to '(?i)\.html?\$' i.e. all documents ending in\n";
+#      print STDERR "                     .htm or .html (case-insensitive).\n\n";
+#      print STDERR "   -block_exp        Files matching this regular expression will be blocked from\n";
+#      print STDERR "                     being passed to any later plugins in the list. This has no\n";
+#      print STDERR "                     real effect other than to prevent lots of warning messages\n";
+#      print STDERR "                     about input files you don't care about. Each plugin might\n";
+#      print STDERR "                     have a default block_exp. e.g. by default HTMLPlug blocks\n";
+#      print STDERR "                     any files with .gif, .jpg, .jpeg, .png or .css\n";
+#      print STDERR "                     file extensions.\n\n";
+#      print STDERR "   -input_encoding   The encoding of the source documents. Documents will be\n";
+#      print STDERR "                     converted from these encodings and stored internally as\n";
+#      print STDERR "                     utf8. The default input_encoding is 'auto'. Accepted values\n";
+#      print STDERR "                     are:\n";
+#      print STDERR "                       auto: Use text categorization algorithm to automatically\n";
+#      print STDERR "                         identify the encoding of each source document. This\n";
+#      print STDERR "                         will be slower than explicitly setting the encoding\n";
+#      print STDERR "                         but will work where more than one encoding is used\n";
+#      print STDERR "                         within the same collection.\n";
+#      print STDERR "                       ascii: Plain 7 bit ascii. This may be a bit faster than\n";
+#      print STDERR "                         using iso_8859_1. Beware of using this on a collection\n";
+#      print STDERR "                         of documents that may contain characters outside the\n";
+#      print STDERR "                         plain 7 bit ascii set though (e.g. German or French\n";
+#      print STDERR "                         documents containing accents), use iso_8859_1 instead.\n";
+#      print STDERR "                       utf8: either utf8 or unicode -- automatically detected\n";
+#      print STDERR "                       unicode: just unicode\n";
+#      my $e = $encodings::encodings;
+#      foreach my $enc (sort {$e->{$a}->{'name'} cmp $e->{$b}->{'name'}} keys (%$e)) {
+#   print STDERR "                       $enc: $e->{$enc}->{'name'}\n";
+#      }
+#      print STDERR "\n";
+#      print STDERR "   -default_encoding Use this encoding if -input_encoding is set to 'auto' and\n";
+#      print STDERR "                     the text categorization algorithm fails to extract the\n";
+#      print STDERR "                     encoding or extracts an encoding unsupported by Greenstone.\n";
+#      print STDERR "                     The default is iso_8859_1.\n\n";
+#      print STDERR "   -extract_language Identify the language of each document and set 'Language'\n";
+#      print STDERR "                      metadata. Note that this will be done automatically if\n";
+#      print STDERR "                     -input_encoding is 'auto'.\n\n";
+#      print STDERR "   -default_language If Greenstone fails to work out what language a document is\n";
+#      print STDERR "                     the 'Language' metadata element will be set to this value.\n";
+#      print STDERR "                     The default is 'en' (ISO 639 language symbols are used:\n";
+#      print STDERR "                     en = English). Note that if -input_encoding is not set to\n";
+#      print STDERR "                     'auto' and -extract_language is not set, all documents will\n";
+#      print STDERR "                     have their 'Language' metadata set to this value.\n\n";
+#      print STDERR "   -extract_acronyms Extract acronyms from within text and set as metadata\n";
+#      print STDERR "   -markup_acronyms  Add acronym metadata into document text\n\n";
+#      print STDERR "   -first            Comma separated list of first sizes to extract from the\n";
+#      print STDERR "                     text into a metadata field. The field is called 'FirstNNN'.\n\n";
+#      print STDERR "   -extract_email    Extract email addresses as metadata\n\n";
+#      print STDERR "   -extract_historical_years Extract time-period information from historical\n";
+#      print STDERR "                     documents.  This is stored as metadata with the document.\n";
+#      print STDERR "                     There is a search interface for this metadata, which you \n";
+#      print STDERR "                     can include in your collection by adding the statement:\n";
+#      print STDERR "                           format QueryInterface DateSearch\n";
+#      print STDERR "                     to your collection configuration file\n";
+#      print STDERR "   -maximum_year     The maximum historical date to be used as metadata (in a\n";
+#      print STDERR "                     Common Era date, such as 1950)\n";
+#      print STDERR "   -maximum_century  The maximum named century to be extracted as historical\n";
+#      print STDERR "                     metadata (e.g. 14 will extract all references up to the\n";
+#      print STDERR "                     14th century)\n";
+#      print STDERR "   -no_bibliography  Do not try and block bibliographic dates when extracting\n";
+#      print STDERR "                     historical dates.\n";
+#      print STDERR "   -cover_image      Will look for a prefix.jpg file (where prefix is the same\n";
+#      print STDERR "                     prefix as the file being processed) and associate it as a\n";
+#      print STDERR "                     cover image\n\n";
+#  }
 # print_usage should be overridden for any sub-classes having
 # their own plugin specific options
 sub print_usage {
     print STDERR "\nThis plugin has no plugin specific options\n\n";
+}
+# sub print_usage {
+#     print STDERR "\nThis plugin has no plugin specific options\n\n";
+# }
 sub new {

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 4750

Legend:

trunk/gsdl/perllib/plugins/BasPlug.pm

Download in other formats: