Context Navigation

← Previous Changeset
Next Changeset →

Changeset 2084

Timestamp:

2001-02-28T16:24:57+13:00 (23 years ago)

Author:

jrm21

Message:

usage message is now formatted to fit within 80 columns.

File:

1 edited

trunk/gsdl/perllib/plugins/BasPlug.pm (modified) (4 diffs)

Legend:

Unmodified
Added
Removed

trunk/gsdl/perllib/plugins/BasPlug.pm

-              r1999
+              r2084
     print STDERR "   -block_exp        Files matching this regular expression will be blocked from\n";
     print STDERR "                     being passed to any further plugins in the list. This has no\n";
+    print STDERR "                     being passed to any later plugins in the list. This has no\n";
     print STDERR "                     real effect other than to prevent lots of warning messages\n";
     print STDERR "                     about input files you don't care about. Each plugin may or may\n";
     print STDERR "                     not have a default block_exp. e.g. by default HTMLPlug blocks\n";
+    print STDERR "                     about input files you don't care about. Each plugin might\n";
+    print STDERR "                     have a default block_exp. e.g. by default HTMLPlug blocks\n";
     print STDERR "                     any files with .gif, .jpg, .jpeg, .png, .rtf or .css\n";
     print STDERR "                     file extensions.\n\n";
 …
     print STDERR "                         within the same collection.\n";
     print STDERR "                       ascii: Plain 7 bit ascii. This may be a little faster than\n";
     print STDERR "                         using iso_8859_1. Beware of using 'ascii' on a collection\n";
     print STDERR "                         of documents that may contain characters outside of plain\n";
     print STDERR "                         7 bit ascii though (e.g. German or French documents\n";
     print STDERR "                         containing accents), use iso_8859_1 instead.\n";
+    print STDERR "                       ascii: Plain 7 bit ascii. This may be a bit faster than\n";
+    print STDERR "                         using iso_8859_1. Beware of using this on a collection\n";
+    print STDERR "                         of documents that may contain characters outside the\n";
+    print STDERR "                         plain 7 bit ascii set though (e.g. German or French\n";
+    print STDERR "                         documents containing accents), use iso_8859_1 instead.\n";
     print STDERR "                       utf8: either utf8 or unicode -- automatically detected\n";
 …
     print STDERR "                       $enc: $e->{$enc}->{'name'}\n";
+    }
+    print STDERR "   -default_encoding If -input_encoding is set to 'auto' and the text categorization\n";
+    print STDERR "                     algorithm fails to extract the encoding or extracts an encoding\n";
+    print STDERR "                     that is not supported by Greenstone, this encoding will be used\n";
+    print STDERR "                     instead. The default is iso_8859_1\n\n";
+    print STDERR "   -extract_language Identify the language of each document and set 'Language' metadata. Note\n";
+    print STDERR "                     that this will be done automatically if -input_encoding is 'auto'.\n";
+    print STDERR "   -default_language If Greenstone fails to work out what language a document is the\n";
+    print STDERR "                     'Language' metadata element will be set to this value. The default\n";
+    print STDERR "                     is 'en' (ISO 639 language symbols should be used - en = English).\n";
+    print STDERR "                     Note that if -input_encoding is not set to 'auto' and -extract_language\n";
+    print STDERR "                     is not set, all documents will have their 'Language' metadata set to\n";
+    print STDERR "                     this value.\n\n";
+    print STDERR "\n";
+    print STDERR "   -default_encoding Use this encoding if -input_encoding is set to 'auto' and\n";
+    print STDERR "                     the text categorization algorithm fails to extract the\n";
+    print STDERR "                     encoding or extracts an encoding unsupported by Greenstone.\n";
+    print STDERR "                     The default is iso_8859_1.\n\n";
+    print STDERR "   -extract_language Identify the language of each document and set 'Language'\n";
+    print STDERR "                      metadata. Note that this will be done automatically if\n";
+    print STDERR "                     -input_encoding is 'auto'.\n\n";
+    print STDERR "   -default_language If Greenstone fails to work out what language a document is\n";
+    print STDERR "                     the 'Language' metadata element will be set to this value.\n";
+    print STDERR "                     The default is 'en' (ISO 639 language symbols are used:\n";
+    print STDERR "                     en = English). Note that if -input_encoding is not set to\n";
+    print STDERR "                     'auto' and -extract_language is not set, all documents will\n";
+    print STDERR "                     have their 'Language' metadata set to this value.\n\n";
     print STDERR "   -extract_acronyms Extract acronyms from within text and set as metadata\n";
 …
     print STDERR "   -markup_acronyms  Add acronym metadata into document text\n\n";
     print STDERR "   -first            Comma seperated list of first sizes to extract from the text\n";
     print STDERR "                     into a metadata field. The fields are called 'FirstNNN'.\n\n";
+    print STDERR "   -first            Comma separated list of first sizes to extract from the\n";
+    print STDERR "                     text into a metadata field. The field is called 'FirstNNN'.\n\n";
     print STDERR "   -extract_email    Extract email addresses as metadata\n\n";
+    print STDERR "   -extract_date     Extract dates pertaining to the content of documents about history\n";
+    print STDERR "   -maximum_date     The maximum historical date to be used as metadata (in a Common Era\n";
+    print STDERR "                     date such as 1950)\n";
+    print STDERR "   -maximum_century  The maximum named century to be extracted as historical metadata\n";
+    print STDERR "                     (e.g. 14 will extract all references up to the 14th century)\n";
+    print STDERR "   -no_bibliography  Do not try and block bibliographic dates when extracting historical dates.\n\n";
+    print STDERR "   -extract_date     Extract dates pertaining to the content of documents about\n";
+    print STDERR "                     history\n";
+    print STDERR "   -maximum_date     The maximum historical date to be used as metadata (in a\n";
+    print STDERR "                     Common Era date, such as 1950)\n";
+    print STDERR "   -maximum_century  The maximum named century to be extracted as historical\n";
+    print STDERR "                     metadata (e.g. 14 will extract all references up to the\n";
+    print STDERR "                     14th century)\n";
+    print STDERR "   -no_bibliography  Do not try and block bibliographic dates when extracting\n";
+    print STDERR "                     historical dates.\n\n";
+}

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 2084

Legend:

trunk/gsdl/perllib/plugins/BasPlug.pm

Download in other formats: