Changeset 2084


Ignore:
Timestamp:
2001-02-28T16:24:57+13:00 (23 years ago)
Author:
jrm21
Message:

usage message is now formatted to fit within 80 columns.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/BasPlug.pm

    r1999 r2084  
    48 48
    49 49    print STDERR "   -block_exp        Files matching this regular expression will be blocked from\n";
    50     print STDERR "                     being passed to any further plugins in the list. This has no\n";
     50    print STDERR "                     being passed to any later plugins in the list. This has no\n";
    51 51    print STDERR "                     real effect other than to prevent lots of warning messages\n";
    52     print STDERR "                     about input files you don't care about. Each plugin may or may\n";
    53     print STDERR "                     not have a default block_exp. e.g. by default HTMLPlug blocks\n";
     52    print STDERR "                     about input files you don't care about. Each plugin might\n";
     53    print STDERR "                     have a default block_exp. e.g. by default HTMLPlug blocks\n";
    54 54    print STDERR "                     any files with .gif, .jpg, .jpeg, .png, .rtf or .css\n";
    55 55    print STDERR "                     file extensions.\n\n";
     
    67 67    print STDERR "                         within the same collection.\n";
    68 68
    69     print STDERR "                       ascii: Plain 7 bit ascii. This may be a little faster than\n";
    70     print STDERR "                         using iso_8859_1. Beware of using 'ascii' on a collection\n";
    71     print STDERR "                         of documents that may contain characters outside of plain\n";
    72     print STDERR "                         7 bit ascii though (e.g. German or French documents\n";
    73     print STDERR "                         containing accents), use iso_8859_1 instead.\n";
     69    print STDERR "                       ascii: Plain 7 bit ascii. This may be a bit faster than\n";
     70    print STDERR "                         using iso_8859_1. Beware of using this on a collection\n";
     71    print STDERR "                         of documents that may contain characters outside the\n";
     72    print STDERR "                         plain 7 bit ascii set though (e.g. German or French\n";
     73    print STDERR "                         documents containing accents), use iso_8859_1 instead.\n";
    74 74
    75 75    print STDERR "                       utf8: either utf8 or unicode -- automatically detected\n";
     
    80 80    print STDERR "                       $enc: $e->{$enc}->{'name'}\n";
    81 81    }
    82 
    83     print STDERR "   -default_encoding If -input_encoding is set to 'auto' and the text categorization\n";
    84     print STDERR "                     algorithm fails to extract the encoding or extracts an encoding\n";
    85     print STDERR "                     that is not supported by Greenstone, this encoding will be used\n";
    86     print STDERR "                     instead. The default is iso_8859_1\n\n";
    87 
    88     print STDERR "   -extract_language Identify the language of each document and set 'Language' metadata. Note\n";
    89     print STDERR "                     that this will be done automatically if -input_encoding is 'auto'.\n";
    90     print STDERR "   -default_language If Greenstone fails to work out what language a document is the\n";
    91     print STDERR "                     'Language' metadata element will be set to this value. The default\n";
    92     print STDERR "                     is 'en' (ISO 639 language symbols should be used - en = English).\n";
    93     print STDERR "                     Note that if -input_encoding is not set to 'auto' and -extract_language\n";
    94     print STDERR "                     is not set, all documents will have their 'Language' metadata set to\n";
    95     print STDERR "                     this value.\n\n";
     82    print STDERR "\n";
     83    print STDERR "   -default_encoding Use this encoding if -input_encoding is set to 'auto' and\n";
     84    print STDERR "                     the text categorization algorithm fails to extract the\n";
     85    print STDERR "                     encoding or extracts an encoding unsupported by Greenstone.\n";
     86    print STDERR "                     The default is iso_8859_1.\n\n";
     87
     88    print STDERR "   -extract_language Identify the language of each document and set 'Language'\n";
     89    print STDERR "                      metadata. Note that this will be done automatically if\n";
     90    print STDERR "                     -input_encoding is 'auto'.\n\n";
     91    print STDERR "   -default_language If Greenstone fails to work out what language a document is\n";
     92    print STDERR "                     the 'Language' metadata element will be set to this value.\n";
     93    print STDERR "                     The default is 'en' (ISO 639 language symbols are used:\n";
     94    print STDERR "                     en = English). Note that if -input_encoding is not set to\n";
     95    print STDERR "                     'auto' and -extract_language is not set, all documents will\n";
     96    print STDERR "                     have their 'Language' metadata set to this value.\n\n";
    96 97
    97 98    print STDERR "   -extract_acronyms Extract acronyms from within text and set as metadata\n";
     
    99 100    print STDERR "   -markup_acronyms  Add acronym metadata into document text\n\n";
    100 101
    101     print STDERR "   -first            Comma seperated list of first sizes to extract from the text\n";
    102     print STDERR "                     into a metadata field. The fields are called 'FirstNNN'.\n\n";
     102    print STDERR "   -first            Comma separated list of first sizes to extract from the\n";
     103    print STDERR "                     text into a metadata field. The field is called 'FirstNNN'.\n\n";
    103 104
    104 105    print STDERR "   -extract_email    Extract email addresses as metadata\n\n";
    105 106
    106     print STDERR "   -extract_date     Extract dates pertaining to the content of documents about history\n";
    107     print STDERR "   -maximum_date     The maximum historical date to be used as metadata (in a Common Era\n";
    108     print STDERR "                     date such as 1950)\n";
    109     print STDERR "   -maximum_century  The maximum named century to be extracted as historical metadata\n";
    110     print STDERR "                     (e.g. 14 will extract all references up to the 14th century)\n";
    111     print STDERR "   -no_bibliography  Do not try and block bibliographic dates when extracting historical dates.\n\n";
     107    print STDERR "   -extract_date     Extract dates pertaining to the content of documents about\n";
     108    print STDERR "                     history\n";
     109    print STDERR "   -maximum_date     The maximum historical date to be used as metadata (in a\n";
     110    print STDERR "                     Common Era date, such as 1950)\n";
     111    print STDERR "   -maximum_century  The maximum named century to be extracted as historical\n";
     112    print STDERR "                     metadata (e.g. 14 will extract all references up to the\n";
     113    print STDERR "                     14th century)\n";
     114    print STDERR "   -no_bibliography  Do not try and block bibliographic dates when extracting\n";
     115    print STDERR "                     historical dates.\n\n";
    112 116 }
    113 117
Note: See TracChangeset for help on using the changeset viewer.