Changeset 2084
- Timestamp: 2001-02-28T16:24:57+13:00 (23 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/perllib/plugins/BasPlug.pm
r1999 r2084
   48    48
   49    49   print STDERR " -block_exp Files matching this regular expression will be blocked from\n";
   50       - print STDERR " being passed to any further plugins in the list. This has no\n";
         50 + print STDERR " being passed to any later plugins in the list. This has no\n";
   51    51   print STDERR " real effect other than to prevent lots of warning messages\n";
   52       - print STDERR " about input files you don't care about. Each plugin may or may\n";
   53       - print STDERR " not have a default block_exp. e.g. by default HTMLPlug blocks\n";
         52 + print STDERR " about input files you don't care about. Each plugin might\n";
         53 + print STDERR " have a default block_exp. e.g. by default HTMLPlug blocks\n";
   54    54   print STDERR " any files with .gif, .jpg, .jpeg, .png, .rtf or .css\n";
   55    55   print STDERR " file extensions.\n\n";
    …     …
   67    67   print STDERR " within the same collection.\n";
   68    68
   69       - print STDERR " ascii: Plain 7 bit ascii. This may be a little faster than\n";
   70       - print STDERR " using iso_8859_1. Beware of using 'ascii' on a collection\n";
   71       - print STDERR " of documents that may contain characters outside of plain\n";
   72       - print STDERR " 7 bit ascii though (e.g. German or French documents\n";
   73       - print STDERR " containing accents), use iso_8859_1 instead.\n";
         69 + print STDERR " ascii: Plain 7 bit ascii. This may be a bit faster than\n";
         70 + print STDERR " using iso_8859_1. Beware of using this on a collection\n";
         71 + print STDERR " of documents that may contain characters outside the\n";
         72 + print STDERR " plain 7 bit ascii set though (e.g. German or French\n";
         73 + print STDERR " documents containing accents), use iso_8859_1 instead.\n";
   74    74
   75    75   print STDERR " utf8: either utf8 or unicode -- automatically detected\n";
    …     …
   80    80   print STDERR " $enc: $e->{$enc}->{'name'}\n";
   81    81   }
   82       -
   83       - print STDERR " -default_encoding If -input_encoding is set to 'auto' and the text categorization\n";
   84       - print STDERR " algorithm fails to extract the encoding or extracts an encoding\n";
   85       - print STDERR " that is not supported by Greenstone, this encoding will be used\n";
   86       - print STDERR " instead. The default is iso_8859_1\n\n";
   87       -
   88       - print STDERR " -extract_language Identify the language of each document and set 'Language' metadata. Note\n";
   89       - print STDERR " that this will be done automatically if -input_encoding is 'auto'.\n";
   90       - print STDERR " -default_language If Greenstone fails to work out what language a document is the\n";
   91       - print STDERR " 'Language' metadata element will be set to this value. The default\n";
   92       - print STDERR " is 'en' (ISO 639 language symbols should be used - en = English).\n";
   93       - print STDERR " Note that if -input_encoding is not set to 'auto' and -extract_language\n";
   94       - print STDERR " is not set, all documents will have their 'Language' metadata set to\n";
   95       - print STDERR " this value.\n\n";
         82 + print STDERR "\n";
         83 + print STDERR " -default_encoding Use this encoding if -input_encoding is set to 'auto' and\n";
         84 + print STDERR " the text categorization algorithm fails to extract the\n";
         85 + print STDERR " encoding or extracts an encoding unsupported by Greenstone.\n";
         86 + print STDERR " The default is iso_8859_1.\n\n";
         87 +
         88 + print STDERR " -extract_language Identify the language of each document and set 'Language'\n";
         89 + print STDERR " metadata. Note that this will be done automatically if\n";
         90 + print STDERR " -input_encoding is 'auto'.\n\n";
         91 + print STDERR " -default_language If Greenstone fails to work out what language a document is\n";
         92 + print STDERR " the 'Language' metadata element will be set to this value.\n";
         93 + print STDERR " The default is 'en' (ISO 639 language symbols are used:\n";
         94 + print STDERR " en = English). Note that if -input_encoding is not set to\n";
         95 + print STDERR " 'auto' and -extract_language is not set, all documents will\n";
         96 + print STDERR " have their 'Language' metadata set to this value.\n\n";
   96    97
   97    98   print STDERR " -extract_acronyms Extract acronyms from within text and set as metadata\n";
    …     …
   99   100   print STDERR " -markup_acronyms Add acronym metadata into document text\n\n";
  100   101
  101       - print STDERR " -first Comma seperated list of first sizes to extract from the text\n";
  102       - print STDERR " into a metadata field. The fields are called 'FirstNNN'.\n\n";
        102 + print STDERR " -first Comma separated list of first sizes to extract from the\n";
        103 + print STDERR " text into a metadata field. The field is called 'FirstNNN'.\n\n";
  103   104
  104   105   print STDERR " -extract_email Extract email addresses as metadata\n\n";
  105   106
  106       - print STDERR " -extract_date Extract dates pertaining to the content of documents about history\n";
  107       - print STDERR " -maximum_date The maximum historical date to be used as metadata (in a Common Era\n";
  108       - print STDERR " date such as 1950)\n";
  109       - print STDERR " -maximum_century The maximum named century to be extracted as historical metadata\n";
  110       - print STDERR " (e.g. 14 will extract all references up to the 14th century)\n";
  111       - print STDERR " -no_bibliography Do not try and block bibliographic dates when extracting historical dates.\n\n";
        107 + print STDERR " -extract_date Extract dates pertaining to the content of documents about\n";
        108 + print STDERR " history\n";
        109 + print STDERR " -maximum_date The maximum historical date to be used as metadata (in a\n";
        110 + print STDERR " Common Era date, such as 1950)\n";
        111 + print STDERR " -maximum_century The maximum named century to be extracted as historical\n";
        112 + print STDERR " metadata (e.g. 14 will extract all references up to the\n";
        113 + print STDERR " 14th century)\n";
        114 + print STDERR " -no_bibliography Do not try and block bibliographic dates when extracting\n";
        115 + print STDERR " historical dates.\n\n";
  112   116   }
  113   117
Note: See TracChangeset for help on using the changeset viewer.