Changeset 4750
- Timestamp:
- 2003-06-23T11:52:31+12:00 (21 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/BasPlug.pm
r4746 r4750 127 127 'desc' => "Base class for all the import plugins.", 128 128 'inherits' => "No", 129 'args' => $arguments, 130 'process_exp' => "", 131 'block_exp' => "" }; 129 'args' => $arguments }; 132 130 133 131 sub print_xml_usage { … … 200 198 201 199 202 sub print_usage_new200 sub new_print_usage 203 201 { 204 202 local $self = shift(@_); 205 local $optionlist = $self->{'option_list'}; 206 local $pluginoptions = pop(@$optionlist); 207 return if (!defined($pluginoptions)); 208 209 local $pluginname = $pluginoptions->{'name'}; 203 204 # Print the usage message for a plugin (recursively) 205 local $descoffset = $self->determine_description_offset(0); 206 $self->print_plugin_usage($descoffset, 1); 207 } 208 209 210 sub determine_description_offset 211 { 212 local $self = shift(@_); 213 local $maxoffset = shift(@_); 214 215 local $optionlistref = $self->{'option_list'}; 216 local @optionlist = @$optionlistref; 217 local $pluginoptions = pop(@$optionlistref); 218 return $maxoffset if (!defined($pluginoptions)); 219 220 # Find the length of the longest option string of this plugin 210 221 local $pluginargs = $pluginoptions->{'args'}; 211 212 # Produce the usage information using the data structure above213 print STDERR " usage: plugin $pluginname";214 222 if (defined($pluginargs)) { 215 print STDERR " [options]";216 }217 print STDERR "\n\n";218 219 # Display the plugin options, if there are some220 if (defined($pluginargs)) {221 # Find the length of the longest option string222 local $maxlength = 0;223 223 foreach $option (@$pluginargs) { 224 224 local $optionname = $option->{'name'}; 225 225 local $optiontype = $option->{'type'}; 226 226 227 local $option stringlength =length($optionname);227 local $optiondescoffset = 3 + length($optionname); 228 228 if ($optiontype ne "flag") { 229 $option stringlength = $optionstringlength + 3 + length($optiontype);229 $optiondescoffset = $optiondescoffset + 2 + length($optiontype) + 1; 230 230 } 231 231 232 232 # Remember the longest 233 if ($option stringlength > $maxlength) {234 $max length = $optionstringlength;233 if ($optiondescoffset > $maxoffset) { 234 $maxoffset = $optiondescoffset; 235 235 } 236 236 } 237 237 } 238 239 # Recurse up the plugin hierarchy 240 $maxoffset = $self->determine_description_offset($maxoffset); 241 $self->{'option_list'} = \@optionlist; 242 return $maxoffset; 243 } 244 245 246 sub print_plugin_usage 247 { 248 local $self = shift(@_); 249 local $descoffset = shift(@_); 250 local $isleafclass = shift(@_); 251 252 local $optionlistref = $self->{'option_list'}; 253 local @optionlist = @$optionlistref; 254 local $pluginoptions = pop(@$optionlistref); 255 return if (!defined($pluginoptions)); 256 257 local $pluginname = $pluginoptions->{'name'}; 258 local $pluginargs = $pluginoptions->{'args'}; 259 260 # Produce the usage information using the data structure above 261 if ($isleafclass) { 262 print STDERR " usage: plugin $pluginname [options]\n\n"; 263 } 264 265 # Display the plugin options, if there are some 266 if (defined($pluginargs)) { 238 267 # Calculate the column offset of the option descriptions 239 local $optiondescoffset = 3 + $maxlength + 2; 268 local $optiondescoffset = $descoffset + 2; # 2 spaces between options & descriptions 269 270 if ($isleafclass) { 271 print STDERR " specific options:\n"; 272 } 273 else { 274 print STDERR " general options (from $pluginname):\n"; 275 } 240 276 241 277 # Display the plugin options 242 print STDERR " options:\n";243 278 foreach $option (@$pluginargs) { 244 279 # Display option name 245 280 local $optionname = $option->{'name'}; 246 281 print STDERR " -$optionname"; 247 local $optionstringlength = 3 + length($optionname);248 282 local $optionstringlength = length(" -$optionname"); 283 249 284 # Display option type, if the option is not a flag 250 285 local $optiontype = $option->{'type'}; 251 286 if ($optiontype ne "flag") { 252 287 print STDERR " <$optiontype>"; 253 $optionstringlength = $optionstringlength + (2 + length($optiontype) + 1);288 $optionstringlength = $optionstringlength + length(" <$optiontype>"); 254 289 } 255 290 … … 286 321 local $encodingname = $enc; 287 322 print STDERR " " x $optiondescoffset; 288 print STDERR "$enc :";323 print STDERR "$encodingname:"; 289 324 290 325 local $encodingdesc = $e->{$enc}->{'name'}; … … 299 334 } 300 335 301 # If the plugin inherits from another, do the parent now 302 if (defined($optionlist)) { 303 $self->print_usage_new(); 304 } 336 # Recurse up the plugin hierarchy 337 $self->print_plugin_usage($descoffset, 0); 338 $self->{'option_list'} = \@optionlist; 305 339 } 306 340 … … 338 372 339 373 # Write the word 340 print STDERR " " . $word;341 $linelength = $linelength + (length($word) + 1);374 print STDERR " $word"; 375 $linelength = $linelength + length(" $word"); 342 376 } 343 377 … … 346 380 347 381 348 sub print_general_usage {349 my ($plugin_name) = @_;350 351 print STDERR "\n usage: plugin $plugin_name [options]\n\n";352 353 print STDERR " -process_exp A perl regular expression to match against filenames.\n";354 print STDERR " Matching filenames will be processed by this plugin.\n";355 print STDERR " Each plugin has its own default process_exp. e.g HTMLPlug\n";356 print STDERR " defaults to '(?i)\.html?\$' i.e. all documents ending in\n";357 print STDERR " .htm or .html (case-insensitive).\n\n";358 359 print STDERR " -block_exp Files matching this regular expression will be blocked from\n";360 print STDERR " being passed to any later plugins in the list. This has no\n";361 print STDERR " real effect other than to prevent lots of warning messages\n";362 print STDERR " about input files you don't care about. Each plugin might\n";363 print STDERR " have a default block_exp. e.g. by default HTMLPlug blocks\n";364 print STDERR " any files with .gif, .jpg, .jpeg, .png or .css\n";365 print STDERR " file extensions.\n\n";366 367 368 print STDERR " -input_encoding The encoding of the source documents. Documents will be\n";369 print STDERR " converted from these encodings and stored internally as\n";370 print STDERR " utf8. The default input_encoding is 'auto'. Accepted values\n";371 print STDERR " are:\n";372 373 print STDERR " auto: Use text categorization algorithm to automatically\n";374 print STDERR " identify the encoding of each source document. This\n";375 print STDERR " will be slower than explicitly setting the encoding\n";376 print STDERR " but will work where more than one encoding is used\n";377 print STDERR " within the same collection.\n";378 379 print STDERR " ascii: Plain 7 bit ascii. This may be a bit faster than\n";380 print STDERR " using iso_8859_1. Beware of using this on a collection\n";381 print STDERR " of documents that may contain characters outside the\n";382 print STDERR " plain 7 bit ascii set though (e.g. German or French\n";383 print STDERR " documents containing accents), use iso_8859_1 instead.\n";384 385 print STDERR " utf8: either utf8 or unicode -- automatically detected\n";386 print STDERR " unicode: just unicode\n";387 388 my $e = $encodings::encodings;389 foreach my $enc (sort {$e->{$a}->{'name'} cmp $e->{$b}->{'name'}} keys (%$e)) {390 print STDERR " $enc: $e->{$enc}->{'name'}\n";391 }392 print STDERR "\n";393 print STDERR " -default_encoding Use this encoding if -input_encoding is set to 'auto' and\n";394 print STDERR " the text categorization algorithm fails to extract the\n";395 print STDERR " encoding or extracts an encoding unsupported by Greenstone.\n";396 print STDERR " The default is iso_8859_1.\n\n";397 398 print STDERR " -extract_language Identify the language of each document and set 'Language'\n";399 print STDERR " metadata. Note that this will be done automatically if\n";400 print STDERR " -input_encoding is 'auto'.\n\n";401 print STDERR " -default_language If Greenstone fails to work out what language a document is\n";402 print STDERR " the 'Language' metadata element will be set to this value.\n";403 print STDERR " The default is 'en' (ISO 639 language symbols are used:\n";404 print STDERR " en = English). Note that if -input_encoding is not set to\n";405 print STDERR " 'auto' and -extract_language is not set, all documents will\n";406 print STDERR " have their 'Language' metadata set to this value.\n\n";407 408 print STDERR " -extract_acronyms Extract acronyms from within text and set as metadata\n";409 410 print STDERR " -markup_acronyms Add acronym metadata into document text\n\n";411 412 print STDERR " -first Comma separated list of first sizes to extract from the\n";413 print STDERR " text into a metadata field. The field is called 'FirstNNN'.\n\n";414 415 print STDERR " -extract_email Extract email addresses as metadata\n\n";416 417 print STDERR " -extract_historical_years Extract time-period information from historical\n";418 print STDERR " documents. This is stored as metadata with the document.\n";419 print STDERR " There is a search interface for this metadata, which you \n";420 print STDERR " can include in your collection by adding the statement:\n";421 print STDERR " format QueryInterface DateSearch\n";422 print STDERR " to your collection configuration file\n";423 print STDERR " -maximum_year The maximum historical date to be used as metadata (in a\n";424 print STDERR " Common Era date, such as 1950)\n";425 print STDERR " -maximum_century The maximum named century to be extracted as historical\n";426 print STDERR " metadata (e.g. 14 will extract all references up to the\n";427 print STDERR " 14th century)\n";428 print STDERR " -no_bibliography Do not try and block bibliographic dates when extracting\n";429 print STDERR " historical dates.\n";430 print STDERR " -cover_image Will look for a prefix.jpg file (where prefix is the same\n";431 print STDERR " prefix as the file being processed) and associate it as a\n";432 print STDERR " cover image\n\n";433 }382 # sub print_general_usage { 383 # my ($plugin_name) = @_; 384 385 # print STDERR "\n usage: plugin $plugin_name [options]\n\n"; 386 387 # print STDERR " -process_exp A perl regular expression to match against filenames.\n"; 388 # print STDERR " Matching filenames will be processed by this plugin.\n"; 389 # print STDERR " Each plugin has its own default process_exp. e.g HTMLPlug\n"; 390 # print STDERR " defaults to '(?i)\.html?\$' i.e. all documents ending in\n"; 391 # print STDERR " .htm or .html (case-insensitive).\n\n"; 392 393 # print STDERR " -block_exp Files matching this regular expression will be blocked from\n"; 394 # print STDERR " being passed to any later plugins in the list. This has no\n"; 395 # print STDERR " real effect other than to prevent lots of warning messages\n"; 396 # print STDERR " about input files you don't care about. Each plugin might\n"; 397 # print STDERR " have a default block_exp. e.g. by default HTMLPlug blocks\n"; 398 # print STDERR " any files with .gif, .jpg, .jpeg, .png or .css\n"; 399 # print STDERR " file extensions.\n\n"; 400 401 402 # print STDERR " -input_encoding The encoding of the source documents. Documents will be\n"; 403 # print STDERR " converted from these encodings and stored internally as\n"; 404 # print STDERR " utf8. The default input_encoding is 'auto'. Accepted values\n"; 405 # print STDERR " are:\n"; 406 407 # print STDERR " auto: Use text categorization algorithm to automatically\n"; 408 # print STDERR " identify the encoding of each source document. This\n"; 409 # print STDERR " will be slower than explicitly setting the encoding\n"; 410 # print STDERR " but will work where more than one encoding is used\n"; 411 # print STDERR " within the same collection.\n"; 412 413 # print STDERR " ascii: Plain 7 bit ascii. This may be a bit faster than\n"; 414 # print STDERR " using iso_8859_1. Beware of using this on a collection\n"; 415 # print STDERR " of documents that may contain characters outside the\n"; 416 # print STDERR " plain 7 bit ascii set though (e.g. German or French\n"; 417 # print STDERR " documents containing accents), use iso_8859_1 instead.\n"; 418 419 # print STDERR " utf8: either utf8 or unicode -- automatically detected\n"; 420 # print STDERR " unicode: just unicode\n"; 421 422 # my $e = $encodings::encodings; 423 # foreach my $enc (sort {$e->{$a}->{'name'} cmp $e->{$b}->{'name'}} keys (%$e)) { 424 # print STDERR " $enc: $e->{$enc}->{'name'}\n"; 425 # } 426 # print STDERR "\n"; 427 # print STDERR " -default_encoding Use this encoding if -input_encoding is set to 'auto' and\n"; 428 # print STDERR " the text categorization algorithm fails to extract the\n"; 429 # print STDERR " encoding or extracts an encoding unsupported by Greenstone.\n"; 430 # print STDERR " The default is iso_8859_1.\n\n"; 431 432 # print STDERR " -extract_language Identify the language of each document and set 'Language'\n"; 433 # print STDERR " metadata. Note that this will be done automatically if\n"; 434 # print STDERR " -input_encoding is 'auto'.\n\n"; 435 # print STDERR " -default_language If Greenstone fails to work out what language a document is\n"; 436 # print STDERR " the 'Language' metadata element will be set to this value.\n"; 437 # print STDERR " The default is 'en' (ISO 639 language symbols are used:\n"; 438 # print STDERR " en = English). Note that if -input_encoding is not set to\n"; 439 # print STDERR " 'auto' and -extract_language is not set, all documents will\n"; 440 # print STDERR " have their 'Language' metadata set to this value.\n\n"; 441 442 # print STDERR " -extract_acronyms Extract acronyms from within text and set as metadata\n"; 443 444 # print STDERR " -markup_acronyms Add acronym metadata into document text\n\n"; 445 446 # print STDERR " -first Comma separated list of first sizes to extract from the\n"; 447 # print STDERR " text into a metadata field. The field is called 'FirstNNN'.\n\n"; 448 449 # print STDERR " -extract_email Extract email addresses as metadata\n\n"; 450 451 # print STDERR " -extract_historical_years Extract time-period information from historical\n"; 452 # print STDERR " documents. This is stored as metadata with the document.\n"; 453 # print STDERR " There is a search interface for this metadata, which you \n"; 454 # print STDERR " can include in your collection by adding the statement:\n"; 455 # print STDERR " format QueryInterface DateSearch\n"; 456 # print STDERR " to your collection configuration file\n"; 457 # print STDERR " -maximum_year The maximum historical date to be used as metadata (in a\n"; 458 # print STDERR " Common Era date, such as 1950)\n"; 459 # print STDERR " -maximum_century The maximum named century to be extracted as historical\n"; 460 # print STDERR " metadata (e.g. 14 will extract all references up to the\n"; 461 # print STDERR " 14th century)\n"; 462 # print STDERR " -no_bibliography Do not try and block bibliographic dates when extracting\n"; 463 # print STDERR " historical dates.\n"; 464 # print STDERR " -cover_image Will look for a prefix.jpg file (where prefix is the same\n"; 465 # print STDERR " prefix as the file being processed) and associate it as a\n"; 466 # print STDERR " cover image\n\n"; 467 # } 434 468 435 469 # print_usage should be overridden for any sub-classes having 436 470 # their own plugin specific options 437 sub print_usage {438 print STDERR "\nThis plugin has no plugin specific options\n\n";439 }471 # sub print_usage { 472 # print STDERR "\nThis plugin has no plugin specific options\n\n"; 473 # } 440 474 441 475 sub new {
Note:
See TracChangeset
for help on using the changeset viewer.