Changeset 4873 for trunk/gsdl
- Timestamp:
- 2003-07-03T15:59:04+12:00 (21 years ago)
- Location:
- trunk/gsdl
- Files:
-
- 1 added
- 39 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/bin/script/buildcol.pl
r4776 r4873 46 46 my $mode_list = 47 47 [ { 'name' => "all", 48 'desc' => " Do everything." },48 'desc' => "{buildcol.mode.all}" }, 49 49 { 'name' => "compress_text", 50 'desc' => " Just compress the text." },50 'desc' => "{buildcol.mode.compress_text}" }, 51 51 { 'name' => "build_index", 52 'desc' => " Just index the text." },52 'desc' => "{buildcol.mode.build_index}" }, 53 53 { 'name' => "infodb", 54 'desc' => " Just build the metadata database." } ];54 'desc' => "{buildcol.mode.infodb}" } ]; 55 55 56 56 my $arguments = 57 57 [ { 'name' => "archivedir", 58 'desc' => " Where the archives live.",58 'desc' => "{buildcol.archivedir}", 59 59 'type' => "string", 60 60 'reqd' => "no" }, 61 61 { 'name' => "verbosity", 62 'desc' => " 0=none, 3=lots",62 'desc' => "{buildcol.verbosity}", 63 63 'type' => "int", 64 64 'deft' => "2", 65 65 'reqd' => "no" }, 66 66 { 'name' => "builddir", 67 'desc' => " Where to put the built indexes.",68 'type' => "string", 69 'reqd' => "no" }, 70 71 'desc' => "?",72 'type' => "string",73 'reqd' => "no" },67 'desc' => "{buildcol.builddir}", 68 'type' => "string", 69 'reqd' => "no" }, 70 # { 'name' => "cachedir", 71 # 'desc' => "{buildcol.cachedir}", 72 # 'type' => "string", 73 # 'reqd' => "no" }, 74 74 { 'name' => "maxdocs", 75 'desc' => " Maximum number of documents to build.",75 'desc' => "{buildcol.maxdocs}", 76 76 'type' => "int", 77 77 'reqd' => "no" }, 78 78 { 'name' => "debug", 79 'desc' => " Print output to STDOUT.",79 'desc' => "{buildcol.debug}", 80 80 'type' => "flag", 81 81 'reqd' => "no" }, 82 82 { 'name' => "mode", 83 'desc' => " The parts of the building process to carry out.",83 'desc' => "{buildcol.mode}", 84 84 'type' => "enum", 85 85 'list' => $mode_list, … … 87 87 'reqd' => "no" }, 88 88 { 'name' => "index", 89 'desc' => " Index to build (will build all in config file if not set).",89 'desc' => "{buildcol.index}", 90 90 'type' => "string", 91 91 'reqd' => "no" }, 92 92 { 'name' => "keepold", 93 'desc' => " Will not destroy the current contents of the building directory.",93 'desc' => "{buildcol.keepold}", 94 94 'type' => "flag", 95 95 'reqd' => "no" }, 96 { 'name' => "no_text", 97 'desc' => " Don't store compressed text. This option is useful for minimizing the size of the built indexes if you intend always to display the original documents at run time (i.e. you won't be able to retrieve the compressed text version).",96 { 'name' => "no_text", 97 'desc' => "{buildcol.no_text}", 98 98 'type' => "flag", 99 99 'reqd' => "no" }, 100 100 { 'name' => "allclassifications", 101 'desc' => " Don't remove empty classifications.",101 'desc' => "{buildcol.allclassifications}", 102 102 'type' => "flag", 103 103 'reqd' => "no" }, 104 104 { 'name' => "create_images", 105 'desc' => " Attempt to create default images for new collection. This relies on the Gimp being installed along with relevant perl modules to allow scripting from perl.",105 'desc' => "{buildcol.create_images}", 106 106 'type' => "flag", 107 107 'reqd' => "no" }, 108 108 { 'name' => "collectdir", 109 'desc' => " Collection directory.",109 'desc' => "{buildcol.collectdir}", 110 110 'type' => "string", 111 111 'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"), 112 112 'reqd' => "no" }, 113 113 { 'name' => "out", 114 'desc' => " Filename or handle to print output status to.",114 'desc' => "{buildcol.out}", 115 115 'type' => "string", 116 116 'deft' => "STDERR", 117 117 'reqd' => "no" }, 118 118 { 'name' => "no_strip_html", 119 'desc' => " Do not strip the html tags from the indexed text (only used for mgpp collections).",119 'desc' => "{buildcol.no_strip_html}", 120 120 'type' => "flag", 121 121 'reqd' => "no" }, 122 122 { 'name' => "faillog", 123 'desc' => " Fail log filename. This log receives the filenames of any files which fail to be processed.",123 'desc' => "{buildcol.faillog}", 124 124 'type' => "string", 125 125 'deft' => &util::filename_cat("<collectdir>", "colname", "etc", "fail.log"), 126 'reqd' => "no" }, 127 { 'name' => "language", 128 'desc' => "{scripts.language}", 129 'type' => "string", 126 130 'reqd' => "no" } ]; 127 131 … … 133 137 sub print_xml_usage 134 138 { 139 local $language = shift(@_); 140 135 141 &PrintUsage::print_xml_header(); 136 142 … … 140 146 print STDERR " <Arguments>\n"; 141 147 if (defined($options->{'args'})) { 142 &PrintUsage::print_options_xml($ options->{'args'});148 &PrintUsage::print_options_xml($language, $options->{'args'}); 143 149 } 144 150 print STDERR " </Arguments>\n"; … … 149 155 sub print_txt_usage 150 156 { 157 local $language = shift(@_); 158 151 159 local $programname = $options->{'name'}; 152 160 local $programargs = $options->{'args'}; … … 169 177 170 178 # Display the program options 171 &PrintUsage::print_options_txt($ programargs, $optiondescoffset);179 &PrintUsage::print_options_txt($language, $programargs, $optiondescoffset); 172 180 } 173 181 } … … 222 230 $debug, $mode, $indexname, $keepold, $allclassifications, 223 231 $create_images, $collectdir, $out, $buildtype, $textindex, 224 $no_strip_html, $no_text, $faillog );232 $no_strip_html, $no_text, $faillog, $language); 225 233 226 234 # ***** 11-04-03 - John Thompson ***** … … 231 239 # later (after we check the collect.cfg file) 232 240 if (!parsargv::parse(\@ARGV, 241 'language/.*/', \$language, 233 242 'verbosity/\d+/', \$verbosity, 234 243 'archivedir/.*/', \$archivedir, 235 'cachedir/.*/', \$cachedir, 244 'cachedir/.*/', \$cachedir, # UNDOCUMENTED 236 245 'builddir/.*/', \$builddir, 237 246 'maxdocs/^\-?\d+/', \$maxdocs, … … 248 257 'faillog/.*/', \$faillog, 249 258 q^xml^, \$xml)) { 250 &print_txt_usage( );259 &print_txt_usage($language); 251 260 die "\n"; 252 261 } 253 262 254 263 if ($xml) { 255 &print_xml_usage( );264 &print_xml_usage($language); 256 265 die "\n"; 257 266 } … … 268 277 # get and check the collection 269 278 if (($collection = &util::use_collection(@ARGV, $collectdir)) eq "") { 270 &print_txt_usage( );279 &print_txt_usage($language); 271 280 die "\n"; 272 281 } -
trunk/gsdl/bin/script/classinfo.pl
r4779 r4873 42 42 print STDERR "\n"; 43 43 print STDERR "classinfo.pl: Prints information about a classifier.\n\n"; 44 print STDERR " usage: $0 [options] classifier\n\n"; 45 print STDERR " options:\n"; 46 print STDERR " -collect collection-name Giving a collection name will make\n"; 47 print STDERR " classinfo.pl look in collect/collection-name/perllib/classify\n"; 48 print STDERR " first. If it doesn't find it there it will look in the general\n"; 49 print STDERR " perllib/classify directory.\n\n"; 50 print STDERR " -xml Produces the information in an xml form, without\n"; 51 print STDERR " 'pretty' comments but with much more detail.\n"; 44 print STDERR " usage: $0 [options] classifier\n\n"; 45 print STDERR " options:\n"; 46 print STDERR " -collect collection-name Giving a collection name will make classinfo.pl\n"; 47 print STDERR " look in collect/collection-name/perllib/classify\n"; 48 print STDERR " first. If the classifier is not found there it will\n"; 49 print STDERR " look in the general perllib/classify directory.\n\n"; 50 print STDERR " -xml Produces the information in an xml form, without\n"; 51 print STDERR " 'pretty' comments but with much more detail.\n\n"; 52 print STDERR " -language language-code Language to display plugin options in (eg. 'en_US'\n"; 53 print STDERR " specifies American English). Requires translations\n"; 54 print STDERR " of the option descriptions to exist in the\n"; 55 print STDERR " perllib/strings_language-code.rb file.\n"; 52 56 } 53 57 … … 57 61 my $collect = ""; 58 62 my $xml = 0; 63 my $language = ""; # Will display in the default language if not set 59 64 60 65 # Parse command line 61 if (!parsargv::parse(\@ARGV, q^collect/.*/^, \$collect, q^xml^, \$xml )) 66 if (!parsargv::parse(\@ARGV, 67 q^collect/.*/^, \$collect, 68 q^xml^, \$xml, 69 q^language/.*/^, \$language)) 62 70 { 63 71 &print_usage(); … … 84 92 85 93 if ($xml) { 86 $classobj->print_xml_usage( );94 $classobj->print_xml_usage($language); 87 95 } 88 96 else { … … 101 109 print STDERR "General options are inherited from parent classes of the classifer.\n\n"; 102 110 103 $classobj->print_txt_usage( );111 $classobj->print_txt_usage($language); 104 112 } 105 113 } -
trunk/gsdl/bin/script/import.pl
r4776 r4873 50 50 my $oidtype_list = 51 51 [ { 'name' => "hash", 52 'desc' => " Hashes the contents of the file. Document identifier will be the same every time the collection is imported." },52 'desc' => "{import.OIDtype.hash}" }, 53 53 { 'name' => "incremental", 54 'desc' => " A simple document count that is significantly faster than \"hash\". It is not guaranteed to always assign the same identifier to a given document though and does not allow further documents to be added to existing xml archives." } ];54 'desc' => "{import.OIDtype.incremental}" } ]; 55 55 56 56 my $arguments = 57 57 [ { 'name' => "archivedir", 58 'desc' => " Where the converted material ends up.",58 'desc' => "{import.achivedir}", 59 59 'type' => "string", 60 60 'reqd' => "no" }, 61 61 { 'name' => "collectdir", 62 'desc' => " Collection directory.",62 'desc' => "{import.collectdir}", 63 63 'type' => "string", 64 64 'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"), 65 65 'reqd' => "no" }, 66 66 { 'name' => "debug", 67 'desc' => " Print imported text to STDOUT.",67 'desc' => "{import.debug}", 68 68 'type' => "flag", 69 69 'reqd' => "no" }, 70 70 { 'name' => "faillog", 71 'desc' => " Fail log filename. This log receives the filenames of any files which fail to be processed.",71 'desc' => "{import.faillog}", 72 72 'type' => "string", 73 73 'deft' => &util::filename_cat("<collectdir>", "colname", "etc", "fail.log"), 74 74 'reqd' => "no" }, 75 75 { 'name' => "groupsize", 76 'desc' => " Number of import documents to group into one XML file.",76 'desc' => "{import.groupsize}", 77 77 'type' => "int", 78 78 'deft' => "1", 79 79 'reqd' => "no" }, 80 80 { 'name' => "gzip", 81 'desc' => " Use gzip to compress resulting xml documents (don't forget to include ZIPPlug in your plugin list when building from compressed documents).",81 'desc' => "{import.gzip}", 82 82 'type' => "flag", 83 83 'reqd' => "no" }, 84 84 { 'name' => "importdir", 85 'desc' => " Where the original material lives.",85 'desc' => "{import.importdir}", 86 86 'type' => "string", 87 87 'reqd' => "no" }, 88 88 { 'name' => "keepold", 89 'desc' => " Will not destroy the current contents of the archives directory (the default).",89 'desc' => "{import.keepold}", 90 90 'type' => "flag", 91 91 'reqd' => "no" }, 92 92 { 'name' => "maxdocs", 93 'desc' => " Maximum number of documents to import.",93 'desc' => "{import.maxdocs}", 94 94 'type' => "int", 95 95 'reqd' => "no" }, 96 96 { 'name' => "OIDtype", 97 'desc' => " The method to use when generating unique identifiers for each document.",97 'desc' => "{import.OIDtype}", 98 98 'type' => "enum", 99 99 'list' => $oidtype_list, … … 101 101 'reqd' => "no" }, 102 102 { 'name' => "out", 103 'desc' => " Filename or handle to print output status to.",103 'desc' => "{import.out}", 104 104 'type' => "string", 105 105 'deft' => "STDERR", 106 106 'reqd' => "no" }, 107 107 { 'name' => "removeold", 108 'desc' => " Will remove the old contents of the archives directory -- use with care.",108 'desc' => "{import.removeold}", 109 109 'type' => "flag", 110 110 'reqd' => "no" }, 111 111 { 'name' => "sortmeta", 112 'desc' => " Sort documents alphabetically by metadata for building. This will be disabled if groupsize > 1.",112 'desc' => "{import.sortmeta}", 113 113 'type' => "string", 114 114 'reqd' => "no" }, 115 115 { 'name' => "statsfile", 116 'desc' => " Filename or handle to print import statistics to.",116 'desc' => "{import.statsfile}", 117 117 'type' => "string", 118 118 'deft' => "STDERR", 119 119 'reqd' => "no" }, 120 120 { 'name' => "verbosity", 121 'desc' => " 0=none, 3=lots",121 'desc' => "{import.verbosity}", 122 122 'type' => "int", 123 123 'deft' => "2", 124 'reqd' => "no" }, 125 { 'name' => "language", 126 'desc' => "{scripts.language}", 127 'type' => "string", 124 128 'reqd' => "no" } ]; 125 129 … … 131 135 sub print_xml_usage 132 136 { 137 local $language = shift(@_); 138 133 139 &PrintUsage::print_xml_header(); 134 140 … … 138 144 print STDERR " <Arguments>\n"; 139 145 if (defined($options->{'args'})) { 140 &PrintUsage::print_options_xml($ options->{'args'});146 &PrintUsage::print_options_xml($language, $options->{'args'}); 141 147 } 142 148 print STDERR " </Arguments>\n"; … … 147 153 sub print_txt_usage 148 154 { 155 local $language = shift(@_); 156 149 157 local $programname = $options->{'name'}; 150 158 local $programargs = $options->{'args'}; … … 167 175 168 176 # Display the program options 169 &PrintUsage::print_options_txt($ programargs, $optiondescoffset);177 &PrintUsage::print_options_txt($language, $programargs, $optiondescoffset); 170 178 } 171 179 } … … 224 232 $maxdocs, $collection, $configfilename, $collectcfg, 225 233 $pluginfo, $sortmeta, $archive_info_filename, $statsfile, 226 $archive_info, $processor, $out, $faillog, $collectdir );234 $archive_info, $processor, $out, $faillog, $collectdir, $language); 227 235 228 236 # ***** 11-04-03 - John Thompson ***** … … 233 241 # later (after we check the collect.cfg file) 234 242 if (!parsargv::parse(\@ARGV, 243 'language/.*/', \$language, 235 244 'verbosity/\d+/', \$verbosity, 236 245 'importdir/.*/', \$importdir, … … 249 258 'faillog/.*/', \$faillog, 250 259 q^xml^, \$xml)) { 251 &print_txt_usage( );260 &print_txt_usage($language); 252 261 die "\n"; 253 262 } 254 263 255 264 if ($xml) { 256 &print_xml_usage( );265 &print_xml_usage($language); 257 266 die "\n"; 258 267 } … … 271 280 # get and check the collection name 272 281 if (($collection = &util::use_collection(@ARGV, $collectdir)) eq "") { 273 &print_txt_usage( );282 &print_txt_usage($language); 274 283 die "\n"; 275 284 } -
trunk/gsdl/bin/script/mkcol.pl
r4776 r4873 46 46 my $arguments = 47 47 [ { 'name' => "creator", 48 'desc' => " The collection creator's e-mail address.",48 'desc' => "{mkcol.creator}", 49 49 'type' => "string", 50 50 'reqd' => "yes" }, 51 51 { 'name' => "optionfile", 52 'desc' => " Get options from file, useful on systems where long command lines may cause problems.",52 'desc' => "{mkcol.optionfile}", 53 53 'type' => "string", 54 54 'reqd' => "no" }, 55 55 { 'name' => "maintainer", 56 'desc' => " The collection maintainer's email address (if different from the creator).",56 'desc' => "{mkcol.maintainer}", 57 57 'type' => "string", 58 58 'reqd' => "no" }, 59 59 { 'name' => "collectdir", 60 'desc' => " Directory where new collection will be created.",60 'desc' => "{mkcol.collectdir}", 61 61 'type' => "string", 62 62 'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"), 63 63 'reqd' => "no" }, 64 64 { 'name' => "public", 65 'desc' => " If this collection has anonymous access (true/false).",65 'desc' => "{mkcol.public}", 66 66 'type' => "string", 67 67 'deft' => "true", 68 68 'reqd' => "no" }, 69 69 { 'name' => "title", 70 'desc' => " The title of the collection.",70 'desc' => "{mkcol.title}", 71 71 'type' => "string", 72 72 'reqd' => "no" }, 73 73 { 'name' => "about", 74 'desc' => " The about text for the collection.",74 'desc' => "{mkcol.about}", 75 75 'type' => "string", 76 76 'reqd' => "no" }, 77 77 { 'name' => "plugin", 78 'desc' => " Perl plugin module to use (there may be multiple plugin entries).",78 'desc' => "{mkcol.plugin}", 79 79 'type' => "string", 80 80 'reqd' => "no" }, 81 81 { 'name' => "quiet", 82 'desc' => " Operate quietly.",82 'desc' => "{mkcol.quiet}", 83 83 'type' => "flag", 84 'reqd' => "no" }, 85 { 'name' => "language", 86 'desc' => "{scripts.language}", 87 'type' => "string", 84 88 'reqd' => "no" } ]; 85 89 … … 91 95 sub print_xml_usage 92 96 { 97 local $language = shift(@_); 98 93 99 &PrintUsage::print_xml_header(); 94 100 … … 98 104 print STDERR " <Arguments>\n"; 99 105 if (defined($options->{'args'})) { 100 &PrintUsage::print_options_xml($ options->{'args'});106 &PrintUsage::print_options_xml($language, $options->{'args'}); 101 107 } 102 108 print STDERR " </Arguments>\n"; … … 107 113 sub print_txt_usage 108 114 { 115 local $language = shift(@_); 116 109 117 local $programname = $options->{'name'}; 110 118 local $programargs = $options->{'args'}; … … 127 135 128 136 # Display the program options 129 &PrintUsage::print_options_txt($ programargs, $optiondescoffset);137 &PrintUsage::print_options_txt($language, $programargs, $optiondescoffset); 130 138 } 131 139 } … … 221 229 my ($argref) = @_; 222 230 if (!&parsargv::parse($argref, 231 'language/.*/', \$language, 223 232 'optionfile/.*/', \$optionfile, 224 233 'collectdir/.*/', \$collectdir, … … 232 241 q^xml^, \$xml 233 242 )) { 234 &print_txt_usage( );243 &print_txt_usage($language); 235 244 die "\n"; 236 245 } … … 242 251 243 252 if ($xml) { 244 &print_xml_usage( );253 &print_xml_usage($language); 245 254 die "\n"; 246 255 } … … 255 264 close OPTIONS; 256 265 &parse_args ($options); 257 258 266 } 259 267 … … 268 276 if (!defined($collection)) { 269 277 print STDOUT "ERROR: no collection name was specified\n"; 270 &print_txt_usage( );278 &print_txt_usage($language); 271 279 die "\n"; 272 280 } -
trunk/gsdl/bin/script/pluginfo.pl
r4779 r4873 44 44 print STDERR "\n"; 45 45 print STDERR "pluginfo.pl: Prints information about a plugin.\n\n"; 46 print STDERR " usage: $0 [options] plugin\n\n"; 47 print STDERR " options:\n"; 48 print STDERR " -collect collection-name Giving a collection name will make pluginfo.pl\n"; 49 print STDERR " look in collect/collection-name/perllib/plugins\n"; 50 print STDERR " for plugin first. If it doesn't find it there\n"; 51 print STDERR " it will look in the general perllib/plugins\n"; 52 print STDERR " directory\n"; 53 print STDERR " -xml Produces the information in an xml form, without\n"; 54 print STDERR " 'pretty' comments but with much more detail.\n"; 46 print STDERR " usage: $0 [options] plugin\n\n"; 47 print STDERR " options:\n"; 48 print STDERR " -collect collection-name Giving a collection name will make pluginfo.pl\n"; 49 print STDERR " look in collect/collection-name/perllib/plugins\n"; 50 print STDERR " first. If the plugin is not found there it will\n"; 51 print STDERR " look in the general perllib/plugins directory.\n\n"; 52 print STDERR " -xml Produces the information in an xml form, without\n"; 53 print STDERR " 'pretty' comments but with much more detail.\n\n"; 54 print STDERR " -language language-code Language to display plugin options in (eg. 'en_US'\n"; 55 print STDERR " specifies American English). Requires translations\n"; 56 print STDERR " of the option descriptions to exist in the\n"; 57 print STDERR " perllib/strings_language-code.rb file.\n"; 55 58 } 56 59 … … 61 64 my $collect = ""; 62 65 my $xml = 0; 66 my $language = ""; # Will display in the default language if not set 63 67 64 if (!parsargv::parse(\@ARGV, q^collect/.*/^, \$collect, q^xml^, \$xml )) 68 if (!parsargv::parse(\@ARGV, 69 q^collect/.*/^, \$collect, 70 q^xml^, \$xml, 71 q^language/.*/^, \$language)) 65 72 { 66 73 &print_usage(); … … 82 89 83 90 my $pluginfo = &plugin::load_plugins ([[$plugin]]); 84 85 91 my $plugobj = shift @$pluginfo; 86 92 87 if ($xml) { 88 $plugobj->print_xml_usage( );93 if ($xml) { 94 $plugobj->print_xml_usage($language); 89 95 } 90 96 else { 91 my $pluginfo = &plugin::load_plugins ([[$plugin]]);92 93 my $plugobj = shift @$pluginfo;94 95 97 print STDERR "\n$plugin info:\n\n"; 96 98 … … 105 107 print STDERR "General options are inherited from parent classes of the plugin.\n\n"; 106 108 107 $plugobj->print_txt_usage( );109 $plugobj->print_txt_usage($language); 108 110 109 111 # print STDERR " The default process_exp for $plugin is: "; -
trunk/gsdl/perllib/classify/AZCompactList.pm
r4840 r4873 52 52 my $doclevel_list = 53 53 [ { 'name' => "top", 54 'desc' => " Whole document." },54 'desc' => "{AZCompactList.doclevel.top}" }, 55 55 { 'name' => "section", 56 'desc' => " By sections." } ];56 'desc' => "{AZCompactList.doclevel.section}" } ]; 57 57 58 58 my $arguments = 59 59 [ { 'name' => "metadata", 60 'desc' => " Metadata field used for classification. List will be sorted by this element.",60 'desc' => "{AZCompactList.metadata}", 61 61 'type' => "metadata", 62 62 'reqd' => "yes" }, 63 63 { 'name' => "buttonname", 64 'desc' => " Button name for this classification.",64 'desc' => "{AZCompactList.buttonname}", 65 65 'type' => "string", 66 66 'deft' => "Metadata element specified with -metadata", 67 67 'reqd' => "no" }, 68 68 { 'name' => "removeprefix", 69 'desc' => " A prefix to ignore in the Metadata values for the field when sorting.",69 'desc' => "{AZCompactList.removeprefix}", 70 70 'type' => "string", 71 71 'deft' => "", … … 77 77 'reqd' => "no" }, 78 78 { 'name' => "mingroup", 79 'desc' => " The smallest value that will cause a group in the hierarchy to form.",79 'desc' => "{AZCompactList.mingroup}", 80 80 'type' => "int", 81 81 'deft' => "2", 82 82 'reqd' => "no" }, 83 83 { 'name' => "minnesting", 84 'desc' => " The smallest value that will cause a list to converted into nested list.",84 'desc' => "{AZCompactList.minnesting}", 85 85 'type' => "int", 86 86 'deft' => "20", 87 87 'reqd' => "no" }, 88 88 { 'name' => "mincompact", 89 'desc' => " Minimum number of documents to be displayed per page.",89 'desc' => "{AZCompactList.mincompact}", 90 90 'type' => "int", 91 91 'deft' => "10", 92 92 'reqd' => "no" }, 93 93 { 'name' => "maxcompact", 94 'desc' => " Maximum number of documents to be displayed per page.",94 'desc' => "{AZCompactList.maxcompact}", 95 95 'type' => "int", 96 96 'deft' => "30", 97 97 'reqd' => "no" }, 98 98 { 'name' => "doclevel", 99 'desc' => " Level to process document at.",99 'desc' => "{AZCompactList.doclevel}", 100 100 'type' => "enum", 101 101 'list' => $doclevel_list, … … 103 103 'reqd' => "no" }, 104 104 { 'name' => "onlyfirst", 105 'desc' => " Control whether all or only first metadata value used from array of metadata.",105 'desc' => "{AZCompactList.onlyfirst}", 106 106 'type' => "flag", 107 107 'reqd' => "no" }, 108 108 { 'name' => "freqsort", 109 'desc' => " Sort by node frequency rather than alpha-numeric.",110 'type' => "flag"} ,111 { 'name' => "recopt",112 'desc' => "?",113 'type' => "string",114 'deft' => "" } ];109 'desc' => "{AZCompactList.freqsort}", 110 'type' => "flag"} ]; 111 # { 'name' => "recopt", 112 # 'desc' => "{AZCompactList.recopt}", 113 # 'type' => "string", 114 # 'deft' => "" } ]; 115 115 116 116 my $options = … … 169 169 q^onlyfirst/.*/0^, \$onlyfirst, 170 170 q^freqsort/.*/0^, \$freqsort, 171 q^recopt/.*/-1^, \$recopt, 171 q^recopt/.*/-1^, \$recopt, # UNDOCUMENTED (what does it do?!?) 172 172 173 173 "allow_extra_options")) { 174 174 175 175 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n"; 176 $self->print_txt_usage( );176 $self->print_txt_usage(""); # Use default resource bundle 177 177 die "\n"; 178 178 } -
trunk/gsdl/perllib/classify/AZList.pm
r4840 r4873 38 38 my $arguments = 39 39 [ { 'name' => "metadata", 40 'desc' => " Metadata field used for classification. List will be sorted by this element.",40 'desc' => "{AZList.metadata}", 41 41 'type' => "metadata", 42 42 'reqd' => "yes" } , 43 43 { 'name' => "buttonname", 44 'desc' => " Button name for this classification.",44 'desc' => "{AZList.buttonname}", 45 45 'type' => "string", 46 46 'deft' => "Metadata element specified with -metadata", 47 47 'reqd' => "no" }, 48 48 { 'name' => "removeprefix", 49 'desc' => " A prefix to ignore in the Metadata values for the field when sorting.",49 'desc' => "{AZList.removeprefix}", 50 50 'type' => "string", 51 51 'deft' => "", … … 97 97 98 98 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n"; 99 $self->print_txt_usage( );99 $self->print_txt_usage(""); # Use default resource bundle 100 100 die "\n"; 101 101 } 102 102 103 103 if (!defined $metaname) { 104 $self->print_txt_usage( );104 $self->print_txt_usage(""); # Use default resource bundle 105 105 print STDERR "AZList used with no metadata name\n"; 106 106 die "\n"; -
trunk/gsdl/perllib/classify/AZSectionList.pm
r4786 r4873 47 47 my $arguments = 48 48 [ { 'name' => "metadata", 49 'desc' => " Metadata field used for classification. List will be sorted by this element.",49 'desc' => "{AZList.metadata}", 50 50 'type' => "metadata", 51 51 'reqd' => "yes" }, 52 52 { 'name' => "buttonname", 53 'desc' => " Button name for this classification.",53 'desc' => "{AZList.buttonname}", 54 54 'type' => "string", 55 55 'deft' => "Metadata element specified with -metadata", -
trunk/gsdl/perllib/classify/BasClas.pm
r4786 r4873 55 55 use printusage; 56 56 57 my $verbosity_list =58 [ { 'name' => "0",59 'desc' => "No output." },60 { 'name' => "1",61 'desc' => "A little bit of output." },62 { 'name' => "2",63 'desc' => "Some output." },64 { 'name' => "3",65 'desc' => "Lots of output." } ];66 57 67 58 my $arguments = 68 59 [ { 'name' => "builddir", 69 'desc' => " Where to put the built indexes.",60 'desc' => "{BasClas.builddir}", 70 61 'type' => "string", 71 62 'deft' => "" }, 72 63 { 'name' => "outhandle", 73 'desc' => " The file handle to write output to.",64 'desc' => "{BasClas.outhandle}", 74 65 'type' => "string", 75 66 'deft' => "STDERR" }, 76 67 { 'name' => "verbosity", 77 'desc' => " Controls the quantity of output.",68 'desc' => "{BasClas.verbosity}", 78 69 'type' => "enum", 79 'list' => $verbosity_list,80 70 'deft' => "2", 81 71 'reqd' => "no" } ]; … … 90 80 { 91 81 local $self = shift(@_); 92 93 print STDERR "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n"; 94 $self->print_xml(); 82 local $language = shift(@_); 83 84 &PrintUsage::print_xml_header(); 85 $self->print_xml($language); 95 86 } 96 87 … … 99 90 { 100 91 local $self = shift(@_); 92 local $language = shift(@_); 101 93 102 94 local $optionlistref = $self->{'option_list'}; … … 111 103 print STDERR " <Arguments>\n"; 112 104 if (defined($classifieroptions->{'args'})) { 113 &PrintUsage::print_options_xml($ classifieroptions->{'args'});105 &PrintUsage::print_options_xml($language, $classifieroptions->{'args'}); 114 106 } 115 107 116 108 # Recurse up the classifier hierarchy 117 $self->print_xml( );109 $self->print_xml($language); 118 110 119 111 print STDERR " </Arguments>\n"; … … 125 117 { 126 118 local $self = shift(@_); 119 local $language = shift(@_); 127 120 128 121 # Print the usage message for a classifier (recursively) 129 122 local $descoffset = $self->determine_description_offset(0); 130 $self->print_classifier_usage($ descoffset, 1);123 $self->print_classifier_usage($language, $descoffset, 1); 131 124 } 132 125 … … 161 154 { 162 155 local $self = shift(@_); 156 local $language = shift(@_); 163 157 local $descoffset = shift(@_); 164 158 local $isleafclass = shift(@_); … … 190 184 191 185 # Display the classifier options 192 &PrintUsage::print_options_txt($ classifierargs, $optiondescoffset);186 &PrintUsage::print_options_txt($language, $classifierargs, $optiondescoffset); 193 187 } 194 188 195 189 # Recurse up the classifier hierarchy 196 $self->print_classifier_usage($ descoffset, 0);190 $self->print_classifier_usage($language, $descoffset, 0); 197 191 $self->{'option_list'} = \@optionlist; 198 192 } … … 237 231 print STDERR "(general options are those available to all classifiers).\n"; 238 232 print STDERR "Check your collect.cfg configuration file.\n"; 239 &print_txt_usage();233 $self->print_txt_usage(""); # Use default resource bundle 240 234 die "\n"; 241 235 } 242 243 244 236 245 237 return bless $self, $class; -
trunk/gsdl/perllib/classify/DateList.pm
r4786 r4873 46 46 my $arguments = 47 47 [ { 'name' => "bymonth", 48 'desc' => " Classify by year and month.",48 'desc' => "{DateList.bymonth}", 49 49 'type' => "flag", 50 50 'reqd' => "no" } ]; … … 82 82 q^bymonth^, \$self->{'bymonth'}, 83 83 "allow_extra_options")) { 84 $self->print_txt_usage( );84 $self->print_txt_usage(""); # Use default resource bundle 85 85 die "\n"; 86 86 } -
trunk/gsdl/perllib/classify/HTML.pm
r4786 r4873 43 43 my $arguments = 44 44 [ { 'name' => "url", 45 'desc' => " The url of the web page to link to.",45 'desc' => "{HTML.url}", 46 46 'type' => "string", 47 47 'reqd' => "yes" } , 48 48 { 'name' => "buttonname", 49 'desc' => " The title field for this classification.",49 'desc' => "{HTML.buttonname}", 50 50 'type' => "string", 51 51 'deft' => "Browse", … … 86 86 87 87 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n"; 88 $self->print_txt_usage( );88 $self->print_txt_usage(""); # Use default resource bundle 89 89 die "\n"; 90 90 } -
trunk/gsdl/perllib/classify/Hierarchy.pm
r4786 r4873 57 57 my $arguments = 58 58 [ { 'name' => "metadata", 59 'desc' => " Metadata field used for classification. List will be sorted by this element, unless -sort is used.",59 'desc' => "{Hierarchy.metadata}", 60 60 'type' => "metadata", 61 61 'reqd' => "yes" }, 62 { 'name' => "hfile", 63 'desc' => "{Hierarchy.hfile}", 64 'type' => "string", 65 'deft' => "", 66 'reqd' => "yes" }, 62 67 { 'name' => "buttonname", 63 'desc' => " Title field for this classification.",68 'desc' => "{Hierarchy.buttonname}", 64 69 'type' => "string", 65 70 'deft' => "Metadata element specified with -metadata", 66 71 'reqd' => "no" }, 67 { 'name' => "hfile",68 'desc' => "The classification structure file.",69 'type' => "string",70 'deft' => "",71 'reqd' => "yes" },72 72 { 'name' => "sort", 73 'desc' => " Metadata field to sort by. Use '-sort nosort' for no sorting.",73 'desc' => "{Hierarchy.sort}", 74 74 'type' => "string", 75 75 'deft' => "Metadata field specified with -metadata", 76 76 'reqd' => "no" }, 77 77 { 'name' => "hlist_at_top", 78 'desc' => " Display the first level of the classification horizontally.",78 'desc' => "{Hierarchy.hlist_at_top}", 79 79 'type' => "flag", 80 80 'reqd' => "no" } ]; … … 151 151 if (!-e $subjectfile) { 152 152 my $outhandle = $self->{'outhandle'}; 153 $self->print_txt_usage( );153 $self->print_txt_usage(""); # Use default resource bundle 154 154 print STDERR "\nHierarchy Error: Can't locate subject file $hfile\n"; 155 155 print STDERR "This file should be in $collfile or $subjectfile\n"; … … 175 175 if(defined $self->{'construction_error'} || !defined $self->{'metaname'} || !defined $self->{'subjectfile'}) { 176 176 print STDERR "Error: " , $self->{'construction_error'} , "\n"; 177 $self->print_txt_usage( );177 $self->print_txt_usage(""); # Use default resource bundle 178 178 die "\n"; 179 179 } -
trunk/gsdl/perllib/classify/List.pm
r4786 r4873 51 51 my $arguments = 52 52 [ { 'name' => "metadata", 53 'desc' => " Metadata field used for classification. List will be sorted by this element.",53 'desc' => "{List.metadata}", 54 54 'type' => "metadata", 55 'reqd' => "yes" } 55 'reqd' => "yes" }, 56 56 { 'name' => "buttonname", 57 'desc' => " Button name for this classification.",57 'desc' => "{List.buttonname}", 58 58 'type' => "string", 59 59 'deft' => "Metadata element specified with -metadata", 60 60 'reqd' => "no" }, 61 61 { 'name' => "sort", 62 'desc' => " Metadata field to sort by. If not set, sorts in build (random) order.",62 'desc' => "{List.sort}", 63 63 'type' => "string", 64 64 'deft' => "Metadata field specified with -metadata", … … 104 104 105 105 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n"; 106 $self->print_txt_usage( );106 $self->print_txt_usage(""); # Use default resource bundle 107 107 die "\n"; 108 108 } -
trunk/gsdl/perllib/classify/Phind.pm
r4786 r4873 85 85 my $arguments = 86 86 [ { 'name' => "text", 87 'desc' => " The text used to build the phrase hierarchy.",87 'desc' => "{Phind.text}", 88 88 'type' => "string", 89 89 'deft' => "section:Title,section:text", 90 90 'reqd' => "no" }, 91 91 { 'name' => "title", 92 'desc' => " The metadata field used to describe each document.",92 'desc' => "{Phind.title}", 93 93 'type' => "metadata", 94 94 'deft' => "Title", 95 95 'reqd' => "no" }, 96 96 { 'name' => "button", 97 'desc' => " The label for the classifier screen and button in navigation bar.",97 'desc' => "{Phind.button}", 98 98 'type' => "string", 99 99 'deft' => "Phrase", 100 100 'reqd' => "no" }, 101 101 { 'name' => "language", 102 'desc' => " Language or languages to use building hierarchy. Languages are identified by two-letter country codes like en (English), es (Spanish), and fr (French). Language is a regular expression, so 'en|fr' (English or French) and '..' (match any language) are valid.",102 'desc' => "{Phind.language}", 103 103 'type' => "language", 104 104 'deft' => "en", 105 105 'reqd' => "no" }, 106 106 { 'name' => "savephrases", 107 'desc' => " If set, the phrase infomation will be stored in the given file as text. It is probably a good idea to use an absolute path.",107 'desc' => "{Phind.savephrases}", 108 108 'type' => "string", 109 109 'deft' => "", 110 110 'reqd' => "no" }, 111 111 { 'name' => "builddir", 112 'desc' => " Where to put the built indexes.",112 'desc' => "{BasClas.builddir}", 113 113 'type' => "string", 114 114 'deft' => "" }, 115 115 { 'name' => "suffixmode", 116 'desc' => " The smode parameter to the phrase extraction program. A value of 0 means that stopwords are ignored, and of 1 means that stopwords are used.",116 'desc' => "{Phind.suffixmode}", 117 117 'type' => "int", 118 118 'deft' => "1", 119 119 'reqd' => "no" }, 120 120 { 'name' => "thesaurus", 121 'desc' => " Name of a thesaurus stored in Phind format in the collection's etc directory.",121 'desc' => "{Phind.thesaurus}", 122 122 'type' => "string", 123 123 'deft' => "", 124 124 'reqd' => "no" }, 125 125 { 'name' => "untidy", 126 'desc' => " Don't remove working files.",126 'desc' => "{Phind.untidy}", 127 127 'type' => "flag", 128 128 'reqd' => "no" } ]; … … 219 219 220 220 print STDERR "\nIncorrect options passed to $class, check your collect.cfg file\n"; 221 $self->print_txt_usage( );221 $self->print_txt_usage(""); # Use default resource bundle 222 222 die "\n"; 223 223 } -
trunk/gsdl/perllib/classify/SectionList.pm
r4786 r4873 39 39 } 40 40 41 my $arguments =42 [ { 'name' => "metadata",43 'desc' => "Metadata field used for classification. List will be sorted by this element.",44 'type' => "metadata",45 'reqd' => "yes" } ,46 { 'name' => "buttonname",47 'desc' => "Button name for this classification.",48 'type' => "string",49 'deft' => "Metadata element specified with -metadata",50 'reqd' => "no" },51 { 'name' => "sort",52 'desc' => "Metadata field to sort by. If not set, sorts in build (random) order.",53 'type' => "string",54 'deft' => "Metadata field specified with -metadata",55 'reqd' => "no" } ];56 57 41 my $options = { 'name' => "SectionList", 58 42 'desc' => "Same as List classifier but includes all sections of document (excluding top level) rather than just top level document itself.", 59 'inherits' => "Yes", 60 'args' => $arguments }; 43 'inherits' => "Yes" }; 61 44 62 45 # sub print_usage { -
trunk/gsdl/perllib/plugins/BasPlug.pm
r4845 r4873 45 45 use printusage; 46 46 47 my $unicode_list = 47 my $unicode_list = 48 48 [ { 'name' => "auto", 49 'desc' => " Use text categorization algorithm to automatically identify the encoding of each source document. This will be slower than explicitly setting the encoding but will work where more than one encoding is used within the same collection." },49 'desc' => "{BasPlug.input_encoding.auto}" }, 50 50 { 'name' => "ascii", 51 'desc' => " Plain 7 bit ascii. This may be a bit faster than using iso_8859_1. Beware of using this on a collection of documents that may contain characters outside the plain 7 bit ascii set though (e.g. German or French documents containing accents), use iso_8859_1 instead." },51 'desc' => "{BasPlug.input_encoding.ascii}" }, 52 52 { 'name' => "utf8", 53 'desc' => " either utf8 or unicode -- automatically detected." },53 'desc' => "{BasPlug.input_encoding.utf8}" }, 54 54 { 'name' => "unicode", 55 'desc' => " just unicode" } ];56 57 my $arguments = 55 'desc' => "{BasPlug.input_encoding.unicode}" } ]; 56 57 my $arguments = 58 58 [ { 'name' => "process_exp", 59 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",59 'desc' => "{BasPlug.process_exp}", 60 60 'type' => "string", 61 61 'deft' => "", 62 62 'reqd' => "no" }, 63 63 { 'name' => "block_exp", 64 'desc' => " Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",65 'type' => 'string',64 'desc' => "{BasPlug.block_exp}", 65 'type' => "string", 66 66 'deft' => "", 67 67 'reqd' => "no" }, 68 68 { 'name' => "input_encoding", 69 'desc' => " The encoding of the source documents. Documents will be converted from these encodings and stored internally as utf8.",69 'desc' => "{BasPlug.input_encoding}", 70 70 'type' => "enum", 71 71 'list' => $unicode_list, … … 73 73 'deft' => "auto" } , 74 74 { 'name' => "default_encoding", 75 'desc' => " Use this encoding if -input_encoding is set to 'auto' and the text categorization algorithm fails to extract the encoding or extracts an encoding unsupported by Greenstone.",75 'desc' => "{BasPlug.default_encoding}", 76 76 'type' => "enum", 77 77 'reqd' => "no", 78 78 'deft' => "utf8" }, 79 79 { 'name' => "extract_language", 80 'desc' => " Identify the language of each document and set 'Language' metadata. Note that this will be done automatically if -input_encoding is 'auto'.",80 'desc' => "{BasPlug.extract_language}", 81 81 'type' => "flag", 82 82 'reqd' => "no" }, 83 83 { 'name' => "default_language", 84 'desc' => " If Greenstone fails to work out what language a document is the 'Language' metadata element will be set to this value. The default is 'en' (ISO 639 language symbols are used: en = English). Note that if -input_encoding is not set to 'auto' and -extract_language is not set, all documents will have their 'Language' metadata set to this value.",84 'desc' => "{BasPlug.default_language}", 85 85 'type' => "language", 86 86 'deft' => "en", 87 87 'reqd' => "no" }, 88 88 { 'name' => "extract_acronyms", 89 'desc' => " Extract acronyms from within text and set as metadata.",89 'desc' => "{BasPlug.extract_acronyms}", 90 90 'type' => "flag", 91 91 'reqd' => "no" }, 92 92 { 'name' => "markup_acronyms", 93 'desc' => " Add acronym metadata into document text.",93 'desc' => "{BasPlug.markup_acronyms}", 94 94 'type' => "flag", 95 95 'reqd' => "no" }, 96 96 { 'name' => "first", 97 'desc' => " Comma separated list of first sizes to extract from the text into a metadata field. The field is called 'FirstNNN'.",97 'desc' => "{BasPlug.first}", 98 98 'type' => "string", 99 99 'reqd' => "no" }, 100 100 { 'name' => "extract_email", 101 'desc' => " Extract email addresses as metadata.",101 'desc' => "{BasPlug.extract_email}", 102 102 'type' => "flag", 103 103 'reqd' => "no" }, 104 104 { 'name' => "extract_historical_years", 105 'desc' => " Extract time-period information from historical documents. This is stored as metadata with the document. There is a search interface for this metadata, which you can include in your collection by adding the statement, \"format QueryInterface DateSearch\" to your collection configuration file.",105 'desc' => "{BasPlug.extract_historical_years}", 106 106 'type' => "flag", 107 107 'reqd' => "no" }, 108 108 { 'name' => "maximum_year", 109 'desc' => " The maximum historical date to be used as metadata (in a Common Era date, such as 1950).",109 'desc' => "{BasPlug.maximum_year}", 110 110 'type' => "int", 111 111 'deft' => (localtime)[5]+1900, 112 112 'reqd' => "no"}, 113 113 { 'name' => "maximum_century", 114 'desc' => " The maximum named century to be extracted as historical metadata (e.g. 14 will extract all references up to the 14th century).",114 'desc' => "{BasPlug.maximum_century}", 115 115 'type' => "int", 116 116 'deft' => "-1", 117 117 'reqd' => "no" }, 118 118 { 'name' => "no_bibliography", 119 'desc' => " Do not try to block bibliographic dates when extracting historical dates.",119 'desc' => "{BasPlug.no_bibliography}", 120 120 'type' => "flag", 121 121 'reqd' => "no"}, 122 122 { 'name' => "cover_image", 123 'desc' => " Will look for a prefix.jpg file (where prefix is the same prefix as the file being processed) and associate it as a cover image.",123 'desc' => "{BasPlug.cover_image}", 124 124 'type' => "flag", 125 125 'reqd' => "no" } ]; … … 131 131 132 132 133 sub get_arguments 134 { 135 local $self = shift(@_); 136 local $optionlistref = $self->{'option_list'}; 137 local @optionlist = @$optionlistref; 138 local $pluginoptions = pop(@$optionlistref); 139 local $pluginarguments = $pluginoptions->{'args'}; 140 return $pluginarguments; 141 } 142 143 133 144 sub print_xml_usage 134 145 { 135 146 local $self = shift(@_); 136 137 print STDERR "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n\n"; 138 $self->print_xml(); 147 local $language = shift(@_); 148 149 &PrintUsage::print_xml_header(); 150 $self->print_xml($language); 139 151 } 140 152 … … 143 155 { 144 156 local $self = shift(@_); 157 local $language = shift(@_); 145 158 146 159 local $optionlistref = $self->{'option_list'}; … … 155 168 print STDERR " <Arguments>\n"; 156 169 if (defined($pluginoptions->{'args'})) { 157 &PrintUsage::print_options_xml($ pluginoptions->{'args'});170 &PrintUsage::print_options_xml($language, $pluginoptions->{'args'}); 158 171 } 159 172 160 173 # Recurse up the plugin hierarchy 161 $self->print_xml( );174 $self->print_xml($language); 162 175 163 176 print STDERR " </Arguments>\n"; … … 169 182 { 170 183 local $self = shift(@_); 184 local $language = shift(@_); 171 185 172 186 # Print the usage message for a plugin (recursively) 173 187 local $descoffset = $self->determine_description_offset(0); 174 $self->print_plugin_usage($ descoffset, 1);188 $self->print_plugin_usage($language, $descoffset, 1); 175 189 } 176 190 … … 205 219 { 206 220 local $self = shift(@_); 221 local $language = shift(@_); 207 222 local $descoffset = shift(@_); 208 223 local $isleafclass = shift(@_); … … 234 249 235 250 # Display the plugin options 236 &PrintUsage::print_options_txt($ pluginargs, $optiondescoffset);251 &PrintUsage::print_options_txt($language, $pluginargs, $optiondescoffset); 237 252 } 238 253 239 254 # Recurse up the plugin hierarchy 240 $self->print_plugin_usage($ descoffset, 0);255 $self->print_plugin_usage($language, $descoffset, 0); 241 256 $self->{'option_list'} = \@optionlist; 242 257 } … … 380 395 print STDERR "\nThe $plugin_name plugin uses an incorrect general option (general options are those\n"; 381 396 print STDERR "available to all plugins). Check your collect.cfg configuration file.\n"; 382 # &print_general_usage($plugin_name); 383 $self->print_txt_usage(); 397 $self->print_txt_usage(""); # Use default resource bundle 384 398 die "\n"; 385 399 } -
trunk/gsdl/perllib/plugins/BibTexPlug.pm
r4792 r4873 53 53 my $arguments = 54 54 [ { 'name' => "process_exp", 55 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",55 'desc' => "{BasPlug.process_exp}", 56 56 'type' => "string", 57 'reqd' => "no" 58 'deft' => q^(?i)\.bib$^} ];57 'reqd' => "no", 58 'deft' => &get_default_process_exp() } ]; 59 59 60 60 my $options = { 'name' => "BibTexPlug", -
trunk/gsdl/perllib/plugins/BookPlug.pm
r4744 r4873 62 62 my $arguments = 63 63 [ { 'name' => "process_exp", 64 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",64 'desc' => "{BasPlug.process_exp}", 65 65 'type' => "string", 66 66 'reqd' => "no", 67 67 'deft' => &get_default_process_exp() }, 68 68 { 'name' => "block_exp", 69 'desc' => " Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",69 'desc' => "{BasPlug.block_exp}", 70 70 'type' => "string", 71 71 'reqd' => "no", -
trunk/gsdl/perllib/plugins/ConvertToPlug.pm
r4842 r4873 51 51 my $convert_to_list = 52 52 [ { 'name' => "html", 53 'desc' => " HTML format" },53 'desc' => "{ConvertToPlug.convert_to.html}" }, 54 54 { 'name' => "text", 55 'desc' => " Plain text format" } ];56 57 my $arguments = 55 'desc' => "{ConvertToPlug.convert_to.text}" } ]; 56 57 my $arguments = 58 58 [ { 'name' => "convert_to", 59 'desc' => " Plugin converts to TEXT or HTML.",59 'desc' => "{ConvertToPlug.convert_to}", 60 60 'type' => "enum", 61 61 'reqd' => "no", … … 63 63 'deft' => "html" }, 64 64 { 'name' => "use_strings", 65 'desc' => " If set, a simple strings function will be called to extract text if the conversion utility fails.",65 'desc' => "{ConvertToPlug.use_strings}", 66 66 'type' => "flag", 67 67 'reqd' => "no" } ]; -
trunk/gsdl/perllib/plugins/DBPlug.pm
r4844 r4873 46 46 my $arguments = 47 47 [ { 'name' => "process_exp", 48 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",48 'desc' => "{BasPlug.process_exp}", 49 49 'type' => "string", 50 50 'deft' => &get_default_process_exp(), … … 65 65 # no plugin-specific options 66 66 # if (!parsargv::parse(\@_, "allow_extra_options")) { 67 # $self->print_txt_usage( );67 # $self->print_txt_usage(""); # Use default resource bundle 68 68 # die "\n"; 69 69 # } -
trunk/gsdl/perllib/plugins/EMAILPlug.pm
r4785 r4873 90 90 my $arguments = 91 91 [ { 'name' => "process_exp", 92 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",92 'desc' => "{BasPlug.process_exp}", 93 93 'type' => "string", 94 94 'reqd' => "no", 95 95 'deft' => &get_default_process_exp() }, 96 96 { 'name' => "no_attachments", 97 'desc' => " Do not save message attachments.",97 'desc' => "{EMAILPlug.no_attachments}", 98 98 'type' => "flag", 99 99 'reqd' => "no" }, 100 100 { 'name' => "split_exp", 101 'desc' => " A perl regular expression used to split files containing many messages into individual documents.",101 'desc' => "{EMAILPlug.split_exp}", 102 102 'type' => "string", 103 103 'deft' => "" } ]; … … 126 126 print STDERR "\nIncorrect options passed to $class."; 127 127 print STDERR "\nCheck your collect.cfg configuration file\n"; 128 $self->print_txt_usage( );128 $self->print_txt_usage(""); # Use default resource bundle 129 129 die "\n"; 130 130 } -
trunk/gsdl/perllib/plugins/HTMLPlug.pm
r4845 r4873 50 50 my $arguments = 51 51 [ { 'name' => "process_exp", 52 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",52 'desc' => "{BasPlug.process_exp}", 53 53 'type' => "string", 54 54 'deft' => &get_default_process_exp() }, 55 55 { 'name' => "block_exp", 56 'desc' => " Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",56 'desc' => "{BasPlug.block_exp}", 57 57 'type' => 'string', 58 58 'deft' => &get_default_block_exp() }, 59 59 { 'name' => "nolinks", 60 'desc' => "Don't make any attempt to trap links (setting this flag may improve speed of building/importing but any relative links within documents will be broken).",60 'desc' => "{HTMLPlug.nolinks}", 61 61 'type' => "flag" }, 62 62 { 'name' => "keep_head", 63 'desc' => " Don't remove headers from html files.",63 'desc' => "{HTMLPlug.keep_head}", 64 64 'type' => "flag" }, 65 65 { 'name' => "no_metadata", 66 'desc' => " Don't attempt to extract any metadata from files.",66 'desc' => "{HTMLPlug.no_metadata}", 67 67 'type' => "flag" }, 68 68 { 'name' => "metadata_fields", 69 'desc' => " Comma separated list of metadata fields to attempt to extract. Use 'tag<tagname>' to have the contents of the first <tagname> pair put in a metadata element called 'tagname'. Capitalise this as you want the metadata capitalised in Greenstone, since the tag extraction is case insensitive.",69 'desc' => "{HTMLPlug.metadata_fields}", 70 70 'type' => "metadata", 71 71 'deft' => "Title" }, 72 72 { 'name' => "hunt_creator_metadata", 73 'desc' => " Find as much metadata as possible on authorship and place it in the 'Creator' field. Requires the -metadata_fields flag.",73 'desc' => "{HTMLPlug.hunt_creator_metadata}", 74 74 'type' => "flag" }, 75 75 { 'name' => "file_is_url", 76 'desc' => " Set if input filenames make up url of original source documents e.g. if a web mirroring tool was used to create the import directory structure.",76 'desc' => "{HTMLPlug.file_is_url}", 77 77 'type' => "flag" }, 78 78 { 'name' => "assoc_files", 79 'desc' => " Perl regular expression of file extensions to associate with html documents.",79 'desc' => "{HTMLPlug.assoc_files}", 80 80 'type' => "string", 81 81 'deft' => q^(?i)\.(jpe?g|gif|png|css)$^ }, 82 82 { 'name' => "rename_assoc_files", 83 'desc' => " Renames files associated with documents (e.g. images). Also creates much shallower directory structure (useful when creating collections to go on cd-rom).",83 'desc' => "{HTMLPlug.rename_assoc_files}", 84 84 'type' => "flag" }, 85 85 { 'name' => "title_sub", 86 'desc' => " Substitution expression to modify string stored as Title. Used by, for example, PDFPlug to remove \"Page 1\", etc from text used as the title.",86 'desc' => "{HTMLPlug.title_sub}", 87 87 'type' => "string", 88 88 'deft' => "" }, 89 89 { 'name' => "description_tags", 90 'desc' => " Split document into sub-sections where <Section> tags occur. Note that by setting this option you implicitly set -no_metadata, as all metadata should be included within the <Section> tags. Also, '-keep_head' will have no effect when this option is set.",90 'desc' => "{HTMLPlug.description_tags}", 91 91 'type' => "flag" } ]; 92 92 … … 160 160 161 161 print STDERR "\nIncorrect options passed to HTMLPlug, check your collect.cfg configuration file\n"; 162 $self->print_txt_usage( );162 $self->print_txt_usage(""); # Use default resource bundle 163 163 die "\n"; 164 164 } -
trunk/gsdl/perllib/plugins/ImagePlug.pm
r4790 r4873 34 34 my $arguments = 35 35 [ { 'name' => "process_exp", 36 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",36 'desc' => "{BasPlug.process_exp}", 37 37 'type' => "string", 38 38 'deft' => &get_default_process_exp(), 39 39 'reqd' => "no" }, 40 40 { 'name' => "noscaleup", 41 'desc' => " Don't scale up small images when making thumbnails.",41 'desc' => "{ImagePlug.noscaleup}", 42 42 'type' => "flag", 43 43 'reqd' => "no" }, 44 44 { 'name' => "thumbnailsize", 45 'desc' => " Make thumbnails of size nxn.",45 'desc' => "{ImagePlug.thumbnailsize}", 46 46 'type' => "int", 47 47 'deft' => "100", 48 48 'reqd' => "no" }, 49 49 { 'name' => "thumbnailtype", 50 'desc' => " Make thumbnails in format 's'.",50 'desc' => "{ImagePlug.thumbnailtype}", 51 51 'type' => "string", 52 52 'deft' => "gif", 53 53 'reqd' => "no" }, 54 54 { 'name' => "screenviewsize", 55 'desc' => " If set, makes an image of size n for screen display and sets Screen, ScreenSize, ScreenWidth and ScreenHeight metadata. By default it is not set.",55 'desc' => "{ImagePlug.screenviewsize}", 56 56 'type' => "int", 57 57 'deft' => "0", 58 58 'reqd' => "no" }, 59 59 { 'name' => "screenviewtype", 60 'desc' => " If -screenviewsize is set, this sets the screen display image type.",60 'desc' => "{ImagePlug.screenviewtype}", 61 61 'type' => "string", 62 62 'deft' => "jpg", 63 63 'reqd' => "no" }, 64 64 { 'name' => "converttotype", 65 'desc' => " Convert main image to.",65 'desc' => "{ImagePlug.converttotype}", 66 66 'type' => "string", 67 67 'deft' => "", 68 68 'reqd' => "no" }, 69 69 { 'name' => "minimumsize", 70 'desc' => " Ignore images smaller than n bytes.",70 'desc' => "{ImagePlug.minimumsize}", 71 71 'type' => "int", 72 72 'deft' => "100", … … 127 127 print STDERR "\nImagePlug uses an incorrect option.\n"; 128 128 print STDERR "Check your collect.cfg configuration file.\n"; 129 $self->print_txt_usage( );129 $self->print_txt_usage(""); # Use default resource bundle 130 130 die "\n"; 131 131 } -
trunk/gsdl/perllib/plugins/MACROPlug.pm
r4785 r4873 36 36 my $arguments = 37 37 [ { 'name' => "process_exp", 38 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",38 'desc' => "{BasPlug.process_exp}", 39 39 'type' => "string", 40 40 'deft' => &get_default_process_exp(), -
trunk/gsdl/perllib/plugins/MARCPlug.pm
r4791 r4873 40 40 my $arguments = 41 41 [ { 'name' => "metadata_mapping", 42 'desc' => " Name of file that includes mapping details from MARC values to Greenstone metadata names. Defaults to 'marctodc.txt' found in the site's etc directory.",42 'desc' => "{MARCPlug.metadata_mapping}", 43 43 'type' => "string", 44 44 'deft' => "marctodc.txt", … … 46 46 47 47 my $options = { 'name' => "MARCPlug", 48 'desc' => " ",48 'desc' => "Basic MARC plugin.", 49 49 'inherits' => "Yes", 50 50 'args' => $arguments }; … … 73 73 74 74 print STDERR "\nIncorrect options passed to MARCPlug, check your collect.cfg configuration file\n"; 75 $self->print_txt_usage( );75 $self->print_txt_usage(""); # Use default resource bundle 76 76 die "\n"; 77 77 } -
trunk/gsdl/perllib/plugins/OAIPlug.pm
r4785 r4873 49 49 my $self = new BasPlug ($class, @_); 50 50 51 # 14-05-02 To allow for proper inheritance of arguments - John Thompson 52 my $option_list = $self->{'option_list'}; 53 push( @{$option_list}, $options ); 54 51 55 if (!parsargv::parse(\@_, 52 56 "allow_extra_options")) { 53 57 54 58 print STDERR "\nIncorrect options passed to OAIPlug, check your collect.cfg configuration file\n"; 55 $self->print_txt_usage( );59 $self->print_txt_usage(""); # Use default resource bundle 56 60 die "\n"; 57 61 } -
trunk/gsdl/perllib/plugins/PDFPlug.pm
r4785 r4873 34 34 my $arguments = 35 35 [ { 'name' => "process_exp", 36 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",36 'desc' => "{BasPlug.process_exp}", 37 37 'type' => "string", 38 38 'deft' => &get_default_process_exp(), 39 39 'reqd' => "no" }, 40 40 { 'name' => "block_exp", 41 'desc' => " Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",41 'desc' => "{BasPlug.block_exp}", 42 42 'type' => "string", 43 'deft' => q^^},43 'deft' => &get_default_block_exp() }, 44 44 { 'name' => "noimages", 45 'desc' => " Don't attempt to extract images from PDF.",45 'desc' => "{PDFPlug.noimages}", 46 46 'type' => "flag" }, 47 47 { 'name' => "complex", 48 'desc' => " Create more complex output. With this option set the output html will look much more like the original PDF file. For this to function properly you Ghostscript installed (for *nix gs should be on your path while for windows you must have gswin32c.exe on your path).",48 'desc' => "{PDFPlug.complex}", 49 49 'type' => "flag" }, 50 50 { 'name' => "nohidden", 51 'desc' => " Prevent pdftohtml from attempting to extract hidden text. This is only useful if the -complex option is also set.",51 'desc' => "{PDFPlug.nohidden}", 52 52 'type' => "flag" }, 53 53 { 'name' => "zoom", 54 'desc' => " The factor by which to zoom the PDF for output (this is only useful if -complex is set).",54 'desc' => "{PDFPlug.zoom}", 55 55 'deft' => "2", 56 56 'type' => "int" }, 57 57 { 'name' => "use_sections", 58 'desc' => " Create a separate section for each page of the PDF file.",58 'desc' => "{PDFPlug.use_sections}", 59 59 'type' => "flag" } ]; 60 60 … … 79 79 print STDERR "\nIncorrect options passed to PDFPlug, check your collect.cfg configuration file\n"; 80 80 local $self = new ConvertToPlug($class, @_, "-title_sub", '^(Page\s+\d+)?(\s*1\s+)?'); 81 $self->print_txt_usage( );81 $self->print_txt_usage(""); # Use default resource bundle 82 82 die "\n"; 83 83 } -
trunk/gsdl/perllib/plugins/PSPlug.pm
r4785 r4873 37 37 my $arguments = 38 38 [ { 'name' => "process_exp", 39 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",39 'desc' => "{BasPlug.process_exp}", 40 40 'type' => "string", 41 41 'deft' => &get_default_process_exp(), 42 42 'reqd' => "no" }, 43 43 { 'name' => "block_exp", 44 'desc' => " Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",44 'desc' => "{BasPlug.block_exp}", 45 45 'type' => 'string', 46 46 'deft' => &get_default_block_exp() }, 47 47 { 'name' => "extract_date", 48 'desc' => " Extract date from PS header.",48 'desc' => "{PSPlug.extract_date}", 49 49 'type' => "flag" }, 50 50 { 'name' => "extract_pages", 51 'desc' => " Extract pages from PS header.",51 'desc' => "{PSPlug.extract_pages}", 52 52 'type' => "flag" }, 53 53 { 'name' => "extract_title", 54 'desc' => " Extract title from PS header.",54 'desc' => "{PSPlug.extract_title}", 55 55 'type' => "flag" } ]; 56 56 57 57 my $options = { 'name' => "PSPlug", 58 'desc' => "This might look VERY similar to the PDF plugin.",58 'desc' => "This is a \"poor man's\" ps to text converter. If you are serious, consider using the PRESCRIPT package, which is available for download at http://www.nzdl.org/html/software.html", 59 59 'inherits' => "yes", 60 60 'args' => $arguments }; … … 77 77 "allow_extra_options")) { 78 78 print STDERR "\nIncorrect options passed to HTMLPlug, check your collect.cfg configuration file\n"; 79 $self->print_txt_usage( );79 $self->print_txt_usage(""); # Use default resource bundle 80 80 die "\n"; 81 81 } -
trunk/gsdl/perllib/plugins/RTFPlug.pm
r4744 r4873 37 37 my $arguments = 38 38 [ { 'name' => "process_exp", 39 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",39 'desc' => "{BasPlug.process_exp}", 40 40 'type' => "string", 41 41 'deft' => &get_default_process_exp(), -
trunk/gsdl/perllib/plugins/RecPlug.pm
r4785 r4873 108 108 my $arguments = 109 109 [ { 'name' => "block_exp", 110 'desc' => " Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",110 'desc' => "{BasPlug.block_exp}", 111 111 'type' => "string", 112 112 'deft' => &get_default_block_exp(), 113 113 'reqd' => "no" }, 114 114 { 'name' => "use_metadata_files", 115 'desc' => " Read metadata from metadata XML files.",115 'desc' => "{RecPlug.use_metadata_files}", 116 116 'type' => "flag", 117 117 'reqd' => "no" } ]; … … 149 149 print STDERR "\nRecPlug uses an incorrect option.\n"; 150 150 print STDERR "Check your collect.cfg configuration file.\n\n"; 151 $self->print_txt_usage( );151 $self->print_txt_usage(""); # Use default resource bundle 152 152 die "\n"; 153 153 } -
trunk/gsdl/perllib/plugins/ReferPlug.pm
r4744 r4873 74 74 my $arguments = 75 75 [ { 'name' => "process_exp", 76 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",76 'desc' => "{BasPlug.process_exp}", 77 77 'type' => "string", 78 78 'deft' => &get_default_process_exp(), -
trunk/gsdl/perllib/plugins/SRCPlug.pm
r4785 r4873 48 48 my $arguments = 49 49 [ { 'name' => "process_exp", 50 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",50 'desc' => "{BasPlug.process_exp}", 51 51 'type' => "string", 52 52 'deft' => &get_default_process_exp(), 53 53 'reqd' => "no" } , 54 54 { 'name' => "block_exp", 55 'desc' => " Files matching this regular expression will be blocked from being passed to any later plugins in the list. This has no real effect other than to prevent lots of warning messages about input files you don't care about. Each plugin might have a default block_exp. e.g. by default HTMLPlug blocks any files with .gif, .jpg, .jpeg, .png or .css file extensions.",55 'desc' => "{BasPlug.block_exp}", 56 56 'type' => 'string', 57 57 'deft' => &get_default_block_exp(), 58 'reqd' => "no" } 58 'reqd' => "no" }, 59 59 { 'name' => "remove_prefix", 60 'desc' => " Remove this leading pattern from the filename (eg -remove_prefix /tmp/XX/src/). The default is to remove the whole path from the filename.",60 'desc' => "{SRCPlug.remove_prefix}", 61 61 'type' => 'string', 62 'deft' => " ",62 'deft' => "^.*[/\\]", 63 63 'reqd' => "no" } ]; 64 64 … … 94 94 print STDERR "\nIncorrect options passed to SRCPlug, "; 95 95 print STDERR "check your collect.cfg configuration file\n"; 96 $self->print_txt_usage( );96 $self->print_txt_usage(""); # Use default resource bundle 97 97 die "\n"; 98 98 } -
trunk/gsdl/perllib/plugins/SplitPlug.pm
r4744 r4873 51 51 my $arguments = 52 52 [ { 'name' => "split_exp", 53 'desc' => " A perl regular expression to split input files into segments.",53 'desc' => "{SplitPlug.split_exp}", 54 54 'type' => "string", 55 55 'deft' => &get_default_split_exp(), 56 'reqd' => "no" } 57 ]; 56 'reqd' => "no" } ]; 58 57 59 58 my $options = { 'name' => "SplitPlug", -
trunk/gsdl/perllib/plugins/TEXTPlug.pm
r4785 r4873 41 41 my $arguments = 42 42 [ { 'name' => "process_exp", 43 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",43 'desc' => "{BasPlug.process_exp}", 44 44 'type' => "string", 45 45 'deft' => &get_default_process_exp(), 46 46 'reqd' => "no" } , 47 47 { 'name' => "title_sub", 48 'desc' => " Substitution expression to modify string stored as Title. Used by, for example, PSPlug to remove \"Page 1\" etc from text used as the title.",48 'desc' => "{TEXTPlug.title_sub}", 49 49 'type' => "string", 50 50 'deft' => "", … … 78 78 "allow_extra_options")) { 79 79 print STDERR "\nIncorrect options passed to TEXTPlug, check your collect.cfg configuration file\n"; 80 $self->print_txt_usage( );80 $self->print_txt_usage(""); # Use default resource bundle 81 81 die "\n"; 82 82 } -
trunk/gsdl/perllib/plugins/UnknownPlug.pm
r4785 r4873 66 66 my $arguments = 67 67 [ { 'name' => "assoc_field", 68 'desc' => " Name of the metadata field that will hold the associated file's name.",68 'desc' => "{UnknownPlug.assoc_field}", 69 69 'type' => "string", 70 70 'deft' => "", 71 71 'reqd' => "no" } , 72 72 { 'name' => "file_type", 73 'desc' => " Mime type of the file (e.g. image/gif)",73 'desc' => "{UnknownPlug.file_type}", 74 74 'type' => "string", 75 75 'deft' => "", … … 104 104 "allow_extra_options")) { 105 105 print STDERR "\nIncorrect options passed to UnknownPlug, check your collect.cfg configuration file\n"; 106 $self->print_txt_usage( );106 $self->print_txt_usage(""); # Use default resource bundle 107 107 die "\n"; 108 108 } -
trunk/gsdl/perllib/plugins/W3ImgPlug.pm
r4785 r4873 125 125 my $aggressiveness_list = 126 126 [ { 'name' => "1", 127 'desc' => " Filename, path, ALT text only." },127 'desc' => "{W3ImgPlug.aggressiveness.1}" }, 128 128 { 'name' => "2", 129 'desc' => " All of 1, plus caption where available." },129 'desc' => "{W3ImgPlug.aggressiveness.2}" }, 130 130 { 'name' => "3", 131 'desc' => " All of 2, plus near paragraphs where available." },131 'desc' => "{W3ImgPlug.aggressiveness.3}" }, 132 132 { 'name' => "4", 133 'desc' => " All of 3, plus previous headers (<h1>, <h2>...) where available." },133 'desc' => "{W3ImgPlug.aggressiveness.4}" }, 134 134 { 'name' => "5", 135 'desc' => " All of 4, plus textual references where available." },135 'desc' => "{W3ImgPlug.aggressiveness.5}" }, 136 136 { 'name' => "6", 137 'desc' => " All of 4, plus page metatags (title, keywords, etc)." },137 'desc' => "{W3ImgPlug.aggressiveness.6}" }, 138 138 { 'name' => "7", 139 'desc' => " All of 6, 5 and 4 combined." },139 'desc' => "{W3ImgPlug.aggressiveness.7}" }, 140 140 { 'name' => "8", 141 'desc' => " All of 7, plus repeat caption, filename, etc (raise ranking of more relevant results)." },141 'desc' => "{W3ImgPlug.aggressiveness.8}" }, 142 142 { 'name' => "9", 143 'desc' => " All of 1, plus full text of source page." } ];143 'desc' => "{W3ImgPlug.aggressiveness.9}" } ]; 144 144 145 145 my $arguments = 146 146 [ { 'name' => "aggressiveness", 147 'desc' => " Range of related text extraction techniques to use.",147 'desc' => "{W3ImgPlug.aggressiveness}", 148 148 'type' => "int", 149 149 'list' => $aggressiveness_list, … … 151 151 'reqd' => "no" }, 152 152 { 'name' => "index_pages", 153 'desc' => " Index the pages along with the images. Otherwise reference the pages at the source URL.",153 'desc' => "{W3ImgPlug.index_pages}", 154 154 'type' => "flag", 155 155 'reqd' => "no" }, 156 156 { 'name' => "no_cache_images", 157 'desc' => " Don't cache images (point to URL of original)",157 'desc' => "{W3ImgPlug.no_cache_images}", 158 158 'type' => "flag", 159 159 'reqd' => "no" }, 160 160 { 'name' => "min_size", 161 'desc' => " Bytes. Skip images smaller than this.",161 'desc' => "{W3ImgPlug.min_size}", 162 162 'type' => "int", 163 163 'deft' => "2000", 164 164 'reqd' => "no" }, 165 165 { 'name' => "min_width", 166 'desc' => " Pixels. Skip images narrower than this.",166 'desc' => "{W3ImgPlug.min_width}", 167 167 'type' => "int", 168 168 'deft' => "50", 169 169 'reqd' => "no" }, 170 170 { 'name' => "min_height", 171 'desc' => " Pixels. Skip images shorter than this.",171 'desc' => "{W3ImgPlug.min_height}", 172 172 'type' => "int", 173 173 'deft' => "50", 174 174 'reqd' => "no" }, 175 175 { 'name' => "thumb_size", 176 'desc' => " Max thumbnail size. Both width and height.",176 'desc' => "{W3ImgPlug.thumb_size}", 177 177 'type' => "int", 178 178 'deft' => "100", 179 179 'reqd' => "no" }, 180 180 { 'name' => "convert_params", 181 'desc' => " Additional parameters for ImageMagicK convert on thumbnail creation. For example, '-raise' will give a three dimensional effect to thumbnail images.",181 'desc' => "{W3ImgPlug.convert_params}", 182 182 'type' => "string", 183 183 'deft' => "", 184 184 'reqd' => "no" }, 185 185 { 'name' => "min_near_text", 186 'desc' => " Minimum characters of near text or caption to extract.",186 'desc' => "{W3ImgPlug.min_near_text}", 187 187 'type' => "int", 188 188 'deft' => "10", 189 189 'reqd' => "no" }, 190 190 { 'name' => "max_near_text", 191 'desc' => " Maximum characters near images to extract.",191 'desc' => "{W3ImgPlug.max_near_text}", 192 192 'type' => "int", 193 193 'deft' => "400", 194 194 'reqd' => "no" }, 195 195 { 'name' => "smallpage_threshold", 196 'desc' => " Images on pages smaller than this (bytes) will have the page (title, keywords, etc) meta-data added.",196 'desc' => "{W3ImgPlug.smallpage_threshold}", 197 197 'type' => "int", 198 198 'deft' => "2048", 199 199 'reqd' => "no" }, 200 200 { 'name' => "textrefs_threshold", 201 'desc' => " Threshold for textual references. Lower values mean the algorithm is less strict.",201 'desc' => "{W3ImgPlug.textrefs_threshold}", 202 202 'type' => "int", 203 203 'deft' => "2", 204 204 'reqd' => "no" }, 205 205 { 'name' => "caption_length", 206 'desc' => " Maximum length of captions (in characters).",206 'desc' => "{W3ImgPlug.caption_length}", 207 207 'type' => "int", 208 208 'deft' => "80", 209 209 'reqd' => "no" }, 210 210 { 'name' => "neartext_length", 211 'desc' => " Target length of near text (in characters).",211 'desc' => "{W3ImgPlug.neartext_length}", 212 212 'type' => "int", 213 213 'deft' => "300", 214 214 'reqd' => "no" }, 215 215 { 'name' => "document_text", 216 'desc' => " Add image text as document:text (otherwise IndexedText metadata field).",216 'desc' => "{W3ImgPlug.document_text}", 217 217 'type' => "flag", 218 'reqd' => "no" } 219 ]; 218 'reqd' => "no" } ]; 220 219 221 220 my $options = { 'name' => "W3ImgPlug", … … 301 300 302 301 print STDERR "\nIncorrect options passed to W3ImgPlug, check your collect.cfg configuration file\n"; 303 $self->print_txt_usage( );302 $self->print_txt_usage(""); # Use default resource bundle 304 303 die "\n"; 305 304 } -
trunk/gsdl/perllib/plugins/WordPlug.pm
r4744 r4873 36 36 my $arguments = 37 37 [ { 'name' => "process_exp", 38 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",38 'desc' => "{BasPlug.process_exp}", 39 39 'type' => "string", 40 40 'deft' => &get_default_process_exp(), -
trunk/gsdl/perllib/plugins/XMLPlug.pm
r4744 r4873 38 38 my $arguments = 39 39 [ { 'name' => "process_exp", 40 'desc' => " A perl regular expression to match against filenames. Matching filenames will be processed by this plugin. For example, using '(?i).html?\$' matches all documents ending in .htm or .html (case-insensitive).",40 'desc' => "{BasPlug.process_exp}", 41 41 'type' => "string", 42 42 'deft' => &get_default_process_exp(), … … 55 55 # $self is global for use within subroutines called by XML::Parser 56 56 $self = new BasPlug ($class, @_); 57 58 57 59 58 # 14-05-02 To allow for proper inheritance of arguments - John Thompson -
trunk/gsdl/perllib/printusage.pm
r4777 r4873 48 48 sub print_options_xml 49 49 { 50 local $language = shift(@_); 50 51 local $options = shift(@_); 51 52 52 53 foreach $option (@$options) { 53 54 local $optionname = $option->{'name'}; 55 local $optiondesc = &lookup_string($language, $option->{'desc'}); 56 57 # Escape '<' and '>' characters 58 $optiondesc =~ s/</</g; 59 $optiondesc =~ s/>/>/g; 54 60 55 61 # Display option name, description and type 56 62 print STDERR " <Option>\n"; 57 63 print STDERR " <Name>$optionname</Name>\n"; 58 print STDERR " <Desc>$option ->{'desc'}</Desc>\n";64 print STDERR " <Desc>$optiondesc</Desc>\n"; 59 65 print STDERR " <Type>$option->{'type'}</Type>\n"; 60 66 … … 72 78 print STDERR " <Name>$optionvalue->{'name'}</Name>\n"; 73 79 if (defined $optionvalue->{'desc'}) { 74 print STDERR " <Desc>$optionvalue->{'desc'}</Desc>\n"; 80 local $optionvaluedesc = &lookup_string($language, $optionvalue->{'desc'}); 81 82 # Escape '<' and '>' characters 83 $optionvaluedesc =~ s/</</g; 84 $optionvaluedesc =~ s/>/>/g; 85 86 print STDERR " <Desc>$optionvaluedesc</Desc>\n"; 75 87 } 76 88 print STDERR " </Value>\n"; … … 126 138 sub print_options_txt 127 139 { 140 local $language = shift(@_); 128 141 local $options = shift(@_); 129 142 local $optiondescoffset = shift(@_); … … 143 156 144 157 # Display the option description 145 local $optiondesc = $option->{'desc'};158 local $optiondesc = &lookup_string($language, $option->{'desc'}); 146 159 local $optionreqd = $option->{'reqd'}; 147 160 if (defined($optionreqd) && $optionreqd eq "yes") { … … 166 179 print STDERR "$optionvaluename:"; 167 180 168 local $optionvaluedesc = $optionvalue->{'desc'};181 local $optionvaluedesc = &lookup_string($language, $optionvalue->{'desc'}); 169 182 &display_text_in_column($optionvaluedesc, $optiondescoffset + 2, 170 183 $optiondescoffset + length($optionvaluename), 80); … … 191 204 192 205 206 sub lookup_string 207 { 208 local ($language, $stringkey) = @_; 209 210 # Load the appropriate resource bundle 211 local %resourcebundle = &load_resource_bundle($language); 212 213 # Return the text matching the key (or just the key, if no match was found) 214 return $resourcebundle{$stringkey} || $stringkey; 215 } 216 217 218 my $cachedlanguage = "<none>"; 219 my %cachedresourcebundle = (); 220 221 sub load_resource_bundle 222 { 223 local $language = shift(@_); 224 225 # If the desired resource bundle is the one cached, return it 226 if ($language eq $cachedlanguage) { 227 return %cachedresourcebundle; 228 } 229 230 # Open the appropriate resource bundle 231 local $resourcebundlehome = &util::filename_cat("$ENV{'GSDLHOME'}", "perllib"); 232 local $resourcebundlename = "strings_" . $language . ".rb"; 233 local $resourcebundlefile = &util::filename_cat($resourcebundlehome, $resourcebundlename); 234 235 # If the specific resource bundle cannot be opened, use the generic (English) one 236 if (!open(RESOURCE_BUNDLE, "<$resourcebundlefile")) { 237 $resourcebundlename = "strings.rb"; 238 $resourcebundlefile = &util::filename_cat($resourcebundlehome, $resourcebundlename); 239 open(RESOURCE_BUNDLE, "<$resourcebundlefile") 240 or die "Error: Could not open generic resource bundle $resourcebundlefile.\n"; 241 } 242 243 local @resourcebundlelines = <RESOURCE_BUNDLE>; 244 close(RESOURCE_BUNDLE); 245 246 # Load and cache this resource bundle 247 $cachedlanguage = $language; 248 %cachedresourcebundle = (); 249 foreach $line (@resourcebundlelines) { 250 # Remove any trailing whitespace 251 $line =~ s/(\s*)$//; 252 253 # Ignore comments and empty lines 254 if ($line !~ /^\#/ && $line ne "") { 255 # Parse key (everything up to the first colon) 256 $line =~ /^([^:]+):(.+)$/; 257 local $linekey = "{" . $1 . "}"; 258 local $linetext = $2; 259 260 # Map key to text 261 $cachedresourcebundle{$linekey} = $linetext; 262 } 263 } 264 265 return %cachedresourcebundle; 266 } 267 268 193 269 sub display_text_in_column 194 270 { … … 211 287 212 288 foreach $word (@words) { 213 # Unescape '<' and '>' characters214 $word =~ s/</</g;215 $word =~ s/>/>/g;216 217 289 # If printing this word would exceed the column end, start a new line 218 290 if (($linelength + length($word)) >= $columnend) {
Note:
See TracChangeset
for help on using the changeset viewer.