[1031] | 1 | #!/usr/bin/perl -w
|
---|
[4] | 2 |
|
---|
[538] | 3 | ###########################################################################
|
---|
| 4 | #
|
---|
| 5 | # buildcol.pl -- This program will build a particular collection
|
---|
| 6 | # A component of the Greenstone digital library software
|
---|
| 7 | # from the New Zealand Digital Library Project at the
|
---|
| 8 | # University of Waikato, New Zealand.
|
---|
| 9 | #
|
---|
| 10 | # Copyright (C) 1999 New Zealand Digital Library Project
|
---|
| 11 | #
|
---|
| 12 | # This program is free software; you can redistribute it and/or modify
|
---|
| 13 | # it under the terms of the GNU General Public License as published by
|
---|
| 14 | # the Free Software Foundation; either version 2 of the License, or
|
---|
| 15 | # (at your option) any later version.
|
---|
| 16 | #
|
---|
| 17 | # This program is distributed in the hope that it will be useful,
|
---|
| 18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
| 19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
| 20 | # GNU General Public License for more details.
|
---|
| 21 | #
|
---|
| 22 | # You should have received a copy of the GNU General Public License
|
---|
| 23 | # along with this program; if not, write to the Free Software
|
---|
| 24 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
| 25 | #
|
---|
| 26 | ###########################################################################
|
---|
[4] | 27 |
|
---|
[4776] | 28 | # 11/04/03 Added usage datastructure - John Thompson
|
---|
| 29 |
|
---|
[1424] | 30 | package buildcol;
|
---|
| 31 |
|
---|
# Compile-time setup: refuse to run without the Greenstone environment and
# make the Greenstone Perl library directories searchable.
BEGIN {
    # Both environment variables must be defined (GSDLHOME is checked first).
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'} or die "GSDLOS not set\n";

    # Prepend all three directories in a single call.  The resulting search
    # order is classify, plugins, perllib -- the same order produced by
    # unshifting perllib, then plugins, then classify one at a time.
    my $perllib = "$ENV{'GSDLHOME'}/perllib";
    unshift (@INC, "$perllib/classify", "$perllib/plugins", $perllib);
}
| 39 |
|
---|
[130] | 40 | use colcfg;
|
---|
[4] | 41 | use parsargv;
|
---|
| 42 | use util;
|
---|
[1424] | 43 | use FileHandle;
|
---|
[4776] | 44 | use printusage;
|
---|
[4] | 45 |
|
---|
# Values accepted by the "mode" option.  Each entry pairs the literal value
# ('name') with a description string ('desc').  The "{...}" descriptions are
# presumably keys resolved to localised text by PrintUsage -- TODO confirm.
my $mode_list =
    [ { 'name' => "all",
        'desc' => "{buildcol.mode.all}" },
      { 'name' => "compress_text",
        'desc' => "{buildcol.mode.compress_text}" },
      { 'name' => "build_index",
        'desc' => "{buildcol.mode.build_index}" },
      { 'name' => "infodb",
        'desc' => "{buildcol.mode.infodb}" } ];

# Description of the command line options, used to generate the usage
# output (see print_xml_usage and print_txt_usage).  Fields used here:
#   'name' - option name
#   'desc' - description string (same "{...}" form as in $mode_list)
#   'type' - "string", "int", "flag" or "enum"
#   'list' - for "enum" options, the list of permitted values
#   'deft' - default value shown for the option (optional)
#   'reqd' - "no" for every option in this table
# This table only describes the options; the actual parsing is done by the
# parsargv::parse call in main.
my $arguments =
    [ { 'name' => "archivedir",
        'desc' => "{buildcol.archivedir}",
        'type' => "string",
        'reqd' => "no" },
      { 'name' => "verbosity",
        'desc' => "{buildcol.verbosity}",
        'type' => "int",
        'deft' => "2",
        'reqd' => "no" },
      { 'name' => "builddir",
        'desc' => "{buildcol.builddir}",
        'type' => "string",
        'reqd' => "no" },
      # cachedir is accepted by main (where it is marked UNDOCUMENTED) but is
      # deliberately left out of the usage listing.
#     { 'name' => "cachedir",
#       'desc' => "{buildcol.cachedir}",
#       'type' => "string",
#       'reqd' => "no" },
      { 'name' => "maxdocs",
        'desc' => "{buildcol.maxdocs}",
        'type' => "int",
        'reqd' => "no" },
      { 'name' => "debug",
        'desc' => "{buildcol.debug}",
        'type' => "flag",
        'reqd' => "no" },
      { 'name' => "mode",
        'desc' => "{buildcol.mode}",
        'type' => "enum",
        'list' => $mode_list,
        'deft' => "all",
        'reqd' => "no" },
      { 'name' => "index",
        'desc' => "{buildcol.index}",
        'type' => "string",
        'reqd' => "no" },
      { 'name' => "keepold",
        'desc' => "{buildcol.keepold}",
        'type' => "flag",
        'reqd' => "no" },
      { 'name' => "no_text",
        'desc' => "{buildcol.no_text}",
        'type' => "flag",
        'reqd' => "no" },
      { 'name' => "allclassifications",
        'desc' => "{buildcol.allclassifications}",
        'type' => "flag",
        'reqd' => "no" },
      { 'name' => "create_images",
        'desc' => "{buildcol.create_images}",
        'type' => "flag",
        'reqd' => "no" },
      # The default shown for collectdir is computed when this table is built.
      { 'name' => "collectdir",
        'desc' => "{buildcol.collectdir}",
        'type' => "string",
        'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"),
        'reqd' => "no" },
      { 'name' => "out",
        'desc' => "{buildcol.out}",
        'type' => "string",
        'deft' => "STDERR",
        'reqd' => "no" },
      { 'name' => "no_strip_html",
        'desc' => "{buildcol.no_strip_html}",
        'type' => "flag",
        'reqd' => "no" },
      # The default shown for faillog is a placeholder path built from the
      # literal strings "<collectdir>" and "colname"; main computes the real
      # default from GSDLCOLLECTDIR.
      { 'name' => "faillog",
        'desc' => "{buildcol.faillog}",
        'type' => "string",
        'deft' => &util::filename_cat("<collectdir>", "colname", "etc", "fail.log"),
        'reqd' => "no" },
      { 'name' => "language",
        'desc' => "{scripts.language}",
        'type' => "string",
        'reqd' => "no" } ];

# Top-level description of this script: its name, a plain-text description
# and the option table above.
my $options = { 'name' => "buildcol.pl",
                'desc' => "PERL script used to build a greenstone collection from GML documents.",
                'args' => $arguments };
| 135 |
|
---|
| 136 |
|
---|
# Print the usage information for this script as XML on STDERR.
#
# The name, description and option table are taken from the file-level
# $options structure.
#
# Parameters:
#   $language - language identifier, handed on unchanged to
#               PrintUsage::print_options_xml.
sub print_xml_usage
{
    # A lexical is all that is needed here: the value is only passed on as an
    # argument, so there is no reason to localise a package variable.
    my $language = shift(@_);

    &PrintUsage::print_xml_header();

    print STDERR "<Info>\n";
    print STDERR " <Name>$options->{'name'}</Name>\n";
    print STDERR " <Desc>$options->{'desc'}</Desc>\n";
    print STDERR " <Arguments>\n";
    if (defined($options->{'args'})) {
        &PrintUsage::print_options_xml($language, $options->{'args'});
    }
    print STDERR " </Arguments>\n";
    print STDERR "</Info>\n";
}
| 153 |
|
---|
| 154 |
|
---|
# Print the plain-text usage information for this script on STDERR.
#
# The program name and option table are taken from the file-level $options
# structure.
#
# Parameters:
#   $language - language identifier, handed on unchanged to
#               PrintUsage::print_options_txt.
sub print_txt_usage
{
    # All working variables are lexicals; nothing here needs to be visible
    # through a package variable.
    my $language = shift(@_);

    my $programname = $options->{'name'};
    my $programargs = $options->{'args'};

    # Find the length of the longest option string
    my $descoffset = 0;
    if (defined($programargs)) {
        $descoffset = &PrintUsage::find_longest_option_string($programargs);
    }

    # Produce the usage information using the data structure above
    print STDERR " usage: $programname [options] collection-name\n\n";

    # Display the program options, if there are some
    if (defined($programargs)) {
        # Calculate the column offset of the option descriptions
        my $optiondescoffset = $descoffset + 2; # 2 spaces between options & descriptions

        print STDERR " options:\n";

        # Display the program options
        &PrintUsage::print_options_txt($language, $programargs, $optiondescoffset);
    }
}
| 182 |
|
---|
| 183 |
|
---|
| 184 | # sub print_usage {
|
---|
| 185 | # print STDOUT "\n";
|
---|
| 186 | # print STDOUT "buildcol.pl: Builds the indexes of a Greenstone collection.\n\n";
|
---|
| 187 | # print STDOUT " usage: $0 [options] collection-name\n\n";
|
---|
| 188 | # print STDOUT " options:\n";
|
---|
| 189 | # print STDOUT " -verbosity number 0=none, 3=lots\n";
|
---|
| 190 | # print STDOUT " -archivedir directory Where the archives live\n";
|
---|
| 191 | # print STDOUT " -builddir directory Where to put the built indexes\n";
|
---|
| 192 | # print STDOUT " -maxdocs number Maximum number of documents to build\n";
|
---|
| 193 | # print STDOUT " -debug Print output to STDOUT\n";
|
---|
| 194 | # print STDOUT " -mode all|compress_text|build_index|infodb\n";
|
---|
| 195 | # print STDOUT " -index indexname Index to build (will build all in\n";
|
---|
| 196 | # print STDOUT " config file if not set)\n";
|
---|
| 197 | # print STDOUT " -keepold will not destroy the current contents of the\n";
|
---|
| 198 | # print STDOUT " building directory\n";
|
---|
| 199 | # print STDOUT " -no_text Don't store compressed text. This option is\n";
|
---|
| 200 | # print STDOUT " useful for minimizing the size of the built\n";
|
---|
| 201 | # print STDOUT " indexes if you intend always to display the\n";
|
---|
| 202 | # print STDOUT " original documents at run time (i.e. you won't\n";
|
---|
| 203 | # print STDOUT " be able to retrieve the compressed text version)\n";
|
---|
| 204 | # print STDOUT " -allclassifications Don't remove empty classifications\n";
|
---|
| 205 | # print STDOUT " -create_images Attempt to create default images for new\n";
|
---|
| 206 | # print STDOUT " collection. This relies on the Gimp being\n";
|
---|
| 207 | # print STDOUT " installed along with relevant perl modules\n";
|
---|
| 208 | # print STDOUT " to allow scripting from perl\n";
|
---|
| 209 | # print STDOUT " -collectdir directory Collection directory (defaults to " .
|
---|
| 210 | # &util::filename_cat ($ENV{'GSDLHOME'}, "collect") . ")\n";
|
---|
| 211 | # print STDOUT " -out Filename or handle to print output status to.\n";
|
---|
| 212 | # print STDOUT " The default is STDERR\n";
|
---|
| 213 | # print STDOUT " -no_strip_html Do not strip the html tags from the indexed text\n";
|
---|
| 214 | # print STDOUT " (only used for mgpp collections).\n\n";
|
---|
| 215 | # print STDOUT " -faillog name Fail log filename. This log receives the filenames\n";
|
---|
| 216 | # print STDOUT " of any files which fail to be processed (defaults.\n";
|
---|
| 217 | # print STDOUT " to " .
|
---|
| 218 | # &util::filename_cat("<collectdir>", "colname", "etc", "fail.log") . ")\n";
|
---|
| 219 | # print STDOUT " [Type \"perl -S buildcol.pl | more\" if this help text scrolled off your screen]";
|
---|
| 220 | # print STDOUT "\n" unless $ENV{'GSDLOS'} =~ /^windows$/i;
|
---|
| 221 | # }
|
---|
| 222 |
|
---|
| 223 |
|
---|
# Run the build.
&main();


# Script entry point: parse the command line, merge in settings from the
# collection's etc/collect.cfg, load the appropriate builder class and run
# the requested build stages.
#
# Takes no parameters; options are read from @ARGV.  Dies (after printing
# usage where appropriate) on bad arguments, a missing collection, a missing
# collect.cfg, an output or fail log file that cannot be opened, or an
# unknown mode.
#
# Sets the package variables $collection and $configfilename, which
# create_images reads.
sub main
{
    my ($verbosity, $archivedir, $cachedir, $builddir, $maxdocs,
        $debug, $mode, $indexname, $keepold, $allclassifications,
        $create_images, $collectdir, $out, $buildtype, $textindex,
        $no_strip_html, $no_text, $faillog, $language);

    # ***** 11-04-03 - John Thompson *****
    my $xml = 0;
    # ************************************

    # note that no defaults are passed for most options as they're set
    # later (after we check the collect.cfg file)
    if (!parsargv::parse(\@ARGV,
                         'language/.*/', \$language,
                         'verbosity/\d+/', \$verbosity,
                         'archivedir/.*/', \$archivedir,
                         'cachedir/.*/', \$cachedir, # UNDOCUMENTED
                         'builddir/.*/', \$builddir,
                         'maxdocs/^\-?\d+/', \$maxdocs,
                         'debug', \$debug,
                         'mode/^(all|compress_text|build_index|infodb)$/', \$mode,
                         'index/.*/', \$indexname,
                         'no_text', \$no_text,
                         'keepold', \$keepold,
                         'allclassifications', \$allclassifications,
                         'create_images', \$create_images,
                         'collectdir/.*/', \$collectdir,
                         'out/.*/STDERR', \$out,
                         'no_strip_html', \$no_strip_html,
                         'faillog/.*/', \$faillog,
                         q^xml^, \$xml)) {
        &print_txt_usage($language);
        die "\n";
    }

    # -xml only prints the usage description and stops
    if ($xml) {
        &print_xml_usage($language);
        die "\n";
    }

    $textindex = "";
    my $close_out = 0;
    if ($out !~ /^(STDERR|STDOUT)$/i) {
        # three-argument open so that characters in the file name can never
        # be interpreted as part of the open mode
        open (OUT, '>', $out) || die "Couldn't open output file $out\n";
        $out = "buildcol::OUT";
        $close_out = 1;
    } else {
        # the test above is case-insensitive but handle names are not, so
        # normalise e.g. "stderr" to the real handle name
        $out = uc ($out);
    }
    # $out holds the NAME of a filehandle from here on; it is used as a
    # symbolic handle and is passed to the builder in that form
    $out->autoflush(1);

    # get and check the collection
    # Pass exactly one collection name followed by the collect directory.
    # Flattening @ARGV into the call would move $collectdir out of the
    # second position whenever @ARGV did not hold exactly one element.
    my $collection_arg = defined $ARGV[0] ? $ARGV[0] : "";
    if (($collection = &util::use_collection($collection_arg, $collectdir)) eq "") {
        &print_txt_usage($language);
        die "\n";
    }

    if ($faillog eq "") {
        $faillog = &util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc", "fail.log");
    }
    # note that we're appending to the faillog here (import.pl clears it each time)
    # this could potentially create a situation where the faillog keeps being added
    # to over multiple builds (if the import process is being skipped)
    open (FAILLOG, '>>', $faillog) || die "Couldn't open fail log $faillog\n";
    $faillog = 'buildcol::FAILLOG';
    $faillog->autoflush(1);

    # read the configuration file
    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc", "collect.cfg");
    if (!-e $configfilename) {
        die "Couldn't find the configuration file $configfilename\n";
    }
    $collectcfg = &colcfg::read_collect_cfg ($configfilename);

    # For each setting below the command line value wins; collect.cfg is
    # consulted only where the command line left the option unset.  The
    # exceptions are debug, keepold and create_images, which collect.cfg can
    # switch on regardless.
    if ($verbosity !~ /\d+/) {
        if (defined $collectcfg->{'verbosity'} && $collectcfg->{'verbosity'} =~ /\d+/) {
            $verbosity = $collectcfg->{'verbosity'};
        } else {
            $verbosity = 2; # the default
        }
    }
    # we use searchtype for determining buildtype, but for old versions, use buildtype
    if (defined $collectcfg->{'searchtype'}) {
        $buildtype = "mgpp";
    }
    elsif (defined $collectcfg->{'buildtype'}) {
        $buildtype = $collectcfg->{'buildtype'};
    } else {
        $buildtype = "mg"; #mg is the default
    }
    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }
    if (defined $collectcfg->{'cachedir'} && $cachedir eq "") {
        $cachedir = $collectcfg->{'cachedir'};
    }
    if (defined $collectcfg->{'builddir'} && $builddir eq "") {
        $builddir = $collectcfg->{'builddir'};
    }
    if ($maxdocs !~ /\-?\d+/) {
        if (defined $collectcfg->{'maxdocs'} && $collectcfg->{'maxdocs'} =~ /\-?\d+/) {
            $maxdocs = $collectcfg->{'maxdocs'};
        } else {
            $maxdocs = -1; # the default
        }
    }
    if (defined $collectcfg->{'debug'} && $collectcfg->{'debug'} =~ /^true$/i) {
        $debug = 1;
    }
    if ($mode !~ /^(all|compress_text|build_index|infodb)$/) {
        if (defined $collectcfg->{'mode'} && $collectcfg->{'mode'} =~ /^(all|compress_text|build_index|infodb)$/) {
            $mode = $collectcfg->{'mode'};
        } else {
            $mode = "all"; # the default
        }
    }
    if (defined $collectcfg->{'index'} && $indexname eq "") {
        $indexname = $collectcfg->{'index'};
    }
    if (defined $collectcfg->{'no_text'} && $no_text == 0) {
        if ($collectcfg->{'no_text'} =~ /^true$/i) {
            $no_text = 1;
        }
    }
    if (defined $collectcfg->{'allclassifications'} && $allclassifications == 0) {
        if ($collectcfg->{'allclassifications'} =~ /^true$/i) {
            $allclassifications = 1;
        }
    }
    if (defined $collectcfg->{'keepold'} && $collectcfg->{'keepold'} =~ /^true$/i) {
        $keepold = 1;
    }
    if (defined $collectcfg->{'create_images'} && $collectcfg->{'create_images'} =~ /^true$/i) {
        $create_images = 1;
    }
    if ($buildtype eq "mgpp" && defined $collectcfg->{'textcompress'}) {
        $textindex = $collectcfg->{'textcompress'};
    }

    #set the text index
    if ($buildtype eq "mgpp") {
        if ($textindex eq "") {
            $textindex = "text";
        }
    }
    else {
        $textindex = "section:text";
    }

    # create default images if required
    if ($create_images) {
        my $collection_name = $collection;
        $collection_name = $collectcfg->{'collectionmeta'}->{'collectionname'}->{'default'}
            if defined $collectcfg->{'collectionmeta'}->{'collectionname'}->{'default'};
        # $out is lexical to this sub, so hand it over explicitly for any
        # warnings create_images needs to print
        &create_images ($collection_name, $out);
    }

    # fill in the default archives and building directories if none
    # were supplied, turn all \ into / and remove trailing /
    $archivedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives") if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;
    $builddir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "building") if $builddir eq "";
    $builddir =~ s/[\\\/]+/\//g;
    $builddir =~ s/\/$//;

    # update the archive cache if needed
    if ($cachedir) {
        print $out "Updating archive cache\n" if ($verbosity >= 1);

        $cachedir =~ s/[\\\/]+$//;
        # \Q..\E: match the collection name literally, not as a pattern
        $cachedir .= "/collect/$collection" unless
            $cachedir =~ /collect\/\Q$collection\E/;

        $realarchivedir = "$cachedir/archives";
        $realbuilddir = "$cachedir/building";
        &util::mk_all_dir ($realarchivedir);
        &util::mk_all_dir ($realbuilddir);
        &util::cachedir ($archivedir, $realarchivedir, $verbosity);

    } else {
        $realarchivedir = $archivedir;
        $realbuilddir = $builddir;
    }

    # build it in realbuilddir
    &util::mk_all_dir ($realbuilddir);


    # if a builder class has been created for this collection, use it
    # otherwise, use the mg or mgpp builder
    if (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "${collection}builder";
    } else {
        $builderdir = "$ENV{'GSDLHOME'}/perllib";
        if ($buildtype eq "mgpp") {
            $buildertype = "mgppbuilder";
        }
        else {
            $buildertype = "mgbuilder";
        }
    }

    require "$builderdir/$buildertype.pm";

    # Call the constructor directly through the class name held in
    # $buildertype.  This avoids compiling a string (built from the
    # collection name) with eval; any error raised by the constructor
    # still terminates the script.
    $builder = $buildertype->new($collection,
                                 $realarchivedir, $realbuilddir, $verbosity,
                                 $maxdocs, $debug, $keepold, $allclassifications,
                                 $out, $no_text, $faillog);

    $builder->init();

    if ($buildertype eq "mgppbuilder" && $no_strip_html) {
        $builder->set_strip_html(0);
    }
    if ($mode =~ /^all$/i) {
        $builder->compress_text($textindex);
        $builder->build_indexes($indexname);
        $builder->make_infodatabase();
        $builder->collect_specific();
    } elsif ($mode =~ /^compress_text$/i) {
        $builder->compress_text($textindex);
    } elsif ($mode =~ /^build_index$/i) {
        $builder->build_indexes($indexname);
    } elsif ($mode =~ /^infodb$/i) {
        $builder->make_infodatabase();
    } else {
        die "unknown mode: $mode\n";
    }

    $builder->make_auxiliary_files() if !$debug;
    $builder->deinit();

    # when a cache directory was used, the build was made there and has to
    # be copied to the real building directory
    if (($realbuilddir ne $builddir) && !$debug) {
        print $out "Copying back the cached build\n" if ($verbosity >= 1);
        &util::rm_r ($builddir);
        &util::cp_r ($realbuilddir, $builddir);
    }

    close OUT if $close_out;
    close FAILLOG;
}
| 472 |
|
---|
# Generate the default collection icons and point collect.cfg at them.
#
# Runs the gimp title_icon script twice (large and small icon), writing into
# the collection's images directory, then rewrites the "collectionmeta
# iconcollection" and "collectionmeta iconcollectionsmall" lines of the
# configuration file (appending them if they are not present).
#
# Reads the package variables $collection and $configfilename.
#
# Parameters:
#   $collection_name - text to render on the icons.
#   $outhandle       - optional filehandle (or handle name) for warnings;
#                      defaults to STDERR.  The caller's output handle is not
#                      otherwise visible from this sub.
#
# All failures are reported as warnings; the sub then returns (or, for a
# failed image command, carries on) rather than dying.
sub create_images {
    my ($collection_name, $outhandle) = @_;

    $outhandle = \*STDERR unless defined $outhandle;

    my $image_script = &util::filename_cat ($ENV{'GSDLHOME'}, "bin", "script", "gimp", "title_icon-1.2.pl");
    if (!-e $image_script) {
        print $outhandle "WARNING: Image making script ($image_script) could not be found\n";
        print $outhandle " Default images will not be generated\n\n";
        return;
    }

    my $imagedir = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "images");

    &util::mk_all_dir ($imagedir);

    # create the images
    # NOTE(review): these commands go through the shell with the collection
    # name (taken from collect.cfg) interpolated; a name containing quotes or
    # shell metacharacters will break or alter the command.  Left as a shell
    # command because the script is launched by path -- confirm before
    # switching to the list form of system.
    my @commands =
        ("$image_script -size 1.5 -image_dir \"$imagedir\" -filename $collection.gif -text \"$collection_name\"",
         "$image_script -image_dir \"$imagedir\" -filename ${collection}sm.gif -text \"$collection_name\"");
    foreach my $command (@commands) {
        if (system ($command) != 0) {
            print $outhandle "WARNING: Image making command failed: $command\n";
        }
    }

    # update the collect.cfg configuration file (this will need
    # to be changed when the config file format changes)
    my $cfg_in;
    if (!open ($cfg_in, '<', $configfilename)) {
        print $outhandle "WARNING: Couldn't open config file ($configfilename)\n";
        print $outhandle " for updating so collection images may not be linked correctly\n";
        return;
    }

    # read the whole file, replacing any existing icon lines on the way
    my $file = "";
    my $found = 0; my $foundsm = 0;
    while (defined (my $line = <$cfg_in>)) {
        if ($line =~ /collectionmeta\s+iconcollection\s+/) {
            $line = "collectionmeta iconcollection _httpprefix_/collect/$collection/images/$collection.gif\n";
            $found = 1;
        } elsif ($line =~ /collectionmeta\s+iconcollectionsmall\s+/) {
            $line = "collectionmeta iconcollectionsmall _httpprefix_/collect/$collection/images/${collection}sm.gif\n";
            $foundsm = 1;
        }
        $file .= $line;
    }
    close $cfg_in;

    # append whichever icon lines were not already in the file
    $file .= "collectionmeta iconcollection _httpprefix_/collect/$collection/images/$collection.gif\n" if !$found;
    $file .= "collectionmeta iconcollectionsmall _httpprefix_/collect/$collection/images/${collection}sm.gif\n" if !$foundsm;

    my $cfg_out;
    if (!open ($cfg_out, '>', $configfilename)) {
        print $outhandle "WARNING: Couldn't open config file ($configfilename)\n";
        print $outhandle " for updating so collection images may not be linked correctly\n";
        return;
    }
    print $cfg_out $file;
    # buffered write errors only show up when the handle is closed
    if (!close ($cfg_out)) {
        print $outhandle "WARNING: Couldn't finish writing config file ($configfilename)\n";
    }
}
[1853] | 524 |
|
---|