source: main/trunk/greenstone2/bin/script/mkcol.pl

Last change on this file was 32292, checked in by ak19, 6 years ago

Making PDFv2Plugin the default plugin for PDFs when running mkcol.pl in GS3. This would break for GS2 unless pdf-box is installed, but the default plugin for PDFs in the plugin pipeline mkcol sets up for GS2 is now PDFv1Plugin. That means GS2 users will have to consciously choose to add PDFv2Plugin to their pipeline (and remove v1) after setting up the pdfbox extension for GS2.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 13.4 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# mkcol.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will set up a new collection from a model one. It does this by
30# copying the model, moving files to have the correct names, and replacing
31# text within the files to match the parameters.
32
33package mkcol;
34
35BEGIN {
36 die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
37 die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};
38 unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
39}
40
41use parse2;
42use util;
43use cfgread;
44use gsprintf 'gsprintf';
45use printusage;
46
47use strict;
48no strict 'subs'; # allow barewords (eg STDERR) as function arguments
49
# Choices offered by the 'public' enum argument.
my $public_list = [
    { name => 'true',  desc => '{mkcol.public.true}' },
    { name => 'false', desc => '{mkcol.public.false}' },
];

# Choices offered by the 'win31compat' enum argument.
my $win31compat_list = [
    { name => 'true',  desc => '{mkcol.win31compat.true}' },
    { name => 'false', desc => '{mkcol.win31compat.false}' },
];

# Choices offered by the 'buildtype' enum argument.
my $buildtype_list = [
    { name => 'mgpp',   desc => '{mkcol.buildtype.mgpp}' },
    { name => 'lucene', desc => '{mkcol.buildtype.lucene}' },
    { name => 'mg',     desc => '{mkcol.buildtype.mg}' },
];

# Choices offered by the 'infodbtype' enum argument.
my $infodbtype_list = [
    { name => 'gdbm',       desc => '{mkcol.infodbtype.gdbm}' },
    { name => 'sqlite',     desc => '{mkcol.infodbtype.sqlite}' },
    { name => 'jdbm',       desc => '{mkcol.infodbtype.jdbm}' },
    { name => 'mssql',      desc => '{mkcol.infodbtype.mssql}' },
    { name => 'gdbm-txtgz', desc => '{mkcol.infodbtype.gdbm-txtgz}' },
];

# Description of every command-line argument, in the order handed to
# parse2::parse and to the PrintUsage routines.
my $arguments = [
    { name => 'creator',    desc => '{mkcol.creator}',    type => 'string', reqd => 'no' },
    { name => 'optionfile', desc => '{mkcol.optionfile}', type => 'string', reqd => 'no' },
    { name => 'maintainer', desc => '{mkcol.maintainer}', type => 'string', reqd => 'no' },
    { name => 'group',      desc => '{mkcol.group}',      type => 'flag',   reqd => 'no' },

    # For gs3, either -collectdir and -gs3mode (deprecated), or -site must
    # be provided in order to locate the right collect directory and create
    # a gs3 collection.
    { name => 'gs3mode',    desc => '{mkcol.gs3mode}',    type => 'flag',   reqd => 'no' },
    { name => 'collectdir', desc => '{mkcol.collectdir}', type => 'string', reqd => 'no' },
    { name => 'site',       desc => '{mkcol.site}',       type => 'string', reqd => 'no' },
    { name => 'public',
      desc => '{mkcol.public}',
      type => 'enum',
      deft => 'true',
      list => $public_list,
      reqd => 'no' },
    { name => 'title',      desc => '{mkcol.title}',      type => 'string', reqd => 'no' },
    { name => 'about',      desc => '{mkcol.about}',      type => 'string', reqd => 'no' },
    { name => 'buildtype',
      desc => '{mkcol.buildtype}',
      type => 'enum',
      deft => 'mgpp',
      list => $buildtype_list,
      reqd => 'no' },
    { name => 'infodbtype',
      desc => '{mkcol.infodbtype}',
      type => 'enum',
      deft => 'gdbm',
      list => $infodbtype_list,
      reqd => 'no' },
    { name => 'plugin',     desc => '{mkcol.plugin}',     type => 'string', reqd => 'no' },
    { name => 'quiet',      desc => '{mkcol.quiet}',      type => 'flag',   reqd => 'no' },
    { name => 'language',   desc => '{scripts.language}', type => 'string', reqd => 'no' },
    { name => 'win31compat',
      desc => '{mkcol.win31compat}',
      type => 'enum',
      deft => 'false',
      list => $win31compat_list,
      reqd => 'no' },
    { name => 'gli', desc => '',              type => 'flag', reqd => 'no', hiddengli => 'yes' },
    { name => 'xml', desc => '{scripts.xml}', type => 'flag', reqd => 'no', hiddengli => 'yes' },
];

# Top-level usage description passed to the PrintUsage routines.
my $options = {
    name => 'mkcol.pl',
    desc => '{mkcol.desc}',
    args => $arguments,
};

# File-scoped option values; main() fills these in from the parsed
# arguments and traverse_dir() reads them.
my ($creator, $optionfile, $maintainer, $gs3mode, $group, $collectdir, $site);
my ($public, $title, $about, $buildtype, $infodbtype, $plugin, $quiet);
my ($language, $win31compat, $gli);

# Other file-scoped state derived in main().
my ($collection, $capcollection);
my ($collection_tail, $capcollection_tail);
my ($pluginstring, @plugin);

main();
187
188
# traverse_dir ($modeldir, $coldir)
#
# Recursively copies the model collection directory $modeldir to $coldir,
# creating $coldir if it does not exist yet. While copying, every
# **placeholder** in a file is replaced with the matching file-scoped
# option value ($collection_tail, $creator, $title, ...).
#
# Entries named '.', '..', 'CVS' and '.svn', and files ending in '~', are
# skipped. 'macros' is skipped when $gs3mode is set and 'import' is skipped
# when $group is set. Of the three configuration files, only the one that
# matches the current mode is copied (group.cfg is written as collect.cfg).
#
# Dies if a directory or file cannot be created, read or completely written.
sub traverse_dir
{
    my ($modeldir, $coldir) = @_;

    if (!(-e $coldir)) {
        # Temporarily relax the umask so that mode 0777 yields 0775, then
        # restore whatever umask was in force before.
        my $store_umask = umask(0002);
        my $mkdir_ok = mkdir ($coldir, 0777);
        my $mkdir_error = "$!";
        umask($store_umask);

        die "Cannot create directory $coldir: $mkdir_error\n" unless $mkdir_ok;
    }

    # A lexical directory handle keeps recursive calls from sharing a global.
    my $dirhandle;
    if (!opendir($dirhandle, $modeldir)) {
        my $opendir_error = "$!";
        &gsprintf(STDERR, "{common.cannot_read}\n", $modeldir);
        die "$opendir_error\n";
    }
    my @files = grep(!/^(\.\.?|CVS|\.svn)$/, readdir($dirhandle));
    closedir($dirhandle);

    # Placeholder => value pairs, applied to each line in this order.
    # Undefined option values are substituted as the empty string so that
    # no "uninitialized value" warning is raised under -w.
    my @replacements = (
        [ qr/\*\*collection\*\*/, $collection_tail ],
        [ qr/\*\*COLLECTION\*\*/, $capcollection_tail ],
        [ qr/\*\*creator\*\*/,    $creator ],
        [ qr/\*\*maintainer\*\*/, $maintainer ],
        [ qr/\*\*public\*\*/,     $public ],
        [ qr/\*\*title\*\*/,      $title ],
        [ qr/\*\*about\*\*/,      $about ],
        [ qr/\*\*buildtype\*\*/,  $buildtype ],
        [ qr/\*\*infodbtype\*\*/, $infodbtype ],
    );
    # The plugin list is only substituted for gs2 collections.
    push (@replacements, [ qr/\*\*plugins\*\*/, $pluginstring ]) unless $gs3mode;
    @replacements = map { [ $_->[0], (defined $_->[1] ? $_->[1] : "") ] } @replacements;

    foreach my $file (@files)
    {
        # don't want macros folder for gs3mode
        next if ($file =~ /^macros$/ && $gs3mode);
        # don't want import for group
        next if ($file =~ /^import$/ && $group);

        my $thisfile = &util::filename_cat ($modeldir, $file);

        if (-d $thisfile) {
            my $colfiledir = &util::filename_cat ($coldir, $file);
            traverse_dir ($thisfile, $colfiledir);
            next;
        }

        # skip editor backup files
        next if ($file =~ /~$/);

        my $destfile = $file;
        $destfile =~ s/^modelcol/$collection/;
        $destfile =~ s/^MODELCOL/$capcollection/;

        # There are three configuration files in modelcol directory:
        # collect.cfg, group.cfg and collectionConfig.xml.
        # If it is gs2, copy relevant collect.cfg or group.cfg file;
        # if gs3, copy collectionConfig.xml.
        if ($file =~ /^collect\.cfg$/) {
            next if ($gs3mode || $group);
        }
        elsif ($file =~ /^group\.cfg$/) {
            next unless $group;
            $destfile =~ s/group\.cfg/collect\.cfg/;
        }
        elsif ($file =~ /^collectionConfig\.xml$/) {
            next unless $gs3mode;
        }

        &gsprintf(STDOUT, "{mkcol.doing_replacements}\n", $destfile)
            unless $quiet;
        $destfile = &util::filename_cat ($coldir, $destfile);

        # Three-argument open with lexical handles: the file name can never
        # be mistaken for an open mode, and failure always stops the script
        # regardless of what gsprintf returns.
        my ($infile, $outfile);
        if (!open ($infile, '<', $thisfile)) {
            my $open_error = "$!";
            &gsprintf(STDERR, "{common.cannot_read_file}\n", $thisfile);
            die "$open_error\n";
        }
        if (!open ($outfile, '>', $destfile)) {
            my $open_error = "$!";
            close ($infile);
            &gsprintf(STDERR, "{common.cannot_create_file}\n", $destfile);
            die "$open_error\n";
        }

        while (defined (my $line = <$infile>)) {
            foreach my $replacement (@replacements) {
                my ($placeholder, $value) = @$replacement;
                $line =~ s/$placeholder/$value/g;
            }
            print $outfile $line;
        }

        # Buffered write errors (e.g. a full disk) only surface at close.
        close ($outfile) || die "Error writing $destfile: $!\n";
        close ($infile);
    }
}
284
285
# main ()
#
# Entry point: parses the command line (and the optional -optionfile),
# validates the collection name and options, works out the source (model)
# and destination collection directories, and calls traverse_dir() to
# create the new collection.
#
# Prints the usage text and dies when the arguments cannot be parsed or
# when there is not exactly one leftover argument (the collection name).
# With -xml only the XML usage description is printed.
sub main {

    my $xml = 0;

    # Where each parsed option is stored. This replaces a string eval that
    # assigned to the variable named after the option; names that are not
    # listed here are ignored.
    my %option_slot = (
        'creator'     => \$creator,
        'optionfile'  => \$optionfile,
        'maintainer'  => \$maintainer,
        'gs3mode'     => \$gs3mode,
        'group'       => \$group,
        'collectdir'  => \$collectdir,
        'site'        => \$site,
        'public'      => \$public,
        'title'       => \$title,
        'about'       => \$about,
        'buildtype'   => \$buildtype,
        'infodbtype'  => \$infodbtype,
        'plugin'      => \$plugin,
        'quiet'       => \$quiet,
        'language'    => \$language,
        'win31compat' => \$win31compat,
        'gli'         => \$gli,
        'xml'         => \$xml,
    );
    my $store_options = sub {
        my ($parsed) = @_;
        foreach my $name (keys %$parsed) {
            my $slot = $option_slot{$name};
            $$slot = $parsed->{$name} if defined $slot;
        }
    };

    my $hashParsingResult = {};
    my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options");

    # If parse returns -1 then something has gone wrong
    if ($intArgLeftinAfterParsing == -1)
    {
        &PrintUsage::print_txt_usage($options, "{mkcol.params}");
        die "\n";
    }

    $store_options->($hashParsingResult);

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language && $language =~ /\S/) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return;
    }

    if ($gli) { # the gli wants strings to be in UTF-8
        &gsprintf::output_strings_in_UTF8();
    }

    # now check that we had exactly one leftover arg, which should be
    # the collection name. We don't want to do this earlier, cos
    # -xml arg doesn't need a collection name
    # Or if the user specified -h, then we output the usage also
    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/))
    {
        &PrintUsage::print_txt_usage($options, "{mkcol.params}");
        die "\n";
    }

    if (defined $optionfile && $optionfile =~ /\w/) {
        # Three-argument open so the user-supplied name is never read as an
        # open mode. The bareword handle is kept because read_cfg_line is
        # given the handle by name.
        if (!open (OPTIONS, '<', $optionfile)) {
            &gsprintf(STDERR, "{common.cannot_open}\n", $optionfile);
            die "\n";
        }
        # Named so that it does not shadow the file-level $options usage
        # description, which print_txt_usage needs below.
        my $file_options = [];
        my $line;
        while (defined ($line = &cfgread::read_cfg_line ('mkcol::OPTIONS'))) {
            push (@$file_options, @$line);
        }
        close OPTIONS;
        my $optionsParsingResult = {};
        if (parse2::parse($file_options,$arguments,$optionsParsingResult) == -1) {
            &PrintUsage::print_txt_usage($options, "{mkcol.params}");
            die "\n";
        }

        $store_options->($optionsParsingResult);
    }

    # The -plugin argument is parsed into $plugin, but only @plugin is used
    # below, so copy it across; otherwise the option has no effect.
    # NOTE(review): assumes parse2 hands back a plain string (or possibly an
    # array reference) for this option -- confirm against parse2.
    if (!scalar(@plugin) && defined $plugin) {
        if (ref($plugin) eq 'ARRAY') {
            @plugin = grep { defined $_ && /\S/ } @$plugin;
        }
        elsif ($plugin =~ /\S/) {
            @plugin = ($plugin);
        }
    }

    # load default plugins if none were on command line
    if (!scalar(@plugin)) {
        my $pdfplugin = ($gs3mode || $site) ? "PDFv2Plugin" : "PDFv1Plugin";
        @plugin = (qw(ZIPPlugin GreenstoneXMLPlugin TextPlugin HTMLPlugin EmailPlugin),
                   $pdfplugin,
                   qw(RTFPlugin WordPlugin PostScriptPlugin PowerPointPlugin
                      ExcelPlugin ImagePlugin ISISPlugin NulPlugin
                      EmbeddedMetadataPlugin MetadataXMLPlugin ArchivesInfPlugin
                      DirectoryPlugin));
    }

    # get and check the collection name
    ($collection) = @ARGV;

    # Checked before the name is used for anything else.
    if (!defined($collection)) {
        &gsprintf(STDOUT, "{mkcol.no_colname}\n");
        &PrintUsage::print_txt_usage($options, "{mkcol.params}");
        die "\n";
    }

    # get capitalised version of the collection
    $capcollection = $collection;
    $capcollection =~ tr/a-z/A-Z/;

    $collection_tail = &util::get_dirsep_tail($collection);
    $capcollection_tail = &util::get_dirsep_tail($capcollection);

    if (defined $win31compat && ($win31compat eq "true") && (length($collection_tail) > 8)) {
        &gsprintf(STDOUT, "{mkcol.long_colname}\n");
        die "\n";
    }

    if ($collection eq "modelcol") {
        &gsprintf(STDOUT, "{mkcol.bad_name_modelcol}\n");
        die "\n";
    }

    if ($collection_tail eq "CVS") {
        &gsprintf(STDOUT, "{mkcol.bad_name_cvs}\n");
        die "\n";
    }

    if ($collection_tail eq ".svn") {
        &gsprintf(STDOUT, "{mkcol.bad_name_svn}\n");
        die "\n";
    }

    # The maintainer defaults to the creator.
    if (defined($creator) && (!defined($maintainer) || $maintainer eq "")) {
        $maintainer = $creator;
    }

    $public = "true" unless defined $public;

    # The title defaults to the last component of the collection name.
    if (!defined($title) || $title eq "") {
        $title = $collection_tail;
    }

    if ($gs3mode && $group) {
        &gsprintf(STDERR,"{mkcol.group_not_valid_in_gs3}\n");
        die "\n";
    }

    # get the strings to include.
    $pluginstring = "";
    foreach my $plug (@plugin) {
        $pluginstring .= "plugin $plug\n";
    }

    if ($gs3mode) {
        if (!defined $site) {
            print STDERR "Warning: -gs3mode is deprecated.\n";
            print STDERR "Use -site <name> instead to create a Greenstone 3 collection\n";
        }
    }
    elsif (defined $site && $site =~ /\w/) {
        # Using -site, so -gs3mode implicitly is true
        $gs3mode = 1;
    }

    my $mdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect", "modelcol");
    my $cdir;
    if (defined $collectdir && $collectdir =~ /\w/) {
        if (!-d $collectdir) {
            &gsprintf(STDOUT, "{mkcol.no_collectdir}\n", $collectdir);
            die "\n";
        }
        $cdir = &util::filename_cat ($collectdir, $collection);
    }
    elsif (!$gs3mode) {
        $cdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect", $collection);
    }
    elsif (defined $site && $site =~ /\w/) {
        die "GSDL3HOME not set\n" unless defined $ENV{'GSDL3HOME'};

        $cdir = &util::filename_cat($ENV{'GSDL3HOME'}, "sites", $site, "collect");
        if (!-d $cdir) {
            &gsprintf(STDOUT, "{mkcol.no_collectdir}\n", $cdir);
            die "\n";
        }
        $cdir = &util::filename_cat ($cdir, $collection);
    }
    else {
        &gsprintf(STDOUT, "{mkcol.no_collectdir_specified}\n");
        die "\n";
    }

    # make sure the model collection exists
    if (!-d $mdir) {
        &gsprintf(STDERR, "{mkcol.cannot_find_modelcol}\n", $mdir);
        die "\n";
    }

    # make sure this collection does not already exist
    if (-e $cdir) {
        &gsprintf(STDOUT, "{mkcol.col_already_exists}\n");
        die "\n";
    }

    # start creating the collection
    &gsprintf(STDOUT, "\n{mkcol.creating_col}...\n", $collection)
        unless $quiet;

    &traverse_dir ($mdir, $cdir);
    &gsprintf(STDOUT, "\n{mkcol.success}\n", $cdir)
        unless $quiet;
}
477
478
Note: See TracBrowser for help on using the repository browser.