source: main/trunk/greenstone2/bin/script/mkcol.pl@ 24414

Last change on this file since 24414 was 24414, checked in by ak19, 13 years ago

To do with EmbeddedMetadataPlugin: 1. mkcol.pl and GLI changes put the plugin among the bottom four plugins of the plugin pipeline. 2. EmbeddedMetadataPlugin and PDFPlugin are modified to work together again after the recent changes (the introduction of the overridable BasePlugin method can_process_file_for_metadata), which were needed to get the EmbeddedMetadataPlugin and OAIPlugin to work together.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 13.3 KB
Line 
#!/usr/bin/perl -w

###########################################################################
#
# mkcol.pl --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################


# This program sets up a new collection from a model one. It does this by
# copying the model, renaming files to match the new collection, and
# replacing placeholder text within the files with the given parameters.

package mkcol;

# Both environment variables must be set before the Greenstone perl
# library directory can be put at the front of the module search path.
BEGIN {
    foreach my $required_var ('GSDLHOME', 'GSDLOS') {
        die "$required_var not set\n" unless defined $ENV{$required_var};
    }
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
}

use parse2;
use util;
use cfgread;
use gsprintf 'gsprintf';
use printusage;

use strict;
no strict 'subs'; # allow barewords (eg STDERR) as function arguments

# Allowed values for the enum options below. Each entry pairs a value
# name with a "{...}" description string (presumably a resource key that
# is resolved when usage text is printed -- confirm in printusage).
my $public_list =
    [ map { +{ 'name' => $_, 'desc' => "{mkcol.public.$_}" } }
      qw(true false) ];

my $win31compat_list =
    [ map { +{ 'name' => $_, 'desc' => "{mkcol.win31compat.$_}" } }
      qw(true false) ];

my $buildtype_list =
    [ map { +{ 'name' => $_, 'desc' => "{mkcol.buildtype.$_}" } }
      qw(mgpp lucene mg) ];

my $infodbtype_list =
    [ map { +{ 'name' => $_, 'desc' => "{mkcol.infodbtype.$_}" } }
      qw(gdbm sqlite jdbm mssql gdbm-txtgz) ];

# The command line options understood by mkcol.pl, in the table format
# handed to parse2::parse and to the PrintUsage routines.
my $arguments = [
    {
        'name' => "creator",
        'desc' => "{mkcol.creator}",
        'type' => "string",
        'reqd' => "no",
    },
    {
        'name' => "optionfile",
        'desc' => "{mkcol.optionfile}",
        'type' => "string",
        'reqd' => "no",
    },
    {
        'name' => "maintainer",
        'desc' => "{mkcol.maintainer}",
        'type' => "string",
        'reqd' => "no",
    },
    {
        'name' => "group",
        'desc' => "{mkcol.group}",
        'type' => "flag",
        'reqd' => "no",
    },
    # For gs3, either -collectdir and -gs3mode (deprecated), or -site must
    # be provided in order to locate the right collect directory and create
    # a gs3 collection.
    {
        'name' => "gs3mode",
        'desc' => "{mkcol.gs3mode}",
        'type' => "flag",
        'reqd' => "no",
    },
    {
        'name' => "collectdir",
        'desc' => "{mkcol.collectdir}",
        'type' => "string",
        'reqd' => "no",
    },
    {
        'name' => "site",
        'desc' => "{mkcol.site}",
        'type' => "string",
        'reqd' => "no",
    },
    {
        'name' => "public",
        'desc' => "{mkcol.public}",
        'type' => "enum",
        'deft' => "true",
        'list' => $public_list,
        'reqd' => "no",
    },
    {
        'name' => "title",
        'desc' => "{mkcol.title}",
        'type' => "string",
        'reqd' => "no",
    },
    {
        'name' => "about",
        'desc' => "{mkcol.about}",
        'type' => "string",
        'reqd' => "no",
    },
    {
        'name' => "buildtype",
        'desc' => "{mkcol.buildtype}",
        'type' => "enum",
        'deft' => "mgpp",
        'list' => $buildtype_list,
        'reqd' => "no",
    },
    {
        'name' => "infodbtype",
        'desc' => "{mkcol.infodbtype}",
        'type' => "enum",
        'deft' => "gdbm",
        'list' => $infodbtype_list,
        'reqd' => "no",
    },
    {
        'name' => "plugin",
        'desc' => "{mkcol.plugin}",
        'type' => "string",
        'reqd' => "no",
    },
    {
        'name' => "quiet",
        'desc' => "{mkcol.quiet}",
        'type' => "flag",
        'reqd' => "no",
    },
    {
        'name' => "language",
        'desc' => "{scripts.language}",
        'type' => "string",
        'reqd' => "no",
    },
    {
        'name' => "win31compat",
        'desc' => "{mkcol.win31compat}",
        'type' => "enum",
        'deft' => "false",
        'list' => $win31compat_list,
        'reqd' => "no",
    },
    {
        'name'      => "gli",
        'desc'      => "",
        'type'      => "flag",
        'reqd'      => "no",
        'hiddengli' => "yes",
    },
    {
        'name'      => "xml",
        'desc'      => "{scripts.xml}",
        'type'      => "flag",
        'reqd'      => "no",
        'hiddengli' => "yes",
    },
];

# Script description passed to the PrintUsage routines.
my $options = {
    'name' => "mkcol.pl",
    'desc' => "{mkcol.desc}",
    'args' => $arguments,
};

# Option values; one variable per entry in $arguments (except "xml",
# which main keeps in a variable of its own).
my ($creator, $optionfile, $maintainer, $gs3mode, $group,
    $collectdir, $site, $public, $title, $about,
    $buildtype, $infodbtype, $plugin, $quiet, $language,
    $win31compat, $gli);

# Other state shared between main and traverse_dir.
my ($collection, $capcollection);
my ($collection_tail, $capcollection_tail);
my ($pluginstring, @plugin);

&main();
187
188
# traverse_dir ($modeldir, $coldir)
#
# Recursively copies the directory tree rooted at $modeldir into $coldir,
# creating $coldir if it does not exist yet.
#
# While copying:
#  - entries named ".", "..", "CVS" and ".svn" are skipped, as are files
#    whose name ends in "~";
#  - the "macros" entry is skipped when $gs3mode is set, and the "import"
#    entry is skipped when $group is set;
#  - of the configuration files, collect.cfg is copied only when neither
#    $gs3mode nor $group is set, group.cfg only when $group is set (and it
#    is written out as collect.cfg), and collectionConfig.xml only when
#    $gs3mode is set;
#  - a leading "modelcol"/"MODELCOL" in a file name is replaced by
#    $collection/$capcollection;
#  - every line of every copied file has its **placeholder** markers
#    replaced by the matching file-level option variables.
#
# Dies if a directory cannot be created or read, or if a file cannot be
# read, created or completely written.
sub traverse_dir
{
    my ($modeldir, $coldir) = @_;

    if (!(-e $coldir)) {
        # Temporarily set the umask to 0002 for the mkdir, then put the
        # previous umask back.
        my $store_umask = umask(0002);
        my $mkdir_ok = mkdir ($coldir, 0777);
        # Remember the reason now, before any further system call is made.
        my $mkdir_error = "$!";
        umask($store_umask);

        if (!$mkdir_ok) {
            die "Cannot create directory $coldir: $mkdir_error";
        }
    }

    # A lexical handle is used so that the recursive calls below can never
    # interfere with the handle of an outer call.
    my $dirhandle;
    if (!opendir($dirhandle, $modeldir)) {
        &gsprintf(STDERR, "{common.cannot_read}\n", $modeldir);
        die;
    }
    my @files = grep(!/^(\.\.?|CVS|\.svn)$/, readdir($dirhandle));
    closedir($dirhandle);

    foreach my $file (@files)
    {
        # don't want macros folder for gs3mode
        next if ($gs3mode && $file =~ /^macros$/);

        # don't want import for group
        next if ($group && $file =~ /^import$/);

        my $thisfile = &util::filename_cat ($modeldir, $file);

        if (-d $thisfile) {
            my $colfiledir = &util::filename_cat ($coldir, $file);
            traverse_dir ($thisfile, $colfiledir);
            next;
        }

        # skip editor backup files
        next if ($file =~ /~$/);

        my $destfile = $file;
        $destfile =~ s/^modelcol/$collection/;
        $destfile =~ s/^MODELCOL/$capcollection/;

        # There are three configuration files in modelcol directory:
        # collect.cfg, group.cfg and collectionConfig.xml.
        # If it is gs2, copy relevant collect.cfg or group.cfg file;
        # if gs3, copy collectionConfig.xml.
        if ($file =~ /^collect\.cfg$/) {
            next if ($gs3mode || $group);
        }
        elsif ($file =~ /^group\.cfg$/) {
            next unless $group;
            # a group's configuration file is still called collect.cfg
            $destfile =~ s/group\.cfg/collect\.cfg/;
        }
        elsif ($file =~ /^collectionConfig\.xml$/) {
            next unless $gs3mode;
        }

        &gsprintf(STDOUT, "{mkcol.doing_replacements}\n", $destfile)
            unless $quiet;
        $destfile = &util::filename_cat ($coldir, $destfile);

        # Three-argument open keeps characters in the file names from being
        # interpreted as part of the open mode.
        my ($infile, $outfile);
        if (!open ($infile, '<', $thisfile)) {
            &gsprintf(STDERR, "{common.cannot_read_file}\n", $thisfile);
            die;
        }
        if (!open ($outfile, '>', $destfile)) {
            &gsprintf(STDERR, "{common.cannot_create_file}\n", $destfile);
            die;
        }

        while (defined (my $line = <$infile>)) {
            $line =~ s/\*\*collection\*\*/$collection_tail/g;
            $line =~ s/\*\*COLLECTION\*\*/$capcollection_tail/g;
            $line =~ s/\*\*creator\*\*/$creator/g;
            $line =~ s/\*\*maintainer\*\*/$maintainer/g;
            $line =~ s/\*\*public\*\*/$public/g;
            $line =~ s/\*\*title\*\*/$title/g;
            $line =~ s/\*\*about\*\*/$about/g;
            $line =~ s/\*\*buildtype\*\*/$buildtype/g;
            $line =~ s/\*\*infodbtype\*\*/$infodbtype/g;
            # the **plugins** marker is left untouched in gs3mode
            if (!$gs3mode) {
                $line =~ s/\*\*plugins\*\*/$pluginstring/g;
            }

            print $outfile $line;
        }

        close ($infile);
        # Buffered write errors (e.g. a full disk) only show up when the
        # handle is closed, so the result has to be checked.
        close ($outfile)
            or die "Error while writing $destfile: $!";
    }
}
284
285
# _store_parsed_options ($parsed, $variable_for)
#
# Copies each value of the hash ref $parsed into the scalar referenced by
# the entry with the same name in the hash ref $variable_for. Names that
# have no entry in $variable_for are ignored.
sub _store_parsed_options {
    my ($parsed, $variable_for) = @_;

    foreach my $name (keys %$parsed) {
        next unless exists $variable_for->{$name};
        ${$variable_for->{$name}} = $parsed->{$name};
    }
}


# main
#
# Entry point of the script. Parses the command line (and the option file,
# if one was given), validates the collection name and the option
# combination, works out the directory of the new collection and then
# calls traverse_dir to copy the model collection into it.
#
# With -xml, only the usage description is printed as XML and the sub
# returns. Every failure ends in a die; most of them print a message or
# the usage text first.
sub main {

    my $xml = 0;

    # Where the value of each option is stored. An explicit table is used
    # instead of building assignment statements in a string eval, so that
    # only these variables can ever be written to.
    my %variable_for = (
        'creator'     => \$creator,
        'optionfile'  => \$optionfile,
        'maintainer'  => \$maintainer,
        'gs3mode'     => \$gs3mode,
        'group'       => \$group,
        'collectdir'  => \$collectdir,
        'site'        => \$site,
        'public'      => \$public,
        'title'       => \$title,
        'about'       => \$about,
        'buildtype'   => \$buildtype,
        'infodbtype'  => \$infodbtype,
        'plugin'      => \$plugin,
        'quiet'       => \$quiet,
        'language'    => \$language,
        'win31compat' => \$win31compat,
        'gli'         => \$gli,
        'xml'         => \$xml,
    );

    my $hashParsingResult = {};
    my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options");

    # If parse returns -1 then something has gone wrong
    if ($intArgLeftinAfterParsing == -1) {
        &PrintUsage::print_txt_usage($options, "{mkcol.params}");
        die "\n";
    }

    _store_parsed_options($hashParsingResult, \%variable_for);

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language && $language =~ /\S/) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return;
    }

    if ($gli) { # the gli wants strings to be in UTF-8
        &gsprintf::output_strings_in_UTF8();
    }

    # now check that we had exactly one leftover arg, which should be
    # the collection name. We don't want to do this earlier, cos
    # -xml arg doesn't need a collection name
    # Or if the user specified -h, then we output the usage also
    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) {
        &PrintUsage::print_txt_usage($options, "{mkcol.params}");
        die "\n";
    }

    if (defined($optionfile) && $optionfile =~ /\w/) {
        # The bareword handle is kept because cfgread::read_cfg_line is
        # given the handle by name; the three-argument form stops the file
        # name from being interpreted as an open mode.
        if (!open (OPTIONS, '<', $optionfile)) {
            &gsprintf(STDERR, "{common.cannot_open}\n", $optionfile);
            die;
        }

        # Collect the tokens of all lines of the option file. This list
        # must not be called $options: that name belongs to the file-level
        # usage description needed by print_txt_usage below.
        my $file_options = [];
        while (defined (my $line = &cfgread::read_cfg_line ('mkcol::OPTIONS'))) {
            push (@$file_options, @$line);
        }
        close OPTIONS;

        my $optionsParsingResult = {};
        if (parse2::parse($file_options,$arguments,$optionsParsingResult) == -1) {
            &PrintUsage::print_txt_usage($options, "{mkcol.params}");
            die "\n";
        }

        # stored after the command line values, so these take precedence
        _store_parsed_options($optionsParsingResult, \%variable_for);
    }

    # The -plugin option is stored in $plugin, so it has to be copied into
    # the list the plugin lines are generated from; otherwise it would be
    # ignored.
    # NOTE(review): this assumes parse2 delivers a single plugin name here;
    # confirm how repeated -plugin options are reported.
    if (!scalar(@plugin) && defined($plugin) && $plugin =~ /\S/) {
        push (@plugin, $plugin);
    }

    # load default plugins if none were on command line
    if (!scalar(@plugin)) {
        @plugin = qw(ZIPPlugin GreenstoneXMLPlugin TextPlugin HTMLPlugin
                     EmailPlugin PDFPlugin RTFPlugin WordPlugin
                     PostScriptPlugin PowerPointPlugin ExcelPlugin
                     ImagePlugin ISISPlugin NulPlugin
                     EmbeddedMetadataPlugin MetadataXMLPlugin
                     ArchivesInfPlugin DirectoryPlugin);
    }

    # get and check the collection name (before it is used for anything)
    ($collection) = @ARGV;

    if (!defined($collection)) {
        &gsprintf(STDOUT, "{mkcol.no_colname}\n");
        &PrintUsage::print_txt_usage($options, "{mkcol.params}");
        die "\n";
    }

    # get capitalised version of the collection
    $capcollection = $collection;
    $capcollection =~ tr/a-z/A-Z/;

    $collection_tail = &util::get_dirsep_tail($collection);
    $capcollection_tail = &util::get_dirsep_tail($capcollection);

    if (($win31compat eq "true") && (length($collection_tail)) > 8) {
        &gsprintf(STDOUT, "{mkcol.long_colname}\n");
        die "\n";
    }

    if ($collection eq "modelcol") {
        &gsprintf(STDOUT, "{mkcol.bad_name_modelcol}\n");
        die "\n";
    }

    if ($collection_tail eq "CVS") {
        &gsprintf(STDOUT, "{mkcol.bad_name_cvs}\n");
        die "\n";
    }

    if ($collection_tail eq ".svn") {
        &gsprintf(STDOUT, "{mkcol.bad_name_svn}\n");
        die "\n";
    }

    # the maintainer defaults to the creator
    if (defined($creator) && (!defined($maintainer) || $maintainer eq "")) {
        $maintainer = $creator;
    }

    $public = "true" unless defined $public;

    # the title defaults to the last component of the collection name
    if (!defined($title) || $title eq "") {
        $title = $collection_tail;
    }

    if ($gs3mode && $group) {
        &gsprintf(STDERR,"{mkcol.group_not_valid_in_gs3}\n");
        die "\n";
    }

    # get the strings to include.
    $pluginstring = "";
    foreach my $plug (@plugin) {
        $pluginstring .= "plugin $plug\n";
    }

    if ($gs3mode) {
        if (!defined $site) {
            print STDERR "Warning: -gs3mode is deprecated.\n";
            print STDERR "Use -site <name> instead to create a Greenstone 3 collection\n";
        }
    }
    elsif (defined $site && $site =~ /\w/) {
        # Using -site, so -gs3mode implicitly is true
        $gs3mode = 1;
    }

    # Work out where the model collection lives and where the new
    # collection goes: below -collectdir if that was given, otherwise in
    # the collect directory of GSDLHOME (gs2) or of the given site (gs3).
    my $mdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect", "modelcol");
    my $cdir;
    if (defined $collectdir && $collectdir =~ /\w/) {
        if (!-d $collectdir) {
            &gsprintf(STDOUT, "{mkcol.no_collectdir}\n", $collectdir);
            die "\n";
        }
        $cdir = &util::filename_cat ($collectdir, $collection);
    }
    elsif (!$gs3mode) {
        $cdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect", $collection);
    }
    elsif (defined $site && $site =~ /\w/) {
        die "GSDL3HOME not set\n" unless defined $ENV{'GSDL3HOME'};

        $cdir = &util::filename_cat($ENV{'GSDL3HOME'}, "sites", $site, "collect");
        if (!-d $cdir) {
            &gsprintf(STDOUT, "{mkcol.no_collectdir}\n", $cdir);
            die "\n";
        }
        $cdir = &util::filename_cat ($cdir, $collection);
    }
    else {
        &gsprintf(STDOUT, "{mkcol.no_collectdir_specified}\n");
        die "\n";
    }

    # make sure the model collection exists
    if (!-d $mdir) {
        &gsprintf(STDERR, "{mkcol.cannot_find_modelcol}\n", $mdir);
        die;
    }

    # make sure this collection does not already exist
    if (-e $cdir) {
        &gsprintf(STDOUT, "{mkcol.col_already_exists}\n");
        die "\n";
    }

    # start creating the collection
    &gsprintf(STDOUT, "\n{mkcol.creating_col}...\n", $collection)
        unless $quiet;

    &traverse_dir ($mdir, $cdir);
    &gsprintf(STDOUT, "\n{mkcol.success}\n", $cdir)
        unless $quiet;
}
476
477
Note: See TracBrowser for help on using the repository browser.