root/main/trunk/greenstone2/bin/script/mkcol.pl @ 32292

Revision 32292, 13.4 KB (checked in by ak19, 10 months ago)

Making PDFv2Plugin the default plugin for PDFs when running mkcol.pl in GS3. This would break for GS2 unless pdf-box is installed, but the default plugin for PDFs in the plugin pipeline mkcol sets up for GS2 is now PDFv1Plugin. That means GS2 users will have to consciously choose to add PDFv2Plugin to their pipeline (and remove v1) after setting up the pdfbox extension for GS2.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# mkcol.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will set up a new collection from a model one. It does this by
30# copying the model, renaming files so that they have the correct names, and
31# replacing text within the files to match the parameters.
32
33package mkcol;
34
BEGIN {
    # Refuse to start unless both Greenstone environment variables are
    # present, then put Greenstone's perllib directory at the front of the
    # module search path so the "use" lines below can be resolved.
    foreach my $required_var ('GSDLHOME', 'GSDLOS') {
        next if defined $ENV{$required_var};
        die "$required_var not set\n";
    }

    my $perllib_dir = "$ENV{'GSDLHOME'}/perllib";
    unshift (@INC, $perllib_dir);
}
40
41use parse2;
42use util;
43use cfgread;
44use gsprintf 'gsprintf';
45use printusage;
46
47use strict;
48no strict 'subs'; # allow barewords (eg STDERR) as function arguments
49
# Permitted values for the enumerated options. Each entry pairs a value
# name with the resource-bundle key of its description, which always has
# the form {mkcol.<option>.<value>}.

# Values accepted by -public
my $public_list = [
    map { +{ 'name' => $_, 'desc' => "{mkcol.public.$_}" } }
        ('true', 'false')
];

# Values accepted by -win31compat
my $win31compat_list = [
    map { +{ 'name' => $_, 'desc' => "{mkcol.win31compat.$_}" } }
        ('true', 'false')
];

# Values accepted by -buildtype
my $buildtype_list = [
    map { +{ 'name' => $_, 'desc' => "{mkcol.buildtype.$_}" } }
        ('mgpp', 'lucene', 'mg')
];

# Values accepted by -infodbtype
my $infodbtype_list = [
    map { +{ 'name' => $_, 'desc' => "{mkcol.infodbtype.$_}" } }
        ('gdbm', 'sqlite', 'jdbm', 'mssql', 'gdbm-txtgz')
];
85
# Description of every command-line option mkcol.pl accepts. The same
# structure is handed to parse2::parse for parsing and to the PrintUsage
# routines for the usage output. 'desc' values are resource-bundle keys.
my $arguments = [
    { 'name' => 'creator',     'desc' => '{mkcol.creator}',    'type' => 'string', 'reqd' => 'no' },
    { 'name' => 'optionfile',  'desc' => '{mkcol.optionfile}', 'type' => 'string', 'reqd' => 'no' },
    { 'name' => 'maintainer',  'desc' => '{mkcol.maintainer}', 'type' => 'string', 'reqd' => 'no' },
    { 'name' => 'group',       'desc' => '{mkcol.group}',      'type' => 'flag',   'reqd' => 'no' },

    # For gs3, either -collectdir and -gs3mode (deprecated), or -site must
    # be provided in order to locate the right collect directory and create
    # a gs3 collection.
    { 'name' => 'gs3mode',     'desc' => '{mkcol.gs3mode}',    'type' => 'flag',   'reqd' => 'no' },
    { 'name' => 'collectdir',  'desc' => '{mkcol.collectdir}', 'type' => 'string', 'reqd' => 'no' },
    { 'name' => 'site',        'desc' => '{mkcol.site}',       'type' => 'string', 'reqd' => 'no' },

    { 'name' => 'public',      'desc' => '{mkcol.public}',     'type' => 'enum',   'reqd' => 'no',
      'deft' => 'true',        'list' => $public_list },
    { 'name' => 'title',       'desc' => '{mkcol.title}',      'type' => 'string', 'reqd' => 'no' },
    { 'name' => 'about',       'desc' => '{mkcol.about}',      'type' => 'string', 'reqd' => 'no' },
    { 'name' => 'buildtype',   'desc' => '{mkcol.buildtype}',  'type' => 'enum',   'reqd' => 'no',
      'deft' => 'mgpp',        'list' => $buildtype_list },
    { 'name' => 'infodbtype',  'desc' => '{mkcol.infodbtype}', 'type' => 'enum',   'reqd' => 'no',
      'deft' => 'gdbm',        'list' => $infodbtype_list },
    { 'name' => 'plugin',      'desc' => '{mkcol.plugin}',     'type' => 'string', 'reqd' => 'no' },
    { 'name' => 'quiet',       'desc' => '{mkcol.quiet}',      'type' => 'flag',   'reqd' => 'no' },
    { 'name' => 'language',    'desc' => '{scripts.language}', 'type' => 'string', 'reqd' => 'no' },
    { 'name' => 'win31compat', 'desc' => '{mkcol.win31compat}', 'type' => 'enum',  'reqd' => 'no',
      'deft' => 'false',       'list' => $win31compat_list },

    # The last two options carry 'hiddengli' => 'yes'.
    { 'name' => 'gli',         'desc' => '',                   'type' => 'flag',   'reqd' => 'no',
      'hiddengli' => 'yes' },
    { 'name' => 'xml',         'desc' => '{scripts.xml}',      'type' => 'flag',   'reqd' => 'no',
      'hiddengli' => 'yes' },
];

# Top-level description of the script, as expected by the usage printers.
my $options = {
    'name' => 'mkcol.pl',
    'desc' => '{mkcol.desc}',
    'args' => $arguments,
};
175
# Command-line option values. main assigns these from the parse results;
# traverse_dir reads several of them while copying the model collection.
my ($creator, $maintainer, $title, $about);
my ($optionfile, $language, $plugin);
my ($gs3mode, $group, $collectdir, $site);
my ($public, $buildtype, $infodbtype, $win31compat);
my ($quiet, $gli);

# Values derived in main from the collection name and plugin selection.
my ($collection, $capcollection);
my ($collection_tail, $capcollection_tail);
my ($pluginstring, @plugin);

# Everything above must be initialised before the script body runs.
main();
187
188
# traverse_dir ($modeldir, $coldir)
#
# Recursively copies the model collection directory $modeldir into the new
# collection directory $coldir, creating $coldir if it does not yet exist.
#
#  - Directory entries named ".", "..", "CVS" and ".svn" are ignored, as
#    are files whose name ends in "~".
#  - The "macros" entry is skipped in gs3mode and the "import" entry is
#    skipped for a group.
#  - Of collect.cfg, group.cfg and collectionConfig.xml only the one that
#    matches the current mode is copied (group.cfg is written out as
#    collect.cfg).
#  - A leading "modelcol"/"MODELCOL" in a file name is replaced by the
#    collection name, and the **placeholder** markers inside each file are
#    replaced by the corresponding option values.
#
# Reads the file-level option variables ($gs3mode, $group, $quiet,
# $collection, $creator, ...). Dies if a directory or file cannot be
# created, read or completely written. Returns nothing useful.
sub traverse_dir
{
    my ($modeldir, $coldir) = @_;

    if (!(-e $coldir)) {
        # Create the directory with a umask of 0002, then put the previous
        # umask back. $! is saved straight away so that the later umask
        # call cannot disturb the reported error.
        my $store_umask = umask(0002);
        my $mkdir_ok = mkdir ($coldir, 0777);
        my $mkdir_error = "$!";
        umask($store_umask);

        die "Cannot create directory $coldir: $mkdir_error" unless $mkdir_ok;
    }

    # A lexical handle keeps the recursive calls below from sharing one
    # global directory handle.
    my $dirhandle;
    if (!opendir($dirhandle, $modeldir)) {
        # Die unconditionally; do not depend on gsprintf's return value.
        &gsprintf(STDERR, "{common.cannot_read}\n", $modeldir);
        die;
    }
    my @files = grep(!/^(\.\.?|CVS|\.svn)$/, readdir($dirhandle));
    closedir($dirhandle);

    # Placeholder substitutions, applied to every line in this order. They
    # are the same for every file, so build the list once per directory.
    my @replacements = (
        [ qr/\*\*collection\*\*/, $collection_tail ],
        [ qr/\*\*COLLECTION\*\*/, $capcollection_tail ],
        [ qr/\*\*creator\*\*/,    $creator ],
        [ qr/\*\*maintainer\*\*/, $maintainer ],
        [ qr/\*\*public\*\*/,     $public ],
        [ qr/\*\*title\*\*/,      $title ],
        [ qr/\*\*about\*\*/,      $about ],
        [ qr/\*\*buildtype\*\*/,  $buildtype ],
        [ qr/\*\*infodbtype\*\*/, $infodbtype ],
    );
    # The plugin list is only substituted outside gs3mode.
    push (@replacements, [ qr/\*\*plugins\*\*/, $pluginstring ]) unless $gs3mode;

    # An option that was never given substitutes as the empty string; make
    # that explicit so it does not raise "uninitialized value" warnings.
    foreach my $replacement (@replacements) {
        $replacement->[1] = "" unless defined $replacement->[1];
    }

    foreach my $file (@files)
    {
        # don't want macros folder for gs3mode
        next if ($gs3mode && $file =~ /^macros$/);

        # don't want import for group
        next if ($group && $file =~ /^import$/);

        my $thisfile = &util::filename_cat ($modeldir, $file);

        if (-d $thisfile) {
            my $colfiledir = &util::filename_cat ($coldir, $file);
            traverse_dir ($thisfile, $colfiledir);
            next;
        }

        # skip files whose name ends in "~"
        next if ($file =~ /~$/);

        # NOTE(review): the full $collection (not $collection_tail) is used
        # for the file name here, as in the original -- confirm this is
        # intended when the collection name contains a directory separator.
        my $destfile = $file;
        $destfile =~ s/^modelcol/$collection/;
        $destfile =~ s/^MODELCOL/$capcollection/;

        # There are three configuration files in modelcol directory:
        # collect.cfg, group.cfg and collectionConfig.xml.
        # If it is gs2, copy relevant collect.cfg or group.cfg file;
        # if gs3, copy collectionConfig.xml.
        if ($file =~ /^collect\.cfg$/) {
            next if ($gs3mode || $group);
        }
        elsif ($file =~ /^group\.cfg$/) {
            next unless $group;
            $destfile =~ s/group\.cfg/collect\.cfg/;
        }
        elsif ($file =~ /^collectionConfig\.xml$/) {
            next unless $gs3mode;
        }

        &gsprintf(STDOUT, "{mkcol.doing_replacements}\n", $destfile)
            unless $quiet;
        $destfile = &util::filename_cat ($coldir, $destfile);

        # Three-argument open: the file names are used exactly as given and
        # can never be mistaken for an open mode.
        my ($infile, $outfile);
        if (!open ($infile, '<', $thisfile)) {
            &gsprintf(STDERR, "{common.cannot_read_file}\n", $thisfile);
            die;
        }
        if (!open ($outfile, '>', $destfile)) {
            &gsprintf(STDERR, "{common.cannot_create_file}\n", $destfile);
            die;
        }

        while (defined (my $line = <$infile>)) {
            foreach my $replacement (@replacements) {
                my ($pattern, $value) = @$replacement;
                $line =~ s/$pattern/$value/g;
            }
            print $outfile $line;
        }

        # Buffered write errors (e.g. a full disk) only show up at close.
        close ($outfile) or die "Cannot finish writing $destfile: $!";
        close ($infile);
    }
}
284
285
# _store_parsed_options ($parsed, $slot_of)
#
# Copies every value in the hash reference $parsed (option name => value, as
# filled in by parse2::parse) into the scalar registered for that option in
# the hash reference $slot_of (option name => scalar reference). Names with
# no registered scalar are ignored.
sub _store_parsed_options {
    my ($parsed, $slot_of) = @_;

    foreach my $name (keys %$parsed) {
        next unless exists $slot_of->{$name};
        ${$slot_of->{$name}} = $parsed->{$name};
    }
}

# main ()
#
# Entry point of mkcol.pl. Parses the command line (and the -optionfile
# file, if one is given), validates the collection name and options, works
# out the directory of the new collection and then copies the model
# collection into it with traverse_dir.
#
# Takes no arguments; reads @ARGV and %ENV and fills in the file-level
# option variables. Prints the usage text and dies on invalid arguments;
# dies with a message when the collection name is not acceptable, a required
# directory is missing or the collection already exists. Returns early,
# without creating anything, when -xml is given.
sub main {

    my $xml = 0;

    # Where the value of each recognised option is stored. This replaces a
    # string eval that assigned to whichever variable happened to share the
    # option's name.
    my %option_slot = (
        'creator'     => \$creator,
        'optionfile'  => \$optionfile,
        'maintainer'  => \$maintainer,
        'gs3mode'     => \$gs3mode,
        'group'       => \$group,
        'collectdir'  => \$collectdir,
        'site'        => \$site,
        'public'      => \$public,
        'title'       => \$title,
        'about'       => \$about,
        'buildtype'   => \$buildtype,
        'infodbtype'  => \$infodbtype,
        'plugin'      => \$plugin,
        'quiet'       => \$quiet,
        'language'    => \$language,
        'win31compat' => \$win31compat,
        'gli'         => \$gli,
        'xml'         => \$xml,
    );

    my $hashParsingResult = {};
    my $intArgLeftinAfterParsing = parse2::parse(\@ARGV,$arguments,$hashParsingResult,"allow_extra_options");

    # If parse returns -1 then something has gone wrong
    if ($intArgLeftinAfterParsing == -1) {
        &PrintUsage::print_txt_usage($options, "{mkcol.params}");
        die "\n";
    }

    _store_parsed_options($hashParsingResult, \%option_slot);

    # If $language has been specified, load the appropriate resource bundle
    # (Otherwise, the default resource bundle will be loaded automatically)
    if ($language && $language =~ /\S/) {
        &gsprintf::load_language_specific_resource_bundle($language);
    }

    if ($xml) {
        &PrintUsage::print_xml_usage($options);
        print "\n";
        return;
    }

    if ($gli) { # the gli wants strings to be in UTF-8
        &gsprintf::output_strings_in_UTF8();
    }

    # now check that we had exactly one leftover arg, which should be
    # the collection name. We don't want to do this earlier, cos
    # -xml arg doesn't need a collection name
    # Or if the user specified -h, then we output the usage also
    if ($intArgLeftinAfterParsing != 1 || (@ARGV && $ARGV[0] =~ /^\-+h/)) {
        &PrintUsage::print_txt_usage($options, "{mkcol.params}");
        die "\n";
    }

    if (defined $optionfile && $optionfile =~ /\w/) {
        # The handle stays a package bareword because cfgread::read_cfg_line
        # is given its name ('mkcol::OPTIONS') as a string.
        if (!open (OPTIONS, '<', $optionfile)) {
            &gsprintf(STDERR, "{common.cannot_open}\n", $optionfile);
            die;
        }
        my @optionfile_args = ();
        while (defined (my $line = &cfgread::read_cfg_line ('mkcol::OPTIONS'))) {
            push (@optionfile_args, @$line);
        }
        close OPTIONS;

        my $optionsParsingResult = {};
        if (parse2::parse(\@optionfile_args,$arguments,$optionsParsingResult) == -1) {
            # $options here is the file-level option description; the list
            # read from the option file has its own name so that it cannot
            # shadow it.
            &PrintUsage::print_txt_usage($options, "{mkcol.params}");
            die "\n";
        }

        _store_parsed_options($optionsParsingResult, \%option_slot);
    }

    # load default plugins if none were on command line
    if (!scalar(@plugin)) {
        if (defined $plugin && $plugin =~ /\S/) {
            # Honour -plugin: its value used to be parsed into $plugin and
            # then never looked at.
            @plugin = ($plugin);
        }
        else {
            my $pdfplugin = ($gs3mode || $site) ? "PDFv2Plugin" : "PDFv1Plugin";
            @plugin = ("ZIPPlugin", "GreenstoneXMLPlugin", "TextPlugin",
                       "HTMLPlugin", "EmailPlugin", $pdfplugin, "RTFPlugin",
                       "WordPlugin", "PostScriptPlugin", "PowerPointPlugin",
                       "ExcelPlugin", "ImagePlugin", "ISISPlugin", "NulPlugin",
                       "EmbeddedMetadataPlugin", "MetadataXMLPlugin",
                       "ArchivesInfPlugin", "DirectoryPlugin");
        }
    }

    # get and check the collection name
    ($collection) = @ARGV;

    if (!defined($collection)) {
        &gsprintf(STDOUT, "{mkcol.no_colname}\n");
        &PrintUsage::print_txt_usage($options, "{mkcol.params}");
        die "\n";
    }

    # get capitalised version of the collection
    $capcollection = $collection;
    $capcollection =~ tr/a-z/A-Z/;

    $collection_tail = &util::get_dirsep_tail($collection);
    $capcollection_tail = &util::get_dirsep_tail($capcollection);

    if (defined $win31compat && $win31compat eq "true"
        && length($collection_tail) > 8) {
        &gsprintf(STDOUT, "{mkcol.long_colname}\n");
        die "\n";
    }

    if ($collection eq "modelcol") {
        &gsprintf(STDOUT, "{mkcol.bad_name_modelcol}\n");
        die "\n";
    }

    if ($collection_tail eq "CVS") {
        &gsprintf(STDOUT, "{mkcol.bad_name_cvs}\n");
        die "\n";
    }

    if ($collection_tail eq ".svn") {
        &gsprintf(STDOUT, "{mkcol.bad_name_svn}\n");
        die "\n";
    }

    if (defined($creator) && (!defined($maintainer) || $maintainer eq "")) {
        $maintainer = $creator;
    }

    $public = "true" unless defined $public;

    if (!defined($title) || $title eq "") {
        $title = $collection_tail;
    }

    # get the strings to include.
    $pluginstring = "";
    foreach my $plug (@plugin) {
        $pluginstring .= "plugin         $plug\n";
    }

    if ($gs3mode) {
        if (!defined $site) {
            print STDERR "Warning: -gs3mode is deprecated.\n";
            print STDERR "Use -site <name> instead to create a Greenstone 3 collection\n";
        }
    }
    elsif (defined $site && $site =~ /\w/) {
        # Using -site, so -gs3mode implicitly is true
        $gs3mode = 1;
    }

    # This check has to come after -site has been allowed to switch on
    # gs3mode, otherwise "-site <name> -group" is not caught.
    if ($gs3mode && $group) {
        &gsprintf(STDERR,"{mkcol.group_not_valid_in_gs3}\n");
        die "\n";
    }

    my $mdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect", "modelcol");

    # Work out where the new collection lives: an explicit -collectdir
    # wins, then the GS2 collect directory, then the GS3 site's collect
    # directory.
    my $cdir;
    if (defined $collectdir && $collectdir =~ /\w/) {
        if (!-d $collectdir) {
            &gsprintf(STDOUT, "{mkcol.no_collectdir}\n", $collectdir);
            die "\n";
        }
        $cdir = &util::filename_cat ($collectdir, $collection);
    }
    elsif (!$gs3mode) {
        $cdir = &util::filename_cat ($ENV{'GSDLHOME'}, "collect", $collection);
    }
    elsif (defined $site && $site =~ /\w/) {
        die "GSDL3HOME not set\n" unless defined $ENV{'GSDL3HOME'};

        my $site_collectdir = &util::filename_cat($ENV{'GSDL3HOME'}, "sites", $site, "collect");
        if (!-d $site_collectdir) {
            &gsprintf(STDOUT, "{mkcol.no_collectdir}\n", $site_collectdir);
            die "\n";
        }
        $cdir = &util::filename_cat ($site_collectdir, $collection);
    }
    else {
        &gsprintf(STDOUT, "{mkcol.no_collectdir_specified}\n");
        die "\n";
    }

    # make sure the model collection exists
    if (!-d $mdir) {
        # Die unconditionally; do not depend on gsprintf's return value.
        &gsprintf(STDERR, "{mkcol.cannot_find_modelcol}\n", $mdir);
        die;
    }

    # make sure this collection does not already exist
    if (-e $cdir) {
        &gsprintf(STDOUT, "{mkcol.col_already_exists}\n");
        die "\n";
    }

    # start creating the collection
    &gsprintf(STDOUT, "\n{mkcol.creating_col}...\n", $collection)
        unless $quiet;

    &traverse_dir ($mdir, $cdir);

    &gsprintf(STDOUT, "\n{mkcol.success}\n", $cdir)
        unless $quiet;
}
477
478
Note: See TracBrowser for help on using the browser.