source: trunk/gsdl/bin/script/mgppbuildcol.pl@ 946

Last change on this file since 946 was 933, checked in by kjm18, 24 years ago

new collection building program for mgpp

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.4 KB
Line 
1#!/usr/local/bin/perl5 -w
2
3###########################################################################
4#
5# mgppbuildcol.pl -- This program will build a particular collection
6# and incorporates mgpp.
7# A component of the Greenstone digital library software
8# from the New Zealand Digital Library Project at the
9# University of Waikato, New Zealand.
10#
11# Copyright (C) 1999 New Zealand Digital Library Project
12#
13# This program is free software; you can redistribute it and/or modify
14# it under the terms of the GNU General Public License as published by
15# the Free Software Foundation; either version 2 of the License, or
16# (at your option) any later version.
17#
18# This program is distributed in the hope that it will be useful,
19# but WITHOUT ANY WARRANTY; without even the implied warranty of
20# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21# GNU General Public License for more details.
22#
23# You should have received a copy of the GNU General Public License
24# along with this program; if not, write to the Free Software
25# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
26#
27###########################################################################
28
# Compile-time setup: refuse to run without the Greenstone environment
# variables, then put the Greenstone perl library directories at the front
# of @INC (presumably where colcfg, parsargv and util live -- confirm).
BEGIN {
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'}   or die "GSDLOS not set\n";

    # A single unshift that leaves @INC in the same order as unshifting
    # perllib first and perllib/plugins second: plugins ends up in front.
    unshift(@INC,
            "$ENV{'GSDLHOME'}/perllib/plugins",
            "$ENV{'GSDLHOME'}/perllib");
}
35 #$ENV{'PATH'} = "$ENV{'GSDLHOME'}/src/mgpp/text:".$ENV{'PATH'};
36
37use colcfg;
38use parsargv;
39use util;
40
# Script entry point: run the build. main() is defined further down in
# this file and reads its options from @ARGV.
main();
42
# print_usage -- write the command-line synopsis and option summary to
# STDERR.  Takes no arguments; $0 is interpolated as the program name.
sub print_usage {
    my @usage_text = (
        "\n usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -verbosity number 0=none, 3=lots\n",
        " -archivedir directory Where the archives live\n",
        " -cachedir directory Where to cache the archives\n",
        " -builddir directory Where to put the built indexes\n",
        " -maxdocs number Maximum number of documents to build\n",
        " -debug Print output to STDOUT\n",
        " -mode all|compress_text|build_index|infodb\n",
        " -index indexname Index to build (will build all in\n",
        " config file if not set)\n",
        " -keepold will not destroy the current contents of the\n",
        " building directory\n",
        " -allclassifications Don't remove empty classifications\n\n",
    );

    # One write of the concatenated text instead of one print per line.
    print STDERR join('', @usage_text);
}
59
60
# main -- build (or partially build) a collection.
#
# Takes no arguments: options and the collection name are read from @ARGV.
# Steps, in order:
#   1. parse the command-line options;
#   2. select the collection via util::use_collection;
#   3. read etc/collect.cfg for the text index name and for directory
#      settings that were not given on the command line;
#   4. work out the archives/building directories, optionally going
#      through a cache directory;
#   5. load the collection-specific builder if one exists, otherwise
#      mgppbuilder, and run the phases selected by -mode;
#   6. copy a cached build back to the real building directory.
#
# Dies on bad options or a missing collection (after printing usage), on a
# missing collect.cfg, on a builder constructor failure, and on an
# unknown mode.
sub main
{
    my ($verbosity, $archivedir, $cachedir, $builddir, $maxdocs,
        $debug, $mode, $indexname, $keepold, $allclassifications);
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'archivedir/.*/', \$archivedir,
                         'cachedir/.*/', \$cachedir,
                         'builddir/.*/', \$builddir,
                         'maxdocs/^\-?\d+/-1', \$maxdocs,
                         'debug', \$debug,
                         'mode/^(all|compress_text|build_index|infodb)$/all', \$mode,
                         'index/.*/', \$indexname,
                         'keepold', \$keepold,
                         'allclassifications', \$allclassifications)) {
        print_usage();
        die "\n";
    }

    # get and check the collection
    # this sets up GSDLCOLLECTION (eg demo) and GSDLCOLLECTDIR (eg
    # GSDLHOME/collect/demo )
    my $collection = util::use_collection(@ARGV);
    if ($collection eq "") {
        print_usage();
        die "\n";
    }

    # read the configuration file; command-line directories take
    # precedence over the ones named in collect.cfg
    my $textindex = "text";
    my $configfilename =
        util::filename_cat($ENV{'GSDLCOLLECTDIR'}, "etc/collect.cfg");
    if (-e $configfilename) {
        my $collectcfg = colcfg::read_collect_cfg($configfilename);
        if (defined $collectcfg->{'textcompress'}) {
            $textindex = $collectcfg->{'textcompress'};
        }
        if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
            $archivedir = $collectcfg->{'archivedir'};
        }
        if (defined $collectcfg->{'cachedir'} && $cachedir eq "") {
            $cachedir = $collectcfg->{'cachedir'};
        }
        if (defined $collectcfg->{'builddir'} && $builddir eq "") {
            $builddir = $collectcfg->{'builddir'};
        }
    } else {
        die "Couldn't find the configuration file $configfilename\n";
    }

    # fill in the default archives and building directories if none
    # were supplied, turn all \ into / and remove trailing /
    $archivedir = "$ENV{'GSDLCOLLECTDIR'}/archives" if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;
    $builddir = "$ENV{'GSDLCOLLECTDIR'}/building" if $builddir eq "";
    $builddir =~ s/[\\\/]+/\//g;
    $builddir =~ s/\/$//;

    # update the archive cache if needed
    my ($realarchivedir, $realbuilddir);
    if ($cachedir) {
        print STDERR "Updating archive cache\n" if ($verbosity >= 1);

        $cachedir =~ s/[\\\/]+$//;
        # \Q...\E: the collection name is literal text, not a pattern, so
        # names containing regex metacharacters must not be interpreted.
        $cachedir .= "/collect/$collection" unless
            $cachedir =~ /collect\/\Q$collection\E/;

        $realarchivedir = "$cachedir/archives";
        $realbuilddir = "$cachedir/building";
        util::mk_all_dir($realarchivedir);
        util::mk_all_dir($realbuilddir);
        util::cachedir($archivedir, $realarchivedir, $verbosity);

    } else {
        $realarchivedir = $archivedir;
        $realbuilddir = $builddir;
    }

    # build it in realbuilddir
    util::mk_all_dir($realbuilddir);


    # if a builder class has been created for this collection, use it
    # otherwise, use the mgpp builder
    my ($builderdir, $buildertype);
    if (-e "$ENV{'GSDLCOLLECTDIR'}/perllib/${collection}builder.pm") {
        $builderdir = "$ENV{'GSDLCOLLECTDIR'}/perllib";
        $buildertype = "${collection}builder";
    } else {
        $builderdir = "$ENV{'GSDLHOME'}/perllib";
        $buildertype = "mgppbuilder";
    }

    require "$builderdir/$buildertype.pm";

    # Direct class-method call on the class name held in $buildertype;
    # no string eval or indirect-object syntax is needed.  The block eval
    # keeps the original behaviour of re-raising the constructor's error
    # as a plain string.
    my $builder = eval {
        $buildertype->new($collection,
                          $realarchivedir, $realbuilddir, $verbosity,
                          $maxdocs, $debug, $keepold, $allclassifications);
    };
    die "$@" if $@;

    $builder->init();


    print STDERR "text index=$textindex, indexname= $indexname\n";
    if ($mode =~ /^all$/i) {
        # (the single-call form $builder->build_collection($textindex,
        # $indexname) was disabled in favour of the explicit phases below)
        $builder->compress_text($textindex);
        $builder->build_indexes($indexname);
        $builder->make_infodatabase();
        $builder->collect_specific();
    } elsif ($mode =~ /^compress_text$/i) {
        $builder->compress_text($textindex);
    } elsif ($mode =~ /^build_index$/i) {
        $builder->build_indexes($indexname);
    } elsif ($mode =~ /^infodb$/i) {
        $builder->make_infodatabase();
    } else {
        die "unknown mode: $mode\n";
    }

    $builder->make_auxiliary_files() if !$debug;
    $builder->deinit();

    # when building through the cache, replace the real building
    # directory with the freshly built cached copy
    if (($realbuilddir ne $builddir) && !$debug) {
        print STDERR "Copying back the cached build\n" if ($verbosity >= 1);
        util::rm_r($builddir);
        util::cp_r($realbuilddir, $builddir);
    }
}
187
188
Note: See TracBrowser for help on using the repository browser.