source: trunk/gsdl/bin/script/import.pl@ 1452

Last change on this file since 1452 was 1431, checked in by sjboddie, 24 years ago

Made a few minor adjustments to perl building code for use with
collectoraction

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 7.3 KB
RevLine 
[1031]1#!/usr/bin/perl -w
[4]2
[538]3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
[4]29# This program will import a number of files into a particular collection
30
[1424]31package import;
32
# Compile-time setup: insist on the two environment variables this script
# needs and put the Greenstone perl library directories at the front of
# @INC so that the "use" statements below can be resolved.
BEGIN {
    defined $ENV{'GSDLHOME'} or die "GSDLHOME not set\n";
    defined $ENV{'GSDLOS'} or die "GSDLOS not set\n";

    # resulting search order: perllib/classify, perllib/plugins, perllib
    unshift (@INC,
             "$ENV{'GSDLHOME'}/perllib/classify",
             "$ENV{'GSDLHOME'}/perllib/plugins",
             "$ENV{'GSDLHOME'}/perllib");
}
40
41use arcinfo;
42use colcfg;
43use plugin;
[783]44use docprint;
[130]45use util;
46use parsargv;
[1424]47use FileHandle;
[4]48
# print_usage ()
# Writes the command line synopsis and the description of every
# supported option to STDERR. Takes no arguments.
sub print_usage {
    my @usage_text = (
        "\n usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -verbosity number 0=none, 3=lots\n",
        " -importdir directory Where the original material lives\n",
        " -archivedir directory Where the converted material ends up\n",
        " -keepold Will not destroy the current contents of the\n",
        " archives directory (the default)\n",
        " -removeold Will remove the old contents of the archives\n",
        " directory -- use with care\n",
        " -gzip Use gzip to compress resulting gml documents\n",
        " (don't forget to include ZIPPlug in your plugin\n",
        " list when building from compressed documents)\n",
        " -maxdocs number Maximum number of documents to import\n",
        " -groupsize number Number of GML documents to group into one file\n",
        " -sortmeta metadata Sort documents alphabetically by metadata for\n",
        " building. This will be disabled if groupsize > 1\n",
        " -debug Print imported text to STDOUT\n",
        " -out Filename or handle to print output status to.\n",
        " The default is STDERR\n\n",
    );

    # one print per entry, exactly as many writes as there are entries
    foreach my $text (@usage_text) {
        print STDERR $text;
    }
}
70
# Script entry point: run the import as soon as the file has been compiled.
[1424]71&main();
[4]72
# main ()
# Imports the documents of one collection into its archives directory.
#
# Steps, in order:
#   1. parse the command line options from @ARGV (see print_usage)
#   2. set up the status output handle requested with -out
#   3. work out the collection from the remaining arguments
#   4. read etc/collect.cfg for the plugin list and for defaults of
#      importdir, archivedir and removeold
#   5. load the plugins, optionally clear the archives directory, and run
#      the plugins over the import directory
#   6. save the archive information file (skipped when -debug is given)
#
# Dies (after printing the usage where appropriate) when the options cannot
# be parsed, no collection is identified, the -out file cannot be opened,
# collect.cfg is missing, or no plugins could be loaded.
#
# Note: $out holds the NAME of a filehandle ('STDERR', 'STDOUT' or
# 'import::OUT'), not a handle object. It is printed to through a symbolic
# reference and handed on in that form to plugin::load_plugins and docsave,
# so this code cannot run under "use strict 'refs'".
sub main {
    my ($verbosity, $importdir, $archivedir, $keepold,
        $removeold, $gzip, $groupsize, $debug, $maxdocs, $collection,
        $configfilename, $collectcfg, $pluginfo, $sortmeta,
        $archive_info_filename, $archive_info, $processor, $out);
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'importdir/.*/', \$importdir,
                         'archivedir/.*/', \$archivedir,
                         'keepold', \$keepold,
                         'removeold', \$removeold,
                         'gzip', \$gzip,
                         'groupsize/\d+/1', \$groupsize,
                         'sortmeta/.*/', \$sortmeta,
                         'debug', \$debug,
                         'maxdocs/^\-?\d+/-1', \$maxdocs,
                         'out/.*/STDERR', \$out)) {
        &print_usage();
        die "\n";
    }

    my $close_out = 0;
    if ($out =~ /^(STDERR|STDOUT)$/i) {
        # The match is case insensitive, but inside a package only the
        # upper case names refer to the predefined handles; "stderr" would
        # be looked up as import::stderr. Normalise the name so that
        # "-out stderr" behaves like "-out STDERR".
        $out = uc ($1);
    } else {
        # three-argument open: characters in the file name can never be
        # taken for an open mode or a pipe
        open (OUT, '>', $out) || die "Couldn't open output file $out: $!\n";
        $out = 'import::OUT';
        $close_out = 1;
    }
    $out->autoflush(1);

    # -keepold wins over -removeold when both were given
    $removeold = 0 if ($keepold);

    # get and check the collection name
    if (($collection = &util::use_collection(@ARGV)) eq "") {
        &print_usage();
        die "\n";
    }

    # check sortmeta: an empty or all-whitespace value counts as "not set"
    $sortmeta = undef unless defined $sortmeta && $sortmeta =~ /\S/;
    if (defined $sortmeta && $groupsize > 1) {
        print $out "WARNING: import.pl cannot sort documents when groupsize > 1\n";
        print $out " sortmeta option will be ignored\n\n";
        $sortmeta = undef;
    }

    # dynamically load the 'docsave' module so that a collection specific
    # docsave.pm is picked up if one exists: the collection's own perllib
    # directory is searched before everything else in @INC.
    # NOTE(review): GSDLCOLLECTDIR is not checked in this file; presumably
    # util::use_collection sets it -- confirm.
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");
    require docsave;

    # get the list of plugins for this collection
    my $plugins = [];
    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc/collect.cfg");
    if (-e $configfilename) {
        $collectcfg = &colcfg::read_collect_cfg ($configfilename);
        if (defined $collectcfg->{'plugin'}) {
            $plugins = $collectcfg->{'plugin'};
        }
        # values from the command line take precedence over collect.cfg
        if (defined $collectcfg->{'importdir'} && $importdir eq "") {
            $importdir = $collectcfg->{'importdir'};
        }
        if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
            $archivedir = $collectcfg->{'archivedir'};
        }
        if (defined $collectcfg->{'removeold'}) {
            if ($collectcfg->{'removeold'} =~ /^true$/i && !$keepold) {
                $removeold = 1;
            }
            if ($collectcfg->{'removeold'} =~ /^false$/i && !$removeold) {
                $removeold = 0;
            }
        }
    } else {
        die "Couldn't find the configuration file $configfilename\n";
    }

    # fill in the default import and archives directories if none
    # were supplied, turn all \ into / and remove trailing /
    $importdir = "$ENV{'GSDLCOLLECTDIR'}/import" if $importdir eq "";
    $importdir =~ s/[\\\/]+/\//g;
    $importdir =~ s/\/$//;
    $archivedir = "$ENV{'GSDLCOLLECTDIR'}/archives" if $archivedir eq "";
    $archivedir =~ s/[\\\/]+/\//g;
    $archivedir =~ s/\/$//;

    # load all the plugins
    $pluginfo = &plugin::load_plugins ($plugins, $verbosity, $out);
    if (scalar(@$pluginfo) == 0) {
        print $out "No plugins were loaded.\n";
        die "\n";
    }

    # remove the old contents of the archives directory if needed
    if ($removeold && -e $archivedir) {
        print $out "Warning - removing current contents of the archives directory\n";
        print $out " in preparation for the import\n";
        sleep(5); # just in case...
        &util::rm_r ($archivedir);
    }

    # read the archive information file
    if (!$debug) {
        $archive_info_filename = &util::filename_cat ($archivedir, "archives.inf");
        $archive_info = arcinfo->new ();
        $archive_info->load_info ($archive_info_filename);

        # create a docsave object to process the documents
        # (docsave is only loaded at run time, so the explicit
        # Class->new form avoids any guessing by the parser)
        $processor = docsave->new ($collection, $archive_info, $verbosity, $gzip, $groupsize, $out);
        $processor->setarchivedir ($archivedir);
        $processor->set_sortmeta ($sortmeta) if defined $sortmeta;
    } else {
        # -debug: use a docprint processor instead; no archive
        # information is read or written
        $processor = docprint->new ();
    }

    &plugin::begin($pluginfo, $importdir, $processor, $maxdocs);

    # process the import directory
    &plugin::read ($pluginfo, $importdir, "", {}, $processor, $maxdocs);

    &plugin::end($pluginfo);

    # write out the archive information file
    if (!$debug) {
        $processor->close_file_output() if $groupsize > 1;
        $archive_info->save_info($archive_info_filename);
    }
    close OUT if $close_out;
}
Note: See TracBrowser for help on using the repository browser.