root/trunk/gsdl/bin/script/import.pl @ 946

Revision 946, 6.0 KB (checked in by sjboddie, 20 years ago)

Fixed bug - classify directory was not included in @INC

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1#!/usr/local/bin/perl5 -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
# Compile-time setup: refuse to run unless the GSDLHOME and GSDLOS
# environment variables are defined, then put the perllib directories
# under GSDLHOME at the front of @INC, ahead of the "use" statements
# that follow.
BEGIN {
    foreach my $required ('GSDLHOME', 'GSDLOS') {
        die "$required not set\n" unless defined $ENV{$required};
    }

    # A single unshift gives the same final search order as three separate
    # ones: classify first, then plugins, then perllib itself.
    my $perllib = "$ENV{'GSDLHOME'}/perllib";
    unshift (@INC, "$perllib/classify", "$perllib/plugins", $perllib);
}
38
39use strict;
40use arcinfo;
41use colcfg;
42use plugin;
43use docprint;
44use util;
45use parsargv;
46
# print_usage --
# Writes the command-line synopsis and the list of supported options to
# STDERR.  Takes no arguments; the program name shown is taken from $0.
sub print_usage {
    my @usage_lines = (
        "\n  usage: $0 [options] collection-name\n\n",
        "  options:\n",
        "   -verbosity number      0=none, 3=lots\n",
        "   -importdir directory   Where the original material lives\n",
        "   -archivedir directory  Where the converted material ends up\n",
        "   -keepold               Will not destroy the current contents of the\n",
        "                          archives directory (the default)\n",
        "   -removeold             Will remove the old contents of the archives\n",
        "                          directory -- use with care\n",
        "   -gzip                  Use gzip to compress resulting gml documents\n",
        "   -maxdocs number        Maximum number of documents to import\n",
        "   -groupsize number      Number of GML documents to group into one file\n",
        "   -debug                 Print imported text to STDOUT\n\n",
    );

    # One write of the joined text produces the same bytes as printing
    # each line separately.
    print STDERR join ('', @usage_lines);
}
62
63
# Script entry point: all of the work is done inside main().
&main ();

# main --
# Drives one import run for the collection named on the command line:
# parses the options, reads the collection's etc/collect.cfg, loads the
# configured plugins, optionally removes the old archives directory, and
# then passes the import directory through the plugins to a document
# processor.
#
# Takes no arguments; options and the collection name come from @ARGV.
# Dies after printing the usage text when parsargv::parse reports failure
# or util::use_collection returns an empty string.  Also dies when
# collect.cfg does not exist and when no plugins were loaded.
sub main {
    my ($verbosity, $importdir, $archivedir, $keepold,
        $removeold, $gzip, $groupsize, $debug, $maxdocs, $collection,
        $configfilename, $collectcfg, $pluginfo,
        $archive_info_filename, $archive_info, $processor);

    # NOTE(review): each spec looks like "name/pattern/default" paired with
    # a reference to the variable that receives the value -- confirm
    # against parsargv.
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'importdir/.*/', \$importdir,
                         'archivedir/.*/', \$archivedir,
                         'keepold', \$keepold,
                         'removeold', \$removeold,
                         'gzip', \$gzip,
                         'groupsize/\d+/1', \$groupsize,
                         'debug', \$debug,
                         'maxdocs/^\-?\d+/-1', \$maxdocs)) {
        &print_usage();
        die "\n";
    }

    # -keepold takes precedence over -removeold
    $removeold = 0 if ($keepold);

    # get and check the collection name
    if (($collection = &util::use_collection(@ARGV)) eq "") {
        &print_usage();
        die "\n";
    }

    # Load 'docsave' at run time, after the collection's own perllib has
    # been put at the front of @INC, so that a collection-specific
    # docsave.pm is picked up if one is present.
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");
    require docsave;

    # get the list of plugins for this collection
    my $plugins = [];
    $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc/collect.cfg");
    unless (-e $configfilename) {
        die "Couldn't find the configuration file $configfilename\n";
    }
    $collectcfg = &colcfg::read_collect_cfg ($configfilename);
    if (defined $collectcfg->{'plugin'}) {
        $plugins = $collectcfg->{'plugin'};
    }

    # directories given on the command line win over those in collect.cfg
    if (defined $collectcfg->{'importdir'} && $importdir eq "") {
        $importdir = $collectcfg->{'importdir'};
    }
    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }

    # "removeold true" in collect.cfg switches removal on unless -keepold
    # was given.  "removeold false" needs no handling: at this point
    # $removeold can only be true because of -removeold on the command
    # line, and that setting is left in force.
    if (defined $collectcfg->{'removeold'}
        && $collectcfg->{'removeold'} =~ /^true$/i && !$keepold) {
        $removeold = 1;
    }

    # fill in the default import and archives directories if none
    # were supplied
    $importdir = "$ENV{'GSDLCOLLECTDIR'}/import" if $importdir eq "";
    $archivedir = "$ENV{'GSDLCOLLECTDIR'}/archives" if $archivedir eq "";

    # collapse every run of \ and / into a single / and remove a trailing /
    # (the loop variable aliases the two variables, so they are edited
    # in place)
    foreach my $dir ($importdir, $archivedir) {
        $dir =~ s{[\\/]+}{/}g;
        $dir =~ s{/$}{};
    }

    # load all the plugins
    $pluginfo = &plugin::load_plugins ($plugins);
    if (scalar(@$pluginfo) == 0) {
        print STDERR "No plugins were loaded.\n";
        die "\n";
    }

    # remove the old contents of the archives directory if needed
    if ($removeold && -e $archivedir) {
        print STDERR "Warning - removing current contents of the archives directory\n";
        print STDERR "          in preparation for the import\n";
        sleep(5); # just in case...
        &util::rm_r ($archivedir);
    }

    if (!$debug) {
        # read the archive information file
        $archive_info_filename = &util::filename_cat ($archivedir, "archives.inf");
        $archive_info = arcinfo->new ();
        $archive_info->load_info ($archive_info_filename);

        # create a docsave object to process the documents; the explicit
        # Class->new form is used because docsave is only loaded at run
        # time by the require above
        $processor = docsave->new ($collection, $archive_info, $verbosity, $gzip, $groupsize);
        $processor->setarchivedir ($archivedir);
    } else {
        # -debug: hand the documents to a docprint object instead of
        # saving them (the usage text describes this as printing the
        # imported text to STDOUT)
        $processor = docprint->new ();
    }

    &plugin::begin($pluginfo, $importdir, $processor, $maxdocs);

    # process the import directory
    &plugin::read ($pluginfo, $importdir, "", {}, $processor, $maxdocs);

    &plugin::end($pluginfo);

    # write out the archive information file
    if (!$debug) {
        $processor->close_file_output();
        $archive_info->save_info($archive_info_filename);
    }
}
177
178
179
180
181
Note: See TracBrowser for help on using the browser.