source: branches/New_Config_Format-branch/gsdl/bin/script/import.pl@ 1279

Last change on this file since 1279 was 1279, checked in by sjboddie, 24 years ago

merged changes to trunk into New_Config_Format branch

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 6.2 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
BEGIN {
    # Refuse to start unless both required environment variables are set.
    if (!defined $ENV{'GSDLHOME'}) {
        die "GSDLHOME not set\n";
    }
    if (!defined $ENV{'GSDLOS'}) {
        die "GSDLOS not set\n";
    }

    # Put the Greenstone library directories at the front of the module
    # search path.  Resulting order: classify, plugins, perllib.
    unshift (@INC,
             "$ENV{'GSDLHOME'}/perllib/classify",
             "$ENV{'GSDLHOME'}/perllib/plugins",
             "$ENV{'GSDLHOME'}/perllib");
}
38
39use strict;
40use arcinfo;
41use colcfg;
42use plugin;
43use docprint;
44use util;
45use parsargv;
46
# Write the command-line help text for this script to STDERR.
# Takes no arguments; $0 supplies the program name shown in the usage line.
sub print_usage {
    my @usage_text = (
        "\n usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -verbosity number 0=none, 3=lots\n",
        " -importdir directory Where the original material lives\n",
        " -archivedir directory Where the converted material ends up\n",
        " -keepold Will not destroy the current contents of the\n",
        " archives directory (the default)\n",
        " -removeold Will remove the old contents of the archives\n",
        " directory -- use with care\n",
        " -gzip Use gzip to compress resulting gml documents\n",
        " (don't forget to include ZIPPlug in your plugin\n",
        " list when building from compressed documents)\n",
        " -maxdocs number Maximum number of documents to import\n",
        " -groupsize number Number of GML documents to group into one file\n",
        " -debug Print imported text to STDOUT\n\n",
    );

    # Emit the whole message in a single write.
    print STDERR join ('', @usage_text);
}
64
65
&main ();

# Return $dir (or $default when $dir is the empty string) with every run of
# slashes and backslashes collapsed to a single "/" and a trailing "/" removed.
sub _normalise_dir {
    my ($dir, $default) = @_;

    $dir = $default if $dir eq "";
    $dir =~ s/[\\\/]+/\//g;
    $dir =~ s/\/$//;

    return $dir;
}

# Entry point: import the files of one collection.
#
# Steps, in order:
#   1. parse the command-line options (prints usage and dies on failure);
#   2. resolve the collection name from the remaining arguments;
#   3. read etc/collect.cfg for the plugin list and for import/archive
#      directory and removeold settings not given on the command line;
#   4. load the plugins, optionally wipe the archives directory;
#   5. run the plugins over the import directory, handing documents to a
#      docsave processor (or a docprint processor when -debug is given);
#   6. save the archive information file (skipped with -debug).
#
# Dies if the options cannot be parsed, no collection is resolved, the
# configuration file is missing, or no plugins are loaded.
sub main {
    # NOTE(review): the option specs look like "name/pattern/default" and
    # parse() presumably strips recognised options from @ARGV -- confirm
    # against parsargv.pm.
    my ($verbosity, $importdir, $archivedir, $keepold, $removeold,
        $gzip, $groupsize, $debug, $maxdocs);
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'importdir/.*/', \$importdir,
                         'archivedir/.*/', \$archivedir,
                         'keepold', \$keepold,
                         'removeold', \$removeold,
                         'gzip', \$gzip,
                         'groupsize/\d+/1', \$groupsize,
                         'debug', \$debug,
                         'maxdocs/^\-?\d+/-1', \$maxdocs)) {
        &print_usage();
        die "\n";
    }

    # -keepold wins when both -keepold and -removeold are given
    $removeold = 0 if ($keepold);

    # get and check the collection name
    my $collection = &util::use_collection(@ARGV);
    if ($collection eq "") {
        &print_usage();
        die "\n";
    }

    # Load 'docsave' at run time, after the collection's own perllib has been
    # put at the front of @INC, so that a collection-specific docsave.pm is
    # picked up if one exists.
    unshift (@INC, "$ENV{'GSDLCOLLECTDIR'}/perllib");
    require docsave;

    # the collection's configuration file is mandatory
    my $configfilename = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc/collect.cfg");
    if (!-e $configfilename) {
        die "Couldn't find the configuration file $configfilename\n";
    }
    my $collectcfg = &colcfg::read_collect_cfg ($configfilename);

    # get the list of plugins for this collection
    my $plugins = [];
    if (defined $collectcfg->{'plugin'}) {
        $plugins = $collectcfg->{'plugin'};
    }

    # directories from the configuration file are used only when none were
    # given on the command line
    if (defined $collectcfg->{'importdir'} && $importdir eq "") {
        $importdir = $collectcfg->{'importdir'};
    }
    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }

    # "removeold true" in the configuration file switches removal on unless
    # -keepold was given.  "removeold false" needs no handling: removal is
    # already off unless -removeold was given, and the command line wins.
    if (defined $collectcfg->{'removeold'}
        && $collectcfg->{'removeold'} =~ /^true$/i && !$keepold) {
        $removeold = 1;
    }

    # fill in the default import and archives directories if none
    # were supplied, turn all \ into / and remove trailing /
    $importdir = _normalise_dir ($importdir, "$ENV{'GSDLCOLLECTDIR'}/import");
    $archivedir = _normalise_dir ($archivedir, "$ENV{'GSDLCOLLECTDIR'}/archives");

    # load all the plugins
    my $pluginfo = &plugin::load_plugins ($plugins, $verbosity);
    if (scalar(@$pluginfo) == 0) {
        print STDERR "No plugins were loaded.\n";
        die "\n";
    }

    # remove the old contents of the archives directory if needed
    if ($removeold && -e $archivedir) {
        print STDERR "Warning - removing current contents of the archives directory\n";
        print STDERR " in preparation for the import\n";
        sleep(5); # just in case... gives the user a chance to interrupt
        &util::rm_r ($archivedir);
    }

    # choose the document processor
    my ($archive_info_filename, $archive_info, $processor);
    if ($debug) {
        # -debug: documents go to a docprint object and no archive
        # information is read or written
        $processor = docprint->new();
    } else {
        # read the archive information file
        $archive_info_filename = &util::filename_cat ($archivedir, "archives.inf");
        $archive_info = arcinfo->new();
        $archive_info->load_info ($archive_info_filename);

        # create a docsave object to process the documents
        $processor = docsave->new($collection, $archive_info, $verbosity, $gzip, $groupsize);
        $processor->setarchivedir ($archivedir);
    }

    &plugin::begin($pluginfo, $importdir, $processor, $maxdocs);

    # process the import directory
    &plugin::read ($pluginfo, $importdir, "", {}, $processor, $maxdocs);

    &plugin::end($pluginfo);

    # write out the archive information file
    if (!$debug) {
        $processor->close_file_output();
        $archive_info->save_info($archive_info_filename);
    }
}
179
180
181
182
183
Note: See TracBrowser for help on using the repository browser.