source: main/trunk/greenstone2/bin/script/import.pl@ 26536

Last change on this file since 26536 was 26536, checked in by davidb, 11 years ago

Introduction of two new OIDtype values (hash_on_full_filename and full_filename), designed to provide more stable document IDs for collections that are rebuilt over time, including when they are rebuilt after the Greenstone install has been upgraded

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.3 KB
RevLine 
[14031]1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
31package import;
32
BEGIN {
    # Fail early when the Greenstone environment has not been set up:
    # every library path added below is rooted in GSDLHOME.
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Prepend the four library subdirectories of $root to @INC.  Each
    # directory is unshifted in turn, so the last one listed
    # (perllib/plugouts) ends up first in the search order.
    my $add_lib_dirs = sub {
        my ($root) = @_;
        foreach my $subdir ('perllib', 'perllib/cpan',
                            'perllib/plugins', 'perllib/plugouts') {
            unshift(@INC, "$root/$subdir");
        }
        return;
    };

    $add_lib_dirs->($ENV{'GSDLHOME'});

    # Extensions named in GSDLEXTS (colon-separated) live under GSDLHOME/ext.
    if (defined $ENV{'GSDLEXTS'}) {
        foreach my $e (split(/:/, $ENV{'GSDLEXTS'})) {
            $add_lib_dirs->("$ENV{'GSDLHOME'}/ext/$e");
        }
    }

    # Extensions named in GSDL3EXTS live under GSDL3SRCHOME/ext.  Without
    # GSDL3SRCHOME the prefix would interpolate as an empty string and put
    # filesystem-root paths ("/ext/...") on @INC, so skip them instead.
    if (defined $ENV{'GSDL3EXTS'}) {
        if (defined $ENV{'GSDL3SRCHOME'}) {
            foreach my $e (split(/:/, $ENV{'GSDL3EXTS'})) {
                $add_lib_dirs->("$ENV{'GSDL3SRCHOME'}/ext/$e");
            }
        }
        else {
            warn "GSDL3EXTS is set but GSDL3SRCHOME is not; "
               . "ignoring GSDL3EXTS extensions\n";
        }
    }

    # The mere presence of DEBUG_UNICODE (whatever its value) switches
    # STDERR to UTF-8 output.
    if (defined $ENV{'DEBUG_UNICODE'}) {
        binmode(STDERR, ":utf8");
    }
}
68
[14957]69use strict;
[18456]70use inexport;
[14031]71
# Allowed values for the OIDtype argument.  Each entry pairs the option
# value ('name') with the key of its description string ('desc').
my $oidtype_list = [
    { name => 'hash',                  desc => '{import.OIDtype.hash}' },
    { name => 'hash_on_full_filename', desc => '{import.OIDtype.hash_on_full_filename}' },
    { name => 'assigned',              desc => '{import.OIDtype.assigned}' },
    { name => 'incremental',           desc => '{import.OIDtype.incremental}' },
    { name => 'dirname',               desc => '{import.OIDtype.dirname}' },
    { name => 'full_filename',         desc => '{import.OIDtype.full_filename}' },
];
[14031]85
[14957]86
# Output file formats that may be selected with the saveas argument.
my $saveas_list = [
    { name => 'GreenstoneXML',  desc => '{export.saveas.GreenstoneXML}' },
    { name => 'GreenstoneMETS', desc => '{export.saveas.GreenstoneMETS}' },
];
[14031]94
95
96# Possible attributes for each argument
97# name: The name of the argument
98# desc: A description (or more likely a reference to a description) for this argument
99# type: The type of control used to represent the argument. Options include: string, int, flag, regexp, metadata, language, enum etc
100# reqd: Is this argument required?
101# hiddengli: Is this argument hidden in GLI?
102# modegli: The lowest detail mode this argument is visible at in GLI
103
# Specification of the saveas argument: an enum over $saveas_list that
# defaults to GreenstoneXML.
my $saveas_argument = {
    name    => 'saveas',
    desc    => '{import.saveas}',
    type    => 'enum',
    list    => $saveas_list,
    deft    => 'GreenstoneXML',
    reqd    => 'no',
    modegli => '3',
};
112
113
# Full list of command-line argument specifications for import.pl, in the
# order they are declared.  Several entries deliberately carry no 'deft'
# key; the commented-out values record the defaults that were removed.
my $arguments = [
    $saveas_argument,
    {
        name      => 'archivedir',
        desc      => '{import.archivedir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'importdir',
        desc      => '{import.importdir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'collectdir',
        desc      => '{import.collectdir}',
        type      => 'string',
        # parsearg left "" as default
        #'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"),
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'site',
        desc      => '{import.site}',
        type      => 'string',
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'manifest',
        desc      => '{import.manifest}',
        type      => 'string',
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'debug',
        desc      => '{import.debug}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'faillog',
        desc    => '{import.faillog}',
        type    => 'string',
        # parsearg left "" as default
        #'deft' => &util::filename_cat("<collectdir>", "colname", "etc", "fail.log"),
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {
        # Note: this entry has no 'reqd' key.
        name      => 'incremental',
        desc      => '{import.incremental}',
        type      => 'flag',
        hiddengli => 'yes',
    },
    {
        name      => 'keepold',
        desc      => '{import.keepold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'removeold',
        desc      => '{import.removeold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'language',
        desc      => '{scripts.language}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'maxdocs',
        desc    => '{import.maxdocs}',
        type    => 'int',
        reqd    => 'no',
        # parsearg left "" as default
        #'deft' => "-1",
        range   => '1,',
        modegli => '1',
    },
    # No default of "hash" is set for OIDtype: the value should be able to
    # come from an entry in collect.cfg, while still being overridable here.
    {
        name    => 'OIDtype',
        desc    => '{import.OIDtype}',
        type    => 'enum',
        list    => $oidtype_list,
        # parsearg left "" as default
        #'deft' => "hash",
        reqd    => 'no',
        modegli => '2',
    },
    {
        name    => 'OIDmetadata',
        desc    => '{import.OIDmetadata}',
        type    => 'string',
        #'type' => "metadata", #doesn't work properly in GLI
        # parsearg left "" as default
        #'deft' => "dc.Identifier",
        reqd    => 'no',
        modegli => '2',
    },
    {
        name      => 'out',
        desc      => '{import.out}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'sortmeta',
        desc    => '{import.sortmeta}',
        type    => 'string',
        #'type' => "metadata", #doesn't work properly in GLI
        reqd    => 'no',
        modegli => '2',
    },
    {
        name    => 'removeprefix',
        desc    => '{BasClas.removeprefix}',
        type    => 'regexp',
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name    => 'removesuffix',
        desc    => '{BasClas.removesuffix}',
        type    => 'regexp',
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name    => 'groupsize',
        desc    => '{import.groupsize}',
        type    => 'int',
        deft    => '1',
        reqd    => 'no',
        modegli => '2',
    },
    {
        name    => 'gzip',
        desc    => '{import.gzip}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name      => 'statsfile',
        desc      => '{import.statsfile}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'verbosity',
        desc    => '{import.verbosity}',
        type    => 'int',
        range   => '0,',
        # parsearg left "" as default
        # 'deft' => "2",
        reqd    => 'no',
        modegli => '3',
    },
    {
        name      => 'gli',
        desc      => '{scripts.gli}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'xml',
        desc      => '{scripts.xml}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
];
264
# Top-level description of this script: its name, the key of its
# description string, and the argument specifications declared above.
my $options = {
    name => 'import.pl',
    desc => '{import.desc}',
    args => $arguments,
};
268
269
270
# Entry point.  Builds an inexport object in "import" mode from the command
# line and the option table, asks it for the collection, and - only when a
# collection is defined - reads that collection's configuration, applies it,
# processes the files and generates the statistics.
sub main
{
    # Direct class-method call; the indirect-object form "new inexport(...)"
    # depends on how the parser resolves the bareword.
    my $inexport = inexport->new("import", \@ARGV, $options);

    my $collection = $inexport->get_collection();

    if (defined $collection) {
        my ($config_filename, $collect_cfg)
            = $inexport->read_collection_cfg($collection, $options);

        $inexport->set_collection_options($collect_cfg);

        my $pluginfo = $inexport->process_files($config_filename, $collect_cfg);

        $inexport->generate_statistics($pluginfo);
    }
}
[14031]288
289
# Run the import; a plain call replaces the legacy "&" sigil form.
main();
Note: See TracBrowser for help on using the repository browser.