source: main/trunk/greenstone2/bin/script/import.pl@ 24375

Last change on this file since 24375 was 23372, checked in by davidb, 13 years ago

debugging support for handling filenames that go above ASCII values

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 8.2 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
31package import;
32
# Compile-time setup.  Checks that the required Greenstone environment
# variables are present and pushes the Greenstone Perl library directories
# onto the front of @INC, so that the 'use' statements further down the
# file can be resolved.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Sub-directories added for every library root.  They are unshifted one
    # at a time in this order, so the last one listed ends up first in @INC.
    my @lib_subdirs = ("perllib", "perllib/cpan", "perllib/plugins", "perllib/plugouts");

    my $add_lib_root = sub {
        my ($root) = @_;
        foreach my $subdir (@lib_subdirs) {
            unshift (@INC, "$root/$subdir");
        }
    };

    # Core Greenstone libraries.
    $add_lib_root->($ENV{'GSDLHOME'});

    # Extensions listed (colon-separated) in GSDLEXTS live under GSDLHOME/ext.
    if (defined $ENV{'GSDLEXTS'}) {
        foreach my $e (split(/:/, $ENV{'GSDLEXTS'})) {
            $add_lib_root->("$ENV{'GSDLHOME'}/ext/$e");
        }
    }

    # Extensions listed in GSDL3EXTS live under GSDL3SRCHOME/ext.  Without
    # GSDL3SRCHOME the paths would be rooted at "/ext/...", so the
    # extensions are skipped (with a warning) rather than adding bogus
    # absolute directories to @INC.
    if (defined $ENV{'GSDL3EXTS'}) {
        if (defined $ENV{'GSDL3SRCHOME'}) {
            foreach my $e (split(/:/, $ENV{'GSDL3EXTS'})) {
                $add_lib_root->("$ENV{'GSDL3SRCHOME'}/ext/$e");
            }
        }
        else {
            warn "GSDL3EXTS is set but GSDL3SRCHOME is not; ignoring GSDL3EXTS\n";
        }
    }

    # Emit STDERR as UTF-8 whenever DEBUG_UNICODE is present in the
    # environment.
    # NOTE(review): the original tested 'defined' twice; possibly the second
    # test was meant to check the value for truth -- confirm before changing.
    if (defined $ENV{'DEBUG_UNICODE'}) {
        binmode(STDERR,":utf8");
    }
}
68
69use strict;
70use inexport;
71
# Values offered for the OIDtype enum argument.  Each entry pairs the value
# name with the key of its description string.
my $oidtype_list = [
    map { +{ 'name' => $_, 'desc' => "{import.OIDtype.$_}" } }
        qw(hash assigned incremental dirname)
];
81
82
# Output file formats offered for the saveas enum argument.  Each entry
# pairs the format name with the key of its description string.
my $saveas_list = [
    map { +{ 'name' => $_, 'desc' => "{export.saveas.$_}" } }
        qw(GreenstoneXML GreenstoneMETS)
];
90
91
92# Possible attributes for each argument
93# name: The name of the argument
94# desc: A description (or more likely a reference to a description) for this argument
95# type: The type of control used to represent the argument. Options include: string, int, flag, regexp, metadata, language, enum etc
96# reqd: Is this argument required?
97# hiddengli: Is this argument hidden in GLI?
98# modegli: The lowest detail mode this argument is visible at in GLI
99
# Description of the -saveas option: an enum over $saveas_list that
# defaults to GreenstoneXML.
my $saveas_argument = {
    name    => 'saveas',
    desc    => '{import.saveas}',
    type    => 'enum',
    list    => $saveas_list,
    deft    => 'GreenstoneXML',
    reqd    => 'no',
    modegli => '3',
};
108
109
# Full option table for import.pl, in the order the options are declared.
# Every entry is a hash using the attributes described above (name, desc,
# type, reqd, hiddengli, modegli) plus, where present, deft, list and range.
my $arguments = [
    $saveas_argument,

    {   name      => 'archivedir',
        desc      => '{import.archivedir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'importdir',
        desc      => '{import.importdir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'collectdir',
        desc      => '{import.collectdir}',
        type      => 'string',
        # Default deliberately left empty (parsearg left "" as default);
        # previously: &util::filename_cat ($ENV{'GSDLHOME'}, "collect")
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'site',
        desc      => '{import.site}',
        type      => 'string',
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'manifest',
        desc      => '{import.manifest}',
        type      => 'string',
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'debug',
        desc      => '{import.debug}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'faillog',
        desc    => '{import.faillog}',
        type    => 'string',
        # Default deliberately left empty (parsearg left "" as default);
        # previously:
        # &util::filename_cat("<collectdir>", "colname", "etc", "fail.log")
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {   name      => 'incremental',
        desc      => '{import.incremental}',
        type      => 'flag',
        hiddengli => 'yes',
    },
    {   name      => 'keepold',
        desc      => '{import.keepold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'removeold',
        desc      => '{import.removeold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'language',
        desc      => '{scripts.language}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'maxdocs',
        desc    => '{import.maxdocs}',
        type    => 'int',
        reqd    => 'no',
        # No default given (parsearg left "" as default); previously "-1".
        range   => '1,',
        modegli => '1',
    },

    # OIDtype has no default of "hash" here on purpose: the value should be
    # able to come from the entry in collect.cfg, while still being
    # overridable from this option.
    {   name    => 'OIDtype',
        desc    => '{import.OIDtype}',
        type    => 'enum',
        list    => $oidtype_list,
        # No default given (parsearg left "" as default); previously "hash".
        reqd    => 'no',
        modegli => '2',
    },
    {   name    => 'OIDmetadata',
        desc    => '{import.OIDmetadata}',
        # Kept as "string": type "metadata" doesn't work properly in GLI.
        type    => 'string',
        # No default given (parsearg left "" as default);
        # previously "dc.Identifier".
        reqd    => 'no',
        modegli => '2',
    },
    {   name      => 'out',
        desc      => '{import.out}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'sortmeta',
        desc    => '{import.sortmeta}',
        # Kept as "string": type "metadata" doesn't work properly in GLI.
        type    => 'string',
        reqd    => 'no',
        modegli => '2',
    },
    {   name    => 'removeprefix',
        desc    => '{BasClas.removeprefix}',
        type    => 'regexp',
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {   name    => 'removesuffix',
        desc    => '{BasClas.removesuffix}',
        type    => 'regexp',
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {   name    => 'groupsize',
        desc    => '{import.groupsize}',
        type    => 'int',
        deft    => '1',
        reqd    => 'no',
        modegli => '2',
    },
    {   name    => 'gzip',
        desc    => '{import.gzip}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '3',
    },
    {   name      => 'statsfile',
        desc      => '{import.statsfile}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name    => 'verbosity',
        desc    => '{import.verbosity}',
        type    => 'int',
        range   => '0,',
        # No default given (parsearg left "" as default); previously "2".
        reqd    => 'no',
        modegli => '3',
    },
    {   name      => 'gli',
        desc      => '{scripts.gli}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'xml',
        desc      => '{scripts.xml}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
];
260
# Top-level description of this script: its name, the key of its
# description string, and the option table defined in $arguments.
my $options = {
    name => 'import.pl',
    desc => '{import.desc}',
    args => $arguments,
};
264
265
266
# Entry point of the script.
#
# Builds an inexport object in "import" mode from the command-line
# arguments (@ARGV) and the $options table, then asks it for the collection.
# When a collection is defined, its configuration is read and applied, the
# files are processed, and statistics are generated from the resulting
# plugin information.  When no collection is defined nothing further is done.
sub main
{
    # Direct method call instead of the indirect-object form
    # "new inexport(...)", which Perl can mis-parse.
    my $inexport = inexport->new("import", \@ARGV, $options);

    my $collection = $inexport->get_collection();
    return unless defined $collection;

    my ($config_filename, $collect_cfg)
        = $inexport->read_collection_cfg($collection, $options);

    $inexport->set_collection_options($collect_cfg);

    my $pluginfo = $inexport->process_files($config_filename, $collect_cfg);

    $inexport->generate_statistics($pluginfo);
}


main();
Note: See TracBrowser for help on using the repository browser.