root/main/trunk/greenstone2/bin/script/import.pl @ 23372

Revision 23372, 8.2 KB (checked in by davidb, 9 years ago)

Debugging support for handling filenames that contain characters beyond the ASCII range

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
31package import;
32
# Compile-time setup.
#
# Dies unless the GSDLHOME and GSDLOS environment variables are defined,
# then pushes the Greenstone library directories (and those of any
# extensions named in GSDLEXTS / GSDL3EXTS, both ':'-separated lists) onto
# the front of @INC, so that modules loaded later (e.g. 'use inexport')
# are searched for there first.  Finally, when DEBUG_UNICODE is set to a
# true value, STDERR is switched to the :utf8 layer.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Library sub-directories in the order they are unshifted; because
    # unshift prepends, the LAST entry ends up FIRST in @INC.
    my @lib_subdirs = ("perllib",
                       "perllib/cpan",
                       "perllib/plugins",
                       "perllib/plugouts");

    foreach my $subdir (@lib_subdirs) {
        unshift (@INC, "$ENV{'GSDLHOME'}/$subdir");
    }

    # Greenstone 2 style extensions live under $GSDLHOME/ext/<name>.
    if (defined $ENV{'GSDLEXTS'}) {
        foreach my $e (split(/:/, $ENV{'GSDLEXTS'})) {
            my $ext_prefix = "$ENV{'GSDLHOME'}/ext/$e";

            foreach my $subdir (@lib_subdirs) {
                unshift (@INC, "$ext_prefix/$subdir");
            }
        }
    }

    # Greenstone 3 style extensions live under $GSDL3SRCHOME/ext/<name>.
    # Skip them when GSDL3SRCHOME is missing: interpolating an undefined
    # value would warn under -w and add meaningless "/ext/..." paths.
    if (defined $ENV{'GSDL3EXTS'} && defined $ENV{'GSDL3SRCHOME'}) {
        foreach my $e (split(/:/, $ENV{'GSDL3EXTS'})) {
            my $ext_prefix = "$ENV{'GSDL3SRCHOME'}/ext/$e";

            foreach my $subdir (@lib_subdirs) {
                unshift (@INC, "$ext_prefix/$subdir");
            }
        }
    }

    # Only enable UTF-8 debug output when the variable is set to a true
    # value (the original tested 'defined' twice, so "0" also enabled it).
    if ((defined $ENV{'DEBUG_UNICODE'}) && ($ENV{'DEBUG_UNICODE'})) {
        binmode(STDERR, ":utf8");
    }
}
68
69use strict;
70use inexport;
71
# Choices offered for the "OIDtype" enum argument: each entry pairs the
# option name with the resource key of its description.
my $oidtype_list = [
    { name => 'hash',        desc => '{import.OIDtype.hash}'        },
    { name => 'assigned',    desc => '{import.OIDtype.assigned}'    },
    { name => 'incremental', desc => '{import.OIDtype.incremental}' },
    { name => 'dirname',     desc => '{import.OIDtype.dirname}'     },
];
81
82
# Output file formats selectable through the "saveas" enum argument.
my $saveas_list = [
    { name => 'GreenstoneXML',  desc => '{export.saveas.GreenstoneXML}'  },
    { name => 'GreenstoneMETS', desc => '{export.saveas.GreenstoneMETS}' },
];
90
91
# Attributes an argument description may carry:
#   name      - the name of the argument
#   desc      - a description (more usually a reference to a description)
#   type      - the kind of control used to represent the argument, e.g.
#               string, int, flag, regexp, metadata, language, enum
#   reqd      - whether the argument is required
#   hiddengli - whether the argument is hidden in GLI
#   modegli   - the lowest detail mode at which GLI shows the argument

# The "saveas" argument, kept in its own variable and placed first in
# the argument table.
my $saveas_argument = {
    name    => 'saveas',
    desc    => '{import.saveas}',
    type    => 'enum',
    list    => $saveas_list,
    deft    => 'GreenstoneXML',
    reqd    => 'no',
    modegli => '3',
};
108
109
# Table of every command-line argument import.pl accepts.  Several
# entries deliberately carry no 'deft': the original notes record that
# parsearg leaves "" as the default in those cases, and the value that
# was once considered is kept in a comment next to each.
my $arguments = [
    $saveas_argument,
    {
        name      => 'archivedir',
        desc      => '{import.archivedir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'importdir',
        desc      => '{import.importdir}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'collectdir',
        desc      => '{import.collectdir}',
        type      => 'string',
        # parsearg left "" as default; previously considered:
        #   &util::filename_cat ($ENV{'GSDLHOME'}, "collect")
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'site',
        desc      => '{import.site}',
        type      => 'string',
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'manifest',
        desc      => '{import.manifest}',
        type      => 'string',
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'debug',
        desc      => '{import.debug}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'faillog',
        desc    => '{import.faillog}',
        type    => 'string',
        # parsearg left "" as default; previously considered:
        #   &util::filename_cat("<collectdir>", "colname", "etc", "fail.log")
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name      => 'incremental',
        desc      => '{import.incremental}',
        type      => 'flag',
        hiddengli => 'yes',
    },
    {
        name      => 'keepold',
        desc      => '{import.keepold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'removeold',
        desc      => '{import.removeold}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'language',
        desc      => '{scripts.language}',
        type      => 'string',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'maxdocs',
        desc    => '{import.maxdocs}',
        type    => 'int',
        reqd    => 'no',
        # parsearg left "" as default; previously considered: "-1"
        range   => '1,',
        modegli => '1',
    },
    # Don't set the default to hash here - the value is allowed to come
    # from an entry in collect.cfg, while still being overridable on the
    # command line.
    {
        name    => 'OIDtype',
        desc    => '{import.OIDtype}',
        type    => 'enum',
        list    => $oidtype_list,
        # parsearg left "" as default; previously considered: "hash"
        reqd    => 'no',
        modegli => '2',
    },
    {
        name    => 'OIDmetadata',
        desc    => '{import.OIDmetadata}',
        # type "metadata" doesn't work properly in GLI, so use "string"
        type    => 'string',
        # parsearg left "" as default; previously considered: "dc.Identifier"
        reqd    => 'no',
        modegli => '2',
    },
    {
        name      => 'out',
        desc      => '{import.out}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'sortmeta',
        desc    => '{import.sortmeta}',
        # type "metadata" doesn't work properly in GLI, so use "string"
        type    => 'string',
        reqd    => 'no',
        modegli => '2',
    },
    {
        name    => 'removeprefix',
        desc    => '{BasClas.removeprefix}',
        type    => 'regexp',
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name    => 'removesuffix',
        desc    => '{BasClas.removesuffix}',
        type    => 'regexp',
        deft    => '',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name    => 'groupsize',
        desc    => '{import.groupsize}',
        type    => 'int',
        deft    => '1',
        reqd    => 'no',
        modegli => '2',
    },
    {
        name    => 'gzip',
        desc    => '{import.gzip}',
        type    => 'flag',
        reqd    => 'no',
        modegli => '3',
    },
    {
        name      => 'statsfile',
        desc      => '{import.statsfile}',
        type      => 'string',
        deft      => 'STDERR',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name    => 'verbosity',
        desc    => '{import.verbosity}',
        type    => 'int',
        range   => '0,',
        # parsearg left "" as default; previously considered: "2"
        reqd    => 'no',
        modegli => '3',
    },
    {
        name      => 'gli',
        desc      => '{scripts.gli}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {
        name      => 'xml',
        desc      => '{scripts.xml}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
];
260
# Top-level description of this script (name, description key and the
# argument table) handed to the inexport constructor in main().
my $options = {
    name => 'import.pl',
    desc => '{import.desc}',
    args => $arguments,
};
264
265
266
# Entry point of import.pl.
#
# Constructs an inexport object in "import" mode from the command-line
# arguments (@ARGV) and the $options table, asks it for the collection,
# and - only if a collection was returned - reads the collection's
# configuration, applies it, processes the files and generates the
# statistics.  Takes no parameters; called once at the bottom of the
# script.
sub main
{
    # Direct method call rather than the indirect-object form
    # 'new inexport(...)', which Perl can mis-parse.
    my $inexport = inexport->new("import", \@ARGV, $options);

    my $collection = $inexport->get_collection();

    if (defined $collection) {
        my ($config_filename, $collect_cfg)
            = $inexport->read_collection_cfg($collection, $options);

        $inexport->set_collection_options($collect_cfg);

        my $pluginfo = $inexport->process_files($config_filename, $collect_cfg);

        $inexport->generate_statistics($pluginfo);
    }
}


main();
Note: See TracBrowser for help on using the browser.