root/main/trunk/greenstone2/bin/script/import.pl @ 22421

Revision 22421, 8.0 KB (checked in by davidb, 10 years ago)

Continued work on refactoring code to have better shared support for import.pl and export.pl

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# import.pl --
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 1999 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28
29# This program will import a number of files into a particular collection
30
31package import;
32
# Compile-time setup of the module search path.
#
# Dies unless GSDLHOME and GSDLOS are set in the environment, then prepends
# the Greenstone perllib directories to @INC.  For every extension named in
# the colon-separated GSDLEXTS (rooted at GSDLHOME) and GSDL3EXTS (rooted at
# GSDL3SRCHOME) variables, the same four perllib sub-directories under
# <root>/ext/<extension> are prepended as well.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # Order matters: each directory is unshifted in turn, so the last one
    # listed ends up first in @INC.
    my @lib_subdirs = ('perllib',
                       'perllib/cpan',
                       'perllib/plugins',
                       'perllib/plugouts');

    foreach my $subdir (@lib_subdirs) {
        unshift (@INC, "$ENV{'GSDLHOME'}/$subdir");
    }

    # Each pair is [ variable listing the extensions, variable naming the
    # directory that contains the "ext" folder ].
    my @extension_sources = ( [ 'GSDLEXTS',  'GSDLHOME' ],
                              [ 'GSDL3EXTS', 'GSDL3SRCHOME' ] );

    foreach my $source (@extension_sources) {
        my ($list_var, $root_var) = @$source;
        next unless defined $ENV{$list_var};

        # Without a root directory the extension paths would be built from
        # an undefined value (a bogus "/ext/<name>/..." entry in @INC), so
        # report the problem and skip these extensions instead.
        unless (defined $ENV{$root_var}) {
            warn "$list_var is set but $root_var is not; ignoring these extensions\n";
            next;
        }

        foreach my $e (split(/:/, $ENV{$list_var})) {
            my $ext_prefix = "$ENV{$root_var}/ext/$e";

            foreach my $subdir (@lib_subdirs) {
                unshift (@INC, "$ext_prefix/$subdir");
            }
        }
    }
}
64
65use strict;
66use inexport;
67
# Values accepted by the "OIDtype" enum argument.  Each entry pairs the
# option name with a brace-wrapped description key.
my $oidtype_list = [
    { 'name' => 'hash',        'desc' => '{import.OIDtype.hash}' },
    { 'name' => 'assigned',    'desc' => '{import.OIDtype.assigned}' },
    { 'name' => 'incremental', 'desc' => '{import.OIDtype.incremental}' },
    { 'name' => 'dirname',     'desc' => '{import.OIDtype.dirname}' },
];
77
78
# Values accepted by the "saveas" enum argument, which controls the output
# file format.  Note that the description keys live under "export.saveas".
my $saveas_list = [
    { 'name' => 'GreenstoneXML',  'desc' => '{export.saveas.GreenstoneXML}' },
    { 'name' => 'GreenstoneMETS', 'desc' => '{export.saveas.GreenstoneMETS}' },
];
86
87
88# Possible attributes for each argument
89# name: The name of the argument
90# desc: A description (or more likely a reference to a description) for this argument
91# type: The type of control used to represent the argument. Options include: string, int, flag, regexp, metadata, language, enum etc
92# reqd: Is this argument required?
93# hiddengli: Is this argument hidden in GLI?
94# modegli: The lowest detail mode this argument is visible at in GLI
95
# Definition of the "saveas" argument: an optional enum over $saveas_list
# that defaults to GreenstoneXML.  Kept in its own variable and placed first
# in the $arguments table.
my $saveas_argument = {
    'name'    => 'saveas',
    'desc'    => '{import.saveas}',
    'type'    => 'enum',
    'list'    => $saveas_list,
    'deft'    => 'GreenstoneXML',
    'reqd'    => 'no',
    'modegli' => '3',
};
104
105
# Table of every command-line argument import.pl declares, one hash per
# argument, using the attributes (name, desc, type, reqd, hiddengli,
# modegli, ...) described in the comment above.  Entries without a 'deft'
# key deliberately leave the default unset.
my $arguments = [
    $saveas_argument,
    {
        'name'      => 'archivedir',
        'desc'      => '{import.archivedir}',
        'type'      => 'string',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'importdir',
        'desc'      => '{import.importdir}',
        'type'      => 'string',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'collectdir',
        'desc'      => '{import.collectdir}',
        'type'      => 'string',
        # parsearg left "" as default
        #'deft' => &util::filename_cat ($ENV{'GSDLHOME'}, "collect"),
        'deft'      => '',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'site',
        'desc'      => '{import.site}',
        'type'      => 'string',
        'deft'      => '',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'manifest',
        'desc'      => '{import.manifest}',
        'type'      => 'string',
        'deft'      => '',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'debug',
        'desc'      => '{import.debug}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'    => 'faillog',
        'desc'    => '{import.faillog}',
        'type'    => 'string',
        # parsearg left "" as default
        #'deft' => &util::filename_cat("<collectdir>", "colname", "etc", "fail.log"),
        'deft'    => '',
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {
        # NOTE: unlike its neighbours this entry carries no 'reqd' key.
        'name'      => 'incremental',
        'desc'      => '{import.incremental}',
        'type'      => 'flag',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'keepold',
        'desc'      => '{import.keepold}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'removeold',
        'desc'      => '{import.removeold}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'language',
        'desc'      => '{scripts.language}',
        'type'      => 'string',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'    => 'maxdocs',
        'desc'    => '{import.maxdocs}',
        'type'    => 'int',
        'reqd'    => 'no',
        # parsearg left "" as default
        #'deft' => "-1",
        'range'   => '1,',
        'modegli' => '1',
    },
    # don't set the default to hash - want to allow this to come from
    # entry in collect.cfg but want to override it here
    {
        'name'    => 'OIDtype',
        'desc'    => '{import.OIDtype}',
        'type'    => 'enum',
        'list'    => $oidtype_list,
        # parsearg left "" as default
        #'deft' => "hash",
        'reqd'    => 'no',
        'modegli' => '2',
    },
    {
        'name'    => 'OIDmetadata',
        'desc'    => '{import.OIDmetadata}',
        'type'    => 'string',
        #'type' => "metadata", #doesn't work properly in GLI
        # parsearg left "" as default
        #'deft' => "dc.Identifier",
        'reqd'    => 'no',
        'modegli' => '2',
    },
    {
        'name'      => 'out',
        'desc'      => '{import.out}',
        'type'      => 'string',
        'deft'      => 'STDERR',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'    => 'sortmeta',
        'desc'    => '{import.sortmeta}',
        'type'    => 'string',
        #'type' => "metadata", #doesn't work properly in GLI
        'reqd'    => 'no',
        'modegli' => '2',
    },
    {
        'name'    => 'removeprefix',
        'desc'    => '{BasClas.removeprefix}',
        'type'    => 'regexp',
        'deft'    => '',
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {
        'name'    => 'removesuffix',
        'desc'    => '{BasClas.removesuffix}',
        'type'    => 'regexp',
        'deft'    => '',
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {
        'name'    => 'groupsize',
        'desc'    => '{import.groupsize}',
        'type'    => 'int',
        'deft'    => '1',
        'reqd'    => 'no',
        'modegli' => '2',
    },
    {
        'name'    => 'gzip',
        'desc'    => '{import.gzip}',
        'type'    => 'flag',
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {
        'name'      => 'statsfile',
        'desc'      => '{import.statsfile}',
        'type'      => 'string',
        'deft'      => 'STDERR',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'    => 'verbosity',
        'desc'    => '{import.verbosity}',
        'type'    => 'int',
        'range'   => '0,',
        # parsearg left "" as default
        # 'deft' => "2",
        'reqd'    => 'no',
        'modegli' => '3',
    },
    {
        'name'      => 'gli',
        'desc'      => '{scripts.gli}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
    {
        'name'      => 'xml',
        'desc'      => '{scripts.xml}',
        'type'      => 'flag',
        'reqd'      => 'no',
        'hiddengli' => 'yes',
    },
];
256
# Top-level description of this script: its name, a description key and the
# argument table.  Passed to the inexport constructor and to
# read_collection_cfg() in main().
my $options = {
    'name' => 'import.pl',
    'desc' => '{import.desc}',
    'args' => $arguments,
};
260
261
262
# Script entry point.  Every step is delegated to the shared inexport
# module, in this order:
#   1. construct an inexport object in "import" mode from @ARGV and $options
#   2. ask it which collection is being processed
#   3. read that collection's configuration (yields the config file name
#      and the parsed configuration)
#   4. apply the configuration to the object's options
#   5. process the files, collecting plugin information
#   6. generate statistics from that plugin information
# Returns whatever generate_statistics() returns (the last statement).
sub main
{
    # Direct Class->new call; the indirect-object form "new inexport(...)"
    # is ambiguous to the parser and discouraged by perlobj.
    my $inexport = inexport->new("import", \@ARGV, $options);

    my $collection = $inexport->get_collection();
    my ($config_filename, $collect_cfg)
        = $inexport->read_collection_cfg($collection, $options);
    $inexport->set_collection_options($collect_cfg);

    my $pluginfo = $inexport->process_files($config_filename, $collect_cfg);

    $inexport->generate_statistics($pluginfo);
}


# Plain call; the legacy "&main()" form is unnecessary here.
main();
Note: See TracBrowser for help on using the browser.