source: trunk/gsdl/bin/script/import.pl@ 546

Last change on this file since 546 was 538, checked in by sjboddie, 25 years ago

added GPL header

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.1 KB
RevLine 
#!/usr/local/bin/perl5 -w

###########################################################################
#
# import.pl --
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 1999 New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################


# This program will import a number of files into a particular collection
30
# Compile-time setup: refuse to start unless the GSDLHOME and GSDLOS
# environment variables are defined, then put the perllib and
# perllib/plugins directories under GSDLHOME at the front of @INC.
# This has to happen inside BEGIN because the `use` statements below are
# themselves resolved at compile time.
BEGIN {
    die "GSDLHOME not set\n" unless defined $ENV{'GSDLHOME'};
    die "GSDLOS not set\n" unless defined $ENV{'GSDLOS'};

    # unshift twice: perllib/plugins ends up ahead of perllib in @INC
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib");
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/plugins");
}

# Modules used by this script (presumably found through the @INC entries
# added above -- they are not core Perl modules).
use arcinfo;    # arcinfo objects: load_info / save_info
use colcfg;     # colcfg::read_collect_cfg
use plugin;     # plugin::load_plugins, plugin::read
use docsave;    # docsave objects: setarchivedir
use util;       # util::use_collection, util::filename_cat, util::rm_r
use parsargv;   # parsargv::parse
[4]44
# print_usage -- write the command-line synopsis to STDERR.
#
# Takes no arguments.  The script name is taken from $0.  Nothing is
# written to STDOUT, and the caller decides whether to exit afterwards.
sub print_usage {
    # A single print of a list produces the same bytes as one print per
    # line (the output separators $, and $\ are not set by this script).
    print STDERR
        "\n usage: $0 [options] collection-name\n\n",
        " options:\n",
        " -verbosity number 0=none, 3=lots\n",
        " -importdir directory Where the original material lives\n",
        " -archivedir directory Where the converted material ends up\n",
        " -keepold Will not destroy the current contents of the\n",
        " archives directory (the default)\n",
        " -removeold Will remove the old contents of the archives\n",
        " directory -- use with care\n",
        " -gzip Use gzip to compress resulting gml documents\n",
        " -maxdocs number Maximum number of documents to import\n\n";
}
58
59
# Script entry point.  main is defined further down the file; Perl
# compiles every named sub before running top-level statements, so
# calling it here (with parentheses) is fine.
main();
61
# _normalise_dir -- private helper for main.
#
# Returns $dir, or $default when $dir is the empty string, with every run
# of '\' and '/' collapsed into a single '/' and one trailing '/' removed.
sub _normalise_dir {
    my ($dir, $default) = @_;

    $dir = $default if $dir eq "";
    $dir =~ s/[\\\/]+/\//g;
    $dir =~ s/\/$//;

    return $dir;
}

# main -- import the files of one collection into its archives directory.
#
# Steps, in order:
#   1. parse the command line (options first, collection name left in @ARGV);
#   2. read etc/collect.cfg under $ENV{'GSDLCOLLECTDIR'} for the plugin list
#      and for importdir / archivedir / removeold settings;
#   3. work out the import and archives directories;
#   4. load the plugins, optionally wipe the archives directory, and run
#      the plugins over the import directory;
#   5. write the archive information file back out.
#
# Dies (after printing usage where appropriate) on bad arguments, an
# unusable collection name, a missing configuration file, or when no
# plugins could be loaded.
sub main {
    my ($verbosity, $importdir, $archivedir,
        $keepold, $removeold, $gzip, $maxdocs);

    # NOTE(review): the text after the last '/' of a spec looks like the
    # default value (2 for verbosity, empty for the others) -- confirm
    # against parsargv.
    if (!parsargv::parse(\@ARGV,
                         'verbosity/\d+/2', \$verbosity,
                         'importdir/.*/', \$importdir,
                         'archivedir/.*/', \$archivedir,
                         'keepold', \$keepold,
                         'removeold', \$removeold,
                         'gzip', \$gzip,
                         'maxdocs/\d+/', \$maxdocs)) {
        print_usage();
        die "\n";
    }

    # -keepold wins when both -keepold and -removeold were given
    $removeold = 0 if ($keepold);

    # get and check the collection name; an empty result is an error
    my $collection = util::use_collection(@ARGV);
    if ($collection eq "") {
        print_usage();
        die "\n";
    }

    # the collection configuration file is mandatory
    # (GSDLCOLLECTDIR is not checked in BEGIN; presumably it is set by
    # util::use_collection -- TODO confirm)
    my $configfilename = util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "etc/collect.cfg");
    if (!-e $configfilename) {
        die "Couldn't find the configuration file $configfilename\n";
    }
    my $collectcfg = colcfg::read_collect_cfg ($configfilename);

    # get the list of plugins for this collection
    my @plugins = ();
    if (defined $collectcfg->{'plugins'}) {
        @plugins = @{$collectcfg->{'plugins'}};
    }

    # directories given on the command line take precedence over the
    # ones in the configuration file
    if (defined $collectcfg->{'importdir'} && $importdir eq "") {
        $importdir = $collectcfg->{'importdir'};
    }
    if (defined $collectcfg->{'archivedir'} && $archivedir eq "") {
        $archivedir = $collectcfg->{'archivedir'};
    }

    # the configuration file may ask for removeold, but the command-line
    # flags still win: "true" is ignored under -keepold, and "false"
    # never switches off an explicit -removeold
    if (defined $collectcfg->{'removeold'}) {
        if ($collectcfg->{'removeold'} =~ /^true$/i && !$keepold) {
            $removeold = 1;
        }
        if ($collectcfg->{'removeold'} =~ /^false$/i && !$removeold) {
            $removeold = 0;
        }
    }

    # fill in the default import and archives directories if none
    # were supplied, turn all \ into / and remove trailing /
    $importdir  = _normalise_dir ($importdir,  "$ENV{'GSDLCOLLECTDIR'}/import");
    $archivedir = _normalise_dir ($archivedir, "$ENV{'GSDLCOLLECTDIR'}/archives");

    # load all the plugins
    my $pluginfo = plugin::load_plugins ($collection, \@plugins);
    if (scalar(@$pluginfo) == 0) {
        print STDERR "No plugins were loaded.\n";
        die "\n";
    }

    # remove the old contents of the archives directory if needed
    if ($removeold && -e $archivedir) {
        print STDERR "Warning - removing current contents of the archives directory\n";
        print STDERR " in preparation for the import\n";
        sleep(5); # just in case... gives the user a chance to interrupt
        util::rm_r ($archivedir);
    }

    # read the archive information file
    my $archive_info_filename = util::filename_cat ($archivedir, "archives.inf");
    my $archive_info = arcinfo->new ();
    $archive_info->load_info ($archive_info_filename);

    # create a docsave object to process the documents
    my $processor = docsave->new ($collection, $archive_info, $verbosity, $gzip);
    $processor->setarchivedir ($archivedir);

    # process the import directory
    plugin::read ($pluginfo, $importdir,
                  "", {}, $processor, $maxdocs);

    # write out the archive information file
    $archive_info->save_info($archive_info_filename);
}
Note: See TracBrowser for help on using the repository browser.