source: main/trunk/greenstone2/bin/script/g2f-deletecol.pl@ 31888

Last change on this file since 31888 was 31756, checked in by ak19, 7 years ago

For gs-fedora scripts, check for JRE_HOME as fallback if JAVA_HOME is not set

  • Property svn:executable set to *
File size: 8.6 KB
Line 
1#!/usr/bin/perl -w
2
BEGIN
{
    # GSDLHOME is needed below to locate the Greenstone perl library, so
    # bail out early when it is missing from the environment.
    unless (defined $ENV{'GSDLHOME'}) {
        print STDERR "Environment variable GSDLHOME not set.\n";
        print STDERR " Have you sourced Greenstone's 'setup.bash' file?\n";
        exit 1;
    }

    # Either JAVA_HOME or JRE_HOME is acceptable; give up only if both
    # are absent.
    unless (defined $ENV{'JAVA_HOME'} || defined $ENV{'JRE_HOME'}) {
        print STDERR "Neither JAVA_HOME nor JRE_HOME set.\n";
        print STDERR "Java needed by Fedora command line scripts.\n";
        exit 1;
    }

    # Fallback values for the Fedora connection settings.  A value that is
    # already defined in the environment is never overwritten.
    my @fedora_fallbacks = (
        [ 'FEDORA_HOSTNAME',      "localhost"   ],
        [ 'FEDORA_SERVER_PORT',   "8080"        ],
        [ 'FEDORA_USER',          "fedoraAdmin" ],
        [ 'FEDORA_PASS',          "fedoraAdmin" ],
        [ 'FEDORA_PROTOCOL',      "http"        ],
        [ 'FEDORA_PID_NAMESPACE', "greenstone"  ],
        [ 'FEDORA_PREFIX',        "/fedora"     ],
    );

    foreach my $entry (@fedora_fallbacks) {
        my ($env_name, $fallback) = @$entry;
        $ENV{$env_name} = $fallback unless defined $ENV{$env_name};
    }

    # Make the Greenstone perl modules loadable by the 'use' lines that
    # follow this block.
    unshift(@INC, "$ENV{'GSDLHOME'}/perllib/");
}
28
29
30use strict;
31no strict 'refs'; # allow filehandles to be variables and vice versa
32no strict 'subs'; # allow barewords (e.g. STDERR) as function arguments
33
34use util;
35use gsprintf 'gsprintf';
36use printusage;
37use parse2;
38use cfgread;
39use colcfg;
40
41use g2futil;
42
43use dbutil;
44
# Command-line options understood by this script, in the order they are
# handed to the option parser and the usage printer.  The Fedora connection
# options take their defaults from the FEDORA_* environment variables.
my $arguments = [
    {   name      => 'verbosity',
        desc      => 'Level of verbosity generated',
        type      => 'string',
        deft      => '1',
        reqd      => 'no',
        hiddengli => 'no',
    },
    {   name      => 'hostname',
        desc      => 'Domain hostname of Fedora server',
        type      => 'string',
        deft      => $ENV{'FEDORA_HOSTNAME'},
        reqd      => 'no',
        hiddengli => 'no',
    },
    {   name      => 'port',
        desc      => 'Port that the Fedora server is running on.',
        type      => 'string',
        deft      => $ENV{'FEDORA_SERVER_PORT'},
        reqd      => 'no',
        hiddengli => 'no',
    },
    {   name      => 'username',
        desc      => 'Fedora admin username',
        type      => 'string',
        deft      => $ENV{'FEDORA_USER'},
        reqd      => 'no',
        hiddengli => 'no',
    },
    {   name      => 'password',
        desc      => 'Fedora admin password',
        type      => 'string',
        deft      => $ENV{'FEDORA_PASS'},
        reqd      => 'no',
        hiddengli => 'no',
    },
    {   name      => 'protocol',
        desc      => "Fedora protocol, e.g. 'http' or 'https'",
        type      => 'string',
        deft      => $ENV{'FEDORA_PROTOCOL'},
        reqd      => 'no',
        hiddengli => 'no',
    },
    {   name      => 'pidnamespace',
        desc      => 'Fedora prefix for PIDs',
        type      => 'string',
        deft      => $ENV{'FEDORA_PID_NAMESPACE'},
        reqd      => 'no',
        hiddengli => 'no',
    },
    {   name      => 'gli',
        desc      => '',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'xml',
        desc      => '{scripts.xml}',
        type      => 'flag',
        reqd      => 'no',
        hiddengli => 'yes',
    },
    {   name      => 'language',
        desc      => '{scripts.language}',
        type      => 'string',
        reqd      => 'no',
        modegli   => '3',
    },
    {   name      => 'collectdir',
        desc      => '{import.collectdir}',
        type      => 'string',
        deft      => '',
        reqd      => 'no',
        hiddengli => 'yes',
    },
];

# Program description passed to the usage printers.
my $prog_options = {
    name => 'g2f-deletecol.pl',
    desc => 'Delete a collection and its FedoraMETS documents ingested into Fedora',
    args => $arguments,
};
116
117
# Purge one Fedora object, provided the repository reports it as present.
#
#   $pid                  - Fedora PID to remove
#   $options              - parsed command-line options, handed through
#                           unchanged to the g2futil helpers
#   $fedoragsearch_webapp - result of g2futil::gsearch_webapp_folder();
#                           undef means no GSearch index has to be updated
#   $label                - text printed in front of the PID in progress
#                           messages ("" or "collection ")
#   $report_missing       - when true, print a "not present" line if the
#                           object is absent; when false stay silent
#
# Returns 1 if the object was found and a purge was issued, 0 otherwise.
sub _purge_pid_if_present
{
    my ($pid, $options, $fedoragsearch_webapp, $label, $report_missing) = @_;

    # A zero status from run_datastore_info is treated as "object exists".
    my $dsinfo_status = &g2futil::run_datastore_info($pid, $options);
    if ($dsinfo_status != 0) {
        print " ${label}${pid} not present.\n" if $report_missing;
        return 0;
    }

    # Remove the doc from the gsearch index before removing it from the
    # fedora repository.  Only announce the index deletion when it is
    # actually going to happen.
    if (defined $fedoragsearch_webapp) {
        print " deleting $pid from GSearch index\n";
        &g2futil::run_delete_from_index($fedoragsearch_webapp, $pid, $options);
    }

    print " ${label}${pid} being deleted.\n";
    &g2futil::run_purge($pid, $options);

    return 1;
}

# Entry point: delete the Fedora objects belonging to one Greenstone
# collection.
#
# Takes the command-line argument list.  After option parsing exactly one
# positional argument (the collection name) must remain; otherwise usage is
# printed and the sub returns without touching Fedora.  Dies (with an empty
# message) if the option parser reports an error.
#
# The collection directory on disk is never removed; a reminder is printed
# at the end if it still exists.
sub main
{
    # Work on a private copy so the parser does not alter the global @ARGV.
    my @args = @_;

    my $options = {};
    # general options available to all plugins
    my $intArgLeftinAfterParsing
        = parse2::parse(\@args, $arguments, $options, "allow_extra_options");

    # Something went wrong with parsing
    if ($intArgLeftinAfterParsing == -1) {
        &PrintUsage::print_txt_usage($prog_options, "[options] greenstone-col");
        die "\n";
    }

    my $xml = $options->{'xml'};
    my $gli = $options->{'gli'};

    # Exactly one collection name is expected; anything else gets usage
    # output in the requested format.
    if ($intArgLeftinAfterParsing != 1) {
        if ($xml) {
            &PrintUsage::print_xml_usage($prog_options);
        }
        else {
            &PrintUsage::print_txt_usage($prog_options, "[options] greenstone-col");
        }
        print "\n";
        return;
    }

    my $gs_col        = $args[0];
    my $pid_namespace = $options->{'pidnamespace'};
    my $collectdir    = $options->{'collectdir'};

    # No -collectdir given: prefer the Greenstone 3 default site when
    # GSDL3HOME is set, otherwise fall back to the Greenstone 2 location.
    if (!$collectdir) {
        if ($ENV{'GSDL3HOME'}) {
            $collectdir = &util::filename_cat($ENV{'GSDL3HOME'}, "sites", "localsite", "collect");
        }
        else {
            $collectdir = &util::filename_cat($ENV{'GSDLHOME'}, "collect");
        }
    }

    my $full_gs_col = &util::filename_cat($collectdir, $gs_col);

    # NOTE(review): this only warns and carries on -- presumably so that
    # Fedora objects can still be purged after the directory has been
    # removed from disk (see step 3 below).  Confirm before making it fatal.
    if (!-e $full_gs_col) {
        print STDERR "Unable to find Greenstone collection $full_gs_col\n";
    }

    my $export_dir = &util::filename_cat($full_gs_col, "export");

    print "***\n";
    print "* Deleting files of Fedora collection namespace: $pid_namespace\n";
    print "***\n";

    # => Delete individually!

    # set up fedoragsearch for updating the index upon ingesting documents
    my $fedoragsearch_webapp = &g2futil::gsearch_webapp_folder();

    # need the username and password preset in order to run fedoraGSearch's RESTClient script
    # this assumes that the fedoragsearch authentication details are the same as for fedora
    if (defined $fedoragsearch_webapp) {
        $ENV{'fgsUserName'} = $options->{'username'};
        $ENV{'fgsPassword'} = $options->{'password'};
    }

    my $collection_pid = "${pid_namespace}:${gs_col}-collection";

    if (opendir(my $export_dh, $export_dir)) {
        my @xml_files = grep { m/^greenstone-http.*\.xml$/ } readdir($export_dh);
        closedir($export_dh);

        # 1. purge all the (URL,hashID) metadata files that we inserted
        #    into fedora at the end of g2f-buildcol.pl
        #    convert the filenames into fedora-pids
        #    filename = greenstone-http=tmpcol-http-__test1-html.xml -> fpid = greenstone-http:tmpcol-http-__test1.html
        foreach my $file (@xml_files) {
            my $fedora_pid = $file;
            $fedora_pid =~ s/\.xml$//;          # drop the file extension
            $fedora_pid =~ s/\=/:/;             # first '=' becomes the namespace ':'
            $fedora_pid =~ s/(.*)-(.*)$/$1.$2/; # last '-' becomes '.'

            print STDERR "#### fedora_pid: $fedora_pid\n";
            &g2futil::run_purge($fedora_pid, $options); # displays error message if first time (nothing to purge)
            # these weren't indexed into fedoragsearch, so don't need to remove their pids from gsearch
        }

        my @hash_dirs = &g2futil::get_all_hash_dirs($export_dir);

        # 2. for each hash dir, purge its respective PID (includes fedora collection file)
        foreach my $hd (@hash_dirs) {
            my $hash_id = &g2futil::get_hash_id($hd);
            next unless defined $hash_id;

            _purge_pid_if_present("${pid_namespace}:${gs_col}-${hash_id}",
                                  $options, $fedoragsearch_webapp, "", 1);
        }
    }
    else {
        print STDERR "*** Unable to open directory $export_dir: $!\n";

        # 3. even when there's no documents ingested into the collection (no export dir),
        #    the collection file still exists, so purge just the collection file now
        _purge_pid_if_present($collection_pid, $options, $fedoragsearch_webapp,
                              "collection ", 1);
    }

    # 4. need to specially delete the collection file when using FLI
    #    (silent when the collection object is already gone)
    if ($gli) {
        _purge_pid_if_present($collection_pid, $options, $fedoragsearch_webapp,
                              "collection ", 0);
    }

    # The on-disk collection is left alone on purpose; just remind the user.
    if (&util::dir_exists($full_gs_col)) {
        print "\n**** If you wish the collection directory $full_gs_col to be removed, delete this manually.\n\n";
    }
}

&main(@ARGV);
296
297
298
Note: See TracBrowser for help on using the repository browser.