source: main/trunk/greenstone2/bin/script/g2f-deletecol.pl@ 26418

Last change on this file since 26418 was 26333, checked in by ak19, 12 years ago
  1. Introduced new perl script for deleting a fedora collection (g2f-deletecol.pl) because, when deleting a collection that's been ingested into fedora, both the fedora collection file and the documents in the collection (METS files) that have been ingested into fedora need to be purged. Also, these docs need to be removed from the fedoragsearch index if gsearch is installed. The command line script does not delete the Greenstone collection directory; that has to be done manually at this stage, since GLI already does that part. 2. FLI calls the new perl script g2f-deletecol.pl when the user chooses to have a collection deleted.
  • Property svn:executable set to *
File size: 8.5 KB
Line 
1#!/usr/bin/perl -w
2
BEGIN
{
    # Variables that must already be present in the environment, checked in
    # this order. Each entry pairs the variable name with the second line of
    # the message printed to STDERR before the script exits with status 1.
    my @required = (
        [ 'GSDLHOME',  " Have you sourced Greenstone's 'setup.bash' file?\n" ],
        [ 'JAVA_HOME', "Needed by Fedora command line scripts.\n" ],
    );

    foreach my $entry (@required) {
        my ($var_name, $hint) = @$entry;
        next if defined $ENV{$var_name};

        print STDERR "Environment variable $var_name not set.\n";
        print STDERR $hint;
        exit 1;
    }

    # Fedora connection settings: a value already present in the environment
    # always wins; otherwise the fallback listed here is installed.
    my %fedora_fallback = (
        'FEDORA_HOSTNAME'      => "localhost",
        'FEDORA_SERVER_PORT'   => "8080",
        'FEDORA_USER'          => "fedoraAdmin",
        'FEDORA_PASS'          => "fedoraAdmin",
        'FEDORA_PROTOCOL'      => "http",
        'FEDORA_PID_NAMESPACE' => "greenstone",
        'FEDORA_PREFIX'        => "/fedora",
    );

    foreach my $var_name (keys %fedora_fallback) {
        next if defined $ENV{$var_name};
        $ENV{$var_name} = $fedora_fallback{$var_name};
    }

    # Make Greenstone's own Perl modules loadable by the 'use' lines below.
    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/");
}
28
29
30use strict;
31no strict 'refs'; # allow filehandles to be variables and vice versa
32no strict 'subs'; # allow barewords (e.g. STDERR) as function arguments
33
34use util;
35use gsprintf 'gsprintf';
36use printusage;
37use parse2;
38use cfgread;
39use colcfg;
40
41use g2futil;
42
43use dbutil;
44
# Command line options understood by this script, in the table format that is
# handed to parse2::parse and to the PrintUsage routines in main().

# String options that all share the same shape (optional, not hidden from
# GLI). Each row is [ option name, description, default value ].
my @visible_string_options = map {
    my ($opt_name, $opt_desc, $opt_default) = @$_;
    +{ 'name'      => $opt_name,
       'desc'      => $opt_desc,
       'type'      => "string",
       'deft'      => $opt_default,
       'reqd'      => "no",
       'hiddengli' => "no" };
} (
    [ "verbosity",    "Level of verbosity generated",               '1' ],
    [ "hostname",     "Domain hostname of Fedora server",           $ENV{'FEDORA_HOSTNAME'} ],
    [ "port",         "Port that the Fedora server is running on.", $ENV{'FEDORA_SERVER_PORT'} ],
    [ "username",     "Fedora admin username",                      $ENV{'FEDORA_USER'} ],
    [ "password",     "Fedora admin password",                      $ENV{'FEDORA_PASS'} ],
    [ "protocol",     "Fedora protocol, e.g. 'http' or 'https'",    $ENV{'FEDORA_PROTOCOL'} ],
    [ "pidnamespace", "Fedora prefix for PIDs",                     $ENV{'FEDORA_PID_NAMESPACE'} ],
);

# Flags that are hidden from GLI. Each row is [ flag name, description ].
my @hidden_flags = map {
    my ($flag_name, $flag_desc) = @$_;
    +{ 'name'      => $flag_name,
       'desc'      => $flag_desc,
       'type'      => "flag",
       'reqd'      => "no",
       'hiddengli' => "yes" };
} (
    [ "gli", "" ],
    [ "xml", "{scripts.xml}" ],
);

my $arguments = [
    @visible_string_options,
    @hidden_flags,
    # The two remaining options do not fit either table above.
    { 'name'    => "language",
      'desc'    => "{scripts.language}",
      'type'    => "string",
      'reqd'    => "no",
      'modegli' => "3" },
    { 'name'      => "collectdir",
      'desc'      => "{import.collectdir}",
      'type'      => "string",
      'deft'      => "",
      'reqd'      => "no",
      'hiddengli' => "yes" },
];

# Program description used when printing usage information.
my $prog_options = {
    'name' => "g2f-deletecol.pl",
    'desc' => "Delete a collection and its FedoraMETS documents ingested into Fedora",
    'args' => $arguments,
};
116
117
# Purge one Fedora object, but only if the repository reports it as present.
#
# Parameters:
#   $pid                  - Fedora PID of the object to remove.
#   $label                - text inserted before the PID in progress messages
#                           ("" for documents, "collection " for the
#                           collection file).
#   $fedoragsearch_webapp - value returned by g2futil::gsearch_webapp_folder();
#                           when undefined the GSearch index is left alone
#                           (presumably this means GSearch is not installed).
#   $options              - parsed command line options, passed through to
#                           the g2futil helpers.
#   $report_absent        - when true, print a "not present" line if the
#                           object is not in the repository.
#
# Returns 1 if a purge was attempted, 0 if the object was not present.
sub _purge_pid_if_present
{
    my ($pid, $label, $fedoragsearch_webapp, $options, $report_absent) = @_;

    # A zero status from run_datastore_info is treated as "object exists".
    my $dsinfo_status = &g2futil::run_datastore_info($pid,$options);

    if ($dsinfo_status != 0) {
        print " ${label}${pid} not present.\n" if $report_absent;
        return 0;
    }

    # First remove the doc from the gsearch index before removing it from
    # the fedora repository. The message is only printed when the index is
    # really being updated, so the output never claims a deletion that did
    # not happen.
    if (defined $fedoragsearch_webapp) {
        print " deleting $pid from GSearch index\n";
        &g2futil::run_delete_from_index($fedoragsearch_webapp,$pid,$options);
    }

    print " ${label}${pid} being deleted.\n";
    &g2futil::run_purge($pid,$options);

    return 1;
}

# Script entry point: delete a Greenstone collection's objects from Fedora.
#
# Expects the command line (options followed by exactly one collection name)
# as its argument list. Prints usage and dies if option parsing fails; prints
# usage and returns if the number of remaining arguments is not one.
# Otherwise it purges, in order:
#   1. the greenstone-http*.xml metadata objects found in the export folder,
#   2. the object belonging to every hash directory in the export folder,
#   3. the collection object, when the export folder cannot be opened,
#   4. the collection object again when -gli was given (needed for FLI).
# The Greenstone collection directory itself is never removed.
sub main
{
    my (@args) = @_;

    my $options = {};
    # general options available to all plugins
    my $intArgLeftinAfterParsing = parse2::parse(\@args,$arguments,$options,"allow_extra_options");

    # Something went wrong with parsing
    if ($intArgLeftinAfterParsing == -1)
    {
        &PrintUsage::print_txt_usage($prog_options, "[options] greenstone-col");
        die "\n";
    }

    my $xml = $options->{'xml'};
    my $gli = $options->{'gli'};

    # Exactly one non-option argument (the collection name) must remain.
    if ($intArgLeftinAfterParsing != 1)
    {
        if ($xml) {
            &PrintUsage::print_xml_usage($prog_options);
        }
        else {
            &PrintUsage::print_txt_usage($prog_options, "[options] greenstone-col");
        }
        print "\n";
        return;
    }

    my $gs_col        = $args[0];
    my $pid_namespace = $options->{'pidnamespace'};

    # Work out the collect directory: an explicit -collectdir wins, then a
    # Greenstone 3 layout if GSDL3HOME is set, else the Greenstone 2 layout.
    my $collectdir = $options->{'collectdir'};

    if (!$collectdir) {
        if ($ENV{'GSDL3HOME'}) {
            $collectdir = &util::filename_cat($ENV{'GSDL3HOME'},"sites","localsite","collect");
        } else {
            $collectdir = &util::filename_cat($ENV{'GSDLHOME'},"collect");
        }
    }

    my $full_gs_col = &util::filename_cat($collectdir,$gs_col);

    # Only a warning: the script carries on so that the collection object
    # can still be purged from Fedora further down.
    if (!-e $full_gs_col) {
        print STDERR "Unable to find Greenstone collection $full_gs_col\n";
    }

    my $export_dir = &util::filename_cat($full_gs_col,"export");

    print "***\n";
    print "* Deleting files of Fedora collection $pid_namespace\n";
    print "***\n";

    # => Delete individually!

    # set up fedoragsearch for updating the index upon ingesting documents
    my $fedoragsearch_webapp = &g2futil::gsearch_webapp_folder();

    # need the username and password preset in order to run fedoraGSearch's RESTClient script
    # this assumes that the fedoragsearch authentication details are the same as for fedora
    if (defined $fedoragsearch_webapp) {
        $ENV{'fgsUserName'} = $options->{'username'};
        $ENV{'fgsPassword'} = $options->{'password'};
    }

    my $collection_pid = "$pid_namespace:$gs_col-collection";

    # Set once step 3 has already probed (and, if present, purged) the
    # collection object, so that step 4 does not repeat the same work.
    my $collection_file_handled = 0;

    if (opendir(my $export_dh, $export_dir)) {
        my @xml_files = grep { $_ =~ m/^greenstone-http.*\.xml$/ } readdir($export_dh);
        closedir $export_dh;

        # 1. purge all the (URL,hashID) metadata files that we inserted
        #    into fedora at the end of g2f-buildcol.pl
        # convert the filenames into fedora-pids
        # filename = greenstone-http=tmpcol-http-__test1-html.xml -> fpid = greenstone-http:tmpcol-http-__test1.html
        foreach my $file (@xml_files) {
            my $fedora_pid = $file;
            $fedora_pid =~ s/\.xml$//;               # drop the file extension
            $fedora_pid =~ s/\=/:/;                  # first '=' becomes the namespace separator
            $fedora_pid =~ s/(.*)-(.*)$/$1.$2/;      # last '-' becomes a '.'

            print STDERR "#### fedora_pid: $fedora_pid\n";
            &g2futil::run_purge($fedora_pid,$options); # displays error message if first time (nothing to purge)
            # these weren't indexed into fedoragsearch, so don't need to remove their pids from gsearch
        }

        my @hash_dirs = &g2futil::get_all_hash_dirs($export_dir);

        # 2. for each hash dir, purge its respective PID (includes fedora collection file)
        foreach my $hd (@hash_dirs) {
            my $hash_id = &g2futil::get_hash_id($hd);
            next if !defined $hash_id;

            _purge_pid_if_present("$pid_namespace:$gs_col-$hash_id", "",
                                  $fedoragsearch_webapp, $options, 1);
        }
    }
    else {
        print STDERR "*** Unable to open directory $export_dir: $!\n";

        # 3. even when there's no documents ingested into the collection (no export dir),
        #    the collection file still exists, so purge just the collection file now
        _purge_pid_if_present($collection_pid, "collection ",
                              $fedoragsearch_webapp, $options, 1);
        $collection_file_handled = 1;
    }

    # 4. need to specially delete the collection file when using FLI
    #    (stays silent if the collection object is not present)
    if ($gli && !$collection_file_handled) {
        _purge_pid_if_present($collection_pid, "collection ",
                              $fedoragsearch_webapp, $options, 0);
    }

    # The Greenstone collection directory is deliberately left in place.
    if (&util::dir_exists($full_gs_col)) {
        print "\n**** If you wish the collection directory $full_gs_col to be removed, delete this manually.\n\n";
    }
}
294
295&main(@ARGV);
296
297
298
Note: See TracBrowser for help on using the repository browser.