source: gs2-extensions/parallel-building/trunk/src/perllib/cgiactions/explodeaction.pm@ 24626

Last change on this file since 24626 was 24626, checked in by jmt12, 13 years ago

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl commited in the main trunk but not in the x64 branch)

File size: 13.7 KB
Line 
1###########################################################################
2#
3# explodeaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package explodeaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34use util;
35
36use JSON;
37
38use File::Basename;
39
40BEGIN {
41# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
42 require XML::Rules;
43}
44
45
46@explodeaction::ISA = ('baseaction');
47
48
49# 'a' for action, and 'c' for collection are also compulsorary, and
50# added in automatically by baseaction
51
52my $action_table =
53{
54 "explode-document" => { 'compulsory-args' => ["d"],
55 'optional-args' => [] },
56 "delete-document" => { 'compulsory-args' => ["d"],
57 'optional-args' => [ "onlyadd" ] },
58 "delete-document-array" => { 'compulsory-args' => ["json"],
59 'optional-args' => [ "onlyadd" ] }
60
61
62};
63
64
65sub new
66{
67 my $class = shift (@_);
68 my ($gsdl_cgi,$iis6_mode) = @_;
69
70 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
71
72 return bless $self, $class;
73}
74
75
76sub get_infodb_type
77{
78 my ($opt_site,$collect_home,$collect) = @_;
79
80 my $out = "STDERR";
81
82 $collect = &colcfg::use_collection($opt_site, $collect, $collect_home);
83
84 if ($collect eq "") {
85 print STDERR "Error: failed to find collection $collect in $collect_home\n";
86 print STDOUT "Content-type:text/plain\n\n";
87 print STDOUT "ERROR: Failed to find collection $collect\n";
88 exit 0;
89
90 }
91
92 # Read in the collection configuration file.
93 my ($config_filename, $gs_mode) = &colcfg::get_collect_cfg_name($out);
94 my $collectcfg = &colcfg::read_collection_cfg ($config_filename, $gs_mode);
95
96 return $collectcfg->{'infodbtype'};
97}
98
99
100sub docid_to_import_filenames
101{
102 my $self = shift @_;
103
104 my @docids = @_;
105
106 my $collect = $self->{'collect'};
107 my $gsdl_cgi = $self->{'gsdl_cgi'};
108 my $infodb_type = $self->{'infodbtype'};
109
110 # Derive the archives dir
111 my $site = $self->{'site'};
112 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
113 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
114 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
115
116 my $arcinfo_doc_filename
117 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
118 $archive_dir);
119
120 my %all_import_file_keys = ();
121
122 foreach my $docid (@docids) {
123 # Obtain the src and associated files specified docID
124
125 my $doc_rec
126 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
127 $docid);
128
129 my $src_files = $doc_rec->{'src-file'};
130 my $assoc_files = $doc_rec->{'assoc-file'};
131
132 if (defined $src_files) {
133 foreach my $if (@$src_files) {
134 $all_import_file_keys{$if} = 1;
135 }
136 }
137
138 if (defined $assoc_files) {
139 foreach my $if (@$assoc_files) {
140 $all_import_file_keys{$if} = 1;
141 }
142 }
143 }
144
145 my @all_import_files = keys %all_import_file_keys;
146
147 return \@all_import_files;
148}
149
150
151sub import_filenames_to_docids
152{
153 my $self = shift @_;
154 my ($import_filenames) = @_;
155
156 my $collect = $self->{'collect'};
157 my $gsdl_cgi = $self->{'gsdl_cgi'};
158 my $infodb_type = $self->{'infodbtype'};
159
160 # Derive the archives dir
161 my $site = $self->{'site'};
162 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
163 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
164 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
165
166 # Obtain the oids for the specified import filenames
167 my $arcinfo_src_filename
168 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-src",
169 $archive_dir);
170
171 my %all_oid_keys = ();
172
173 foreach my $if (@$import_filenames) {
174 $if = &util::upgrade_if_dos_filename($if);
175
176 print STDERR "*** looking import filename key \"$if\"\n";
177
178 my $src_rec
179 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_src_filename,
180 $if);
181 my $oids = $src_rec->{'oid'};
182
183 foreach my $o (@$oids) {
184 $all_oid_keys{$o} = 1;
185 }
186 }
187
188 my @all_oids = keys %all_oid_keys;
189
190 return \@all_oids;
191}
192
193
194sub remove_import_filenames
195{
196 my $self = shift @_;
197 my ($expanded_import_filenames) = @_;
198
199 foreach my $f (@$expanded_import_filenames) {
200 # If this document has been exploded before then
201 # its original source files will have already been removed
202 if (-e $f) {
203 &util::rm($f);
204 }
205 }
206}
207
208sub move_docoids_to_import
209{
210 my $self = shift @_;
211 my ($docids) = @_;
212
213 my $collect = $self->{'collect'};
214 my $gsdl_cgi = $self->{'gsdl_cgi'};
215 my $infodb_type = $self->{'infodbtype'};
216
217 # Derive the archives and import directories
218 my $site = $self->{'site'};
219 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
220
221 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
222 my $import_dir = &util::filename_cat($collect_dir,$collect,"import");
223
224 # Obtain the doc.xml path for the specified docID
225 my $arcinfo_doc_filename
226 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
227 $archive_dir);
228
229 foreach my $docid (@$docids) {
230
231 my $doc_rec
232 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
233 $docid);
234
235 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
236
237 # The $doc_xml_file is relative to the archives, so need to do
238 # a bit more work to make sure the right folder containing this
239 # is moved to the right place in the import folder
240
241 my $assoc_path = dirname($doc_xml_file);
242 my $import_assoc_dir = &util::filename_cat($import_dir,$assoc_path);
243 my $archive_assoc_dir = &util::filename_cat($archive_dir,$assoc_path);
244
245 # If assoc_path involves more than one sub directory, then need to make
246 # sure the necessary directories exist in the import area also.
247 # For example, if assoc_path is "a/b/c.dir" then need "import/a/b" to
248 # exists before moving "archives/a/b/c.dir" -> "import/a/b"
249 my $import_target_parent_dir = dirname($import_assoc_dir);
250
251 if (-d $import_assoc_dir) {
252 # detected version from previous explode => remove it
253 &util::rm_r($import_assoc_dir);
254 }
255 else {
256 # First time => make sure parent directory exists to move
257 # "c.dir" (see above) into
258
259 &util::mk_all_dir($import_target_parent_dir);
260 }
261
262 &util::cp_r($archive_assoc_dir,$import_target_parent_dir)
263 }
264}
265
266
267sub remove_docoids
268{
269 my $self = shift @_;
270 my ($docids) = @_;
271
272 my $collect = $self->{'collect'};
273 my $gsdl_cgi = $self->{'gsdl_cgi'};
274 my $infodb_type = $self->{'infodbtype'};
275
276 # Derive the archives and import directories
277 my $site = $self->{'site'};
278 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
279
280 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
281
282 # Obtain the doc.xml path for the specified docID
283 my $arcinfo_doc_filename
284 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
285 $archive_dir);
286
287 foreach my $docid (@$docids) {
288
289 my $doc_rec
290 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
291 $docid);
292
293 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
294
295 # The $doc_xml_file is relative to the archives, so need to do
296 # a bit more work to make sure the right folder containing this
297 # is moved to the right place in the import folder
298
299 my $assoc_path = dirname($doc_xml_file);
300 my $archive_assoc_dir = &util::filename_cat($archive_dir,$assoc_path);
301
302 &util::rm_r($archive_assoc_dir)
303 }
304}
305
306
307sub explode_document
308{
309 my $self = shift @_;
310
311 my $username = $self->{'username'};
312 my $collect = $self->{'collect'};
313 my $gsdl_cgi = $self->{'gsdl_cgi'};
314 my $gsdl_home = $self->{'gsdlhome'};
315
316 # Authenticate user if it is enabled
317 if ($baseaction::authentication_enabled) {
318 # Ensure the user is allowed to edit this collection
319 &authenticate_user($gsdl_cgi, $username, $collect);
320 }
321
322 # Derive the archives dir
323 my $site = $self->{'site'};
324 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
325
326 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
327 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
328
329 # Make sure the collection isn't locked by someone else
330 $self->lock_collection($username, $collect);
331
332 # look up additional args
333 my $docid = $self->{'d'};
334 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
335 $self->unlock_collection($username, $collect);
336 $gsdl_cgi->generate_error("No docid (d=...) specified.");
337 }
338
339 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
340
341 my $orig_import_filenames = $self->docid_to_import_filenames($docid_root);
342 my $docid_keys = $self->import_filenames_to_docids($orig_import_filenames);
343 my $expanded_import_filenames = $self->docid_to_import_filenames(@$docid_keys);
344
345 $self->remove_import_filenames($expanded_import_filenames);
346 $self->move_docoids_to_import($docid_keys);
347
348 # Release the lock once it is done
349 $self->unlock_collection($username, $collect);
350
351 my $mess = "Base Doc ID: $docid_root\n-----\n";
352 $mess .= join("\n",@$expanded_import_filenames);
353
354 $gsdl_cgi->generate_ok_message($mess);
355
356}
357
358
359sub delete_document_entry
360{
361 my $self = shift @_;
362 my ($docid_root,$opt_onlyadd) = @_;
363
364 my $docid_keys = [];
365 if ((defined $opt_onlyadd) && ($opt_onlyadd==1)) {
366 # delete docoid archive folder
367 push(@$docid_keys,$docid_root);
368 }
369 else {
370 print STDERR "**** Not currently implemented for the general case!!\nDeleting 'archive' version only.";
371
372 push(@$docid_keys,$docid_root);
373
374 #my $orig_import_filenames = $self->docid_to_import_filenames($docid_root);
375 #$docid_keys = $self->import_filenames_to_docids($orig_import_filenames);
376 #my $expanded_import_filenames = $self->docid_to_import_filenames(@$docid_keys);
377
378 # need to remove only the files that are not
379
380 #$self->remove_import_filenames($expanded_import_filenames);
381 }
382
383 $self->remove_docoids($docid_keys);
384}
385
386
387sub delete_document
388{
389 my $self = shift @_;
390
391 my $username = $self->{'username'};
392 my $collect = $self->{'collect'};
393 my $gsdl_cgi = $self->{'gsdl_cgi'};
394 my $gsdl_home = $self->{'gsdlhome'};
395
396 # Authenticate user if it is enabled
397 if ($baseaction::authentication_enabled) {
398 # Ensure the user is allowed to edit this collection
399 &authenticate_user($gsdl_cgi, $username, $collect);
400 }
401
402 # Derive the archives dir
403 my $site = $self->{'site'};
404 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
405
406 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
407 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
408
409 # Make sure the collection isn't locked by someone else
410 $self->lock_collection($username, $collect);
411
412 # look up additional args
413 my $docid = $self->{'d'};
414 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
415 $self->unlock_collection($username, $collect);
416 $gsdl_cgi->generate_error("No docid (d=...) specified.");
417 }
418
419 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
420
421 my $onlyadd = $self->{'onlyadd'};
422
423 my $status = $self->delete_document_entry($docid_root,$onlyadd);
424
425 # Release the lock once it is done
426 $self->unlock_collection($username, $collect);
427
428 my $mess = "delete-document successful: Key[$docid_root]\n";
429 $gsdl_cgi->generate_ok_message($mess);
430
431}
432
433
434sub delete_document_array
435{
436 my $self = shift @_;
437
438 my $username = $self->{'username'};
439 my $collect = $self->{'collect'};
440 my $gsdl_cgi = $self->{'gsdl_cgi'};
441 my $gsdlhome = $self->{'gsdlhome'};
442
443 if ($baseaction::authentication_enabled) {
444 # Ensure the user is allowed to edit this collection
445 &authenticate_user($gsdl_cgi, $username, $collect);
446 }
447
448 my $site = $self->{'site'};
449 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
450
451 $gsdl_cgi->checked_chdir($collect_dir);
452
453 # Obtain the collect dir
454 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
455
456 # Make sure the collection isn't locked by someone else
457 $self->lock_collection($username, $collect);
458
459 # look up additional args
460
461 my $json_str = $self->{'json'};
462 my $doc_array = decode_json $json_str;
463
464 my $onlyadd = $self->{'onlyadd'};
465
466
467 my $global_status = 0;
468 my $global_mess = "";
469
470 my @all_docids = ();
471
472 foreach my $doc_array_rec ( @$doc_array ) {
473
474 my $docid = $doc_array_rec->{'docid'};
475
476 push(@all_docids,$docid);
477
478 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
479
480 my $status = $self->delete_document_entry($docid_root,$onlyadd);
481
482 if ($status != 0) {
483 # Catch error if set infodb entry failed
484 $global_status = $status;
485 $global_mess .= "Failed to delete document key: $docid\n";
486 $global_mess .= "Exit status: $status\n";
487 $global_mess .= "System Error Message: $!\n";
488 $global_mess .= "-" x 20;
489 }
490 }
491
492 if ($global_status != 0) {
493 $global_mess .= "PATH: $ENV{'PATH'}\n";
494 $gsdl_cgi->generate_error($global_mess);
495 }
496 else {
497 my $mess = "delete-document-array successful: Keys[ ".join(", ",@all_docids)."]\n";
498 $gsdl_cgi->generate_ok_message($mess);
499 }
500
501 # Release the lock once it is done
502 $self->unlock_collection($username, $collect);
503}
504
505
506
5071;
Note: See TracBrowser for help on using the repository browser.