source: main/trunk/greenstone2/perllib/cgiactions/explodeaction.pm@ 24071

Last change on this file since 24071 was 24071, checked in by davidb, 13 years ago

Introduction of actions that take an array of items (e.g. an array of OIDs or filenames). In adding in this ability, we have started to make use of JSON.

Another action added in is the ability to control building using a manifest file (its fields passed in using JSON). Also the ability to delete files in the archives directory (i.e. when a collection is beeing used in an 'onlyadd' way). Still needs to the more general case to be implemented.

/DB/

File size: 13.7 KB
Line 
1###########################################################################
2#
3# explodeaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package explodeaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34use util;
35
36use JSON;
37
38use File::Basename;
39
40BEGIN {
41# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
42 require XML::Rules;
43}
44
45
46@explodeaction::ISA = ('baseaction');
47
48
49# 'a' for action, and 'c' for collection are also compulsorary, and
50# added in automatically by baseaction
51
52my $action_table =
53{
54 "explode-document" => { 'compulsory-args' => ["d"],
55 'optional-args' => [] },
56 "delete-document" => { 'compulsory-args' => ["d"],
57 'optional-args' => [ "onlyadd" ] },
58 "delete-document-array" => { 'compulsory-args' => ["json"],
59 'optional-args' => [ "onlyadd" ] }
60
61
62};
63
64
65sub new
66{
67 my $class = shift (@_);
68 my ($gsdl_cgi,$iis6_mode) = @_;
69
70 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
71
72 return bless $self, $class;
73}
74
75
76sub get_infodb_type
77{
78 my ($opt_site,$collect_home,$collect) = @_;
79
80 my $out = "STDERR";
81
82 $collect = &colcfg::use_collection($opt_site, $collect, $collect_home);
83
84 if ($collect eq "") {
85 print STDERR "Error: failed to find collection $collect in $collect_home\n";
86 print STDOUT "Content-type:text/plain\n\n";
87 print STDOUT "ERROR: Failed to find collection $collect\n";
88 exit 0;
89
90 }
91
92 # Read in the collection configuration file.
93 my ($config_filename, $gs_mode) = &colcfg::get_collect_cfg_name($out);
94 my $collectcfg = &colcfg::read_collection_cfg ($config_filename, $gs_mode);
95
96 return $collectcfg->{'infodbtype'};
97}
98
99
100sub docid_to_import_filenames
101{
102 my $self = shift @_;
103
104 my @docids = @_;
105
106 my $collect = $self->{'collect'};
107 my $gsdl_cgi = $self->{'gsdl_cgi'};
108 my $infodb_type = $self->{'infodbtype'};
109
110 # Derive the archives dir
111 my $site = $self->{'site'};
112 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
113 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
114 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
115
116 my $arcinfo_doc_filename
117 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
118 $archive_dir);
119
120 my %all_import_file_keys = ();
121
122 foreach my $docid (@docids) {
123 # Obtain the src and associated files specified docID
124
125 my $doc_rec
126 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
127 $docid);
128
129 my $src_files = $doc_rec->{'src-file'};
130 my $assoc_files = $doc_rec->{'assoc-file'};
131
132 if (defined $src_files) {
133 foreach my $if (@$src_files) {
134 $all_import_file_keys{$if} = 1;
135 }
136 }
137
138 if (defined $assoc_files) {
139 foreach my $if (@$assoc_files) {
140 $all_import_file_keys{$if} = 1;
141 }
142 }
143 }
144
145 my @all_import_files = keys %all_import_file_keys;
146
147 return \@all_import_files;
148}
149
150
151sub import_filenames_to_docids
152{
153 my $self = shift @_;
154 my ($import_filenames) = @_;
155
156 my $collect = $self->{'collect'};
157 my $gsdl_cgi = $self->{'gsdl_cgi'};
158 my $infodb_type = $self->{'infodbtype'};
159
160 # Derive the archives dir
161 my $site = $self->{'site'};
162 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
163 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
164 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
165
166 # Obtain the oids for the specified import filenames
167 my $arcinfo_src_filename
168 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-src",
169 $archive_dir);
170
171 my %all_oid_keys = ();
172
173 foreach my $if (@$import_filenames) {
174 $if = &util::upgrade_if_dos_filename($if);
175
176 print STDERR "*** looking import filename key \"$if\"\n";
177
178 my $src_rec
179 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_src_filename,
180 $if);
181 my $oids = $src_rec->{'oid'};
182
183 foreach my $o (@$oids) {
184 $all_oid_keys{$o} = 1;
185 }
186 }
187
188 my @all_oids = keys %all_oid_keys;
189
190 return \@all_oids;
191}
192
193
194sub remove_import_filenames
195{
196 my $self = shift @_;
197 my ($expanded_import_filenames) = @_;
198
199 foreach my $f (@$expanded_import_filenames) {
200 # If this document has been exploded before then
201 # its original source files will have already been removed
202 if (-e $f) {
203 &util::rm($f);
204 }
205 }
206}
207
208sub move_docoids_to_import
209{
210 my $self = shift @_;
211 my ($docids) = @_;
212
213 my $collect = $self->{'collect'};
214 my $gsdl_cgi = $self->{'gsdl_cgi'};
215 my $infodb_type = $self->{'infodbtype'};
216
217 # Derive the archives and import directories
218 my $site = $self->{'site'};
219 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
220
221 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
222 my $import_dir = &util::filename_cat($collect_dir,$collect,"import");
223
224 # Obtain the doc.xml path for the specified docID
225 my $arcinfo_doc_filename
226 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
227 $archive_dir);
228
229 foreach my $docid (@$docids) {
230
231 my $doc_rec
232 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
233 $docid);
234
235 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
236
237 # The $doc_xml_file is relative to the archives, so need to do
238 # a bit more work to make sure the right folder containing this
239 # is moved to the right place in the import folder
240
241 my $assoc_path = dirname($doc_xml_file);
242 my $import_assoc_dir = &util::filename_cat($import_dir,$assoc_path);
243 my $archive_assoc_dir = &util::filename_cat($archive_dir,$assoc_path);
244
245 # If assoc_path involves more than one sub directory, then need to make
246 # sure the necessary directories exist in the import area also.
247 # For example, if assoc_path is "a/b/c.dir" then need "import/a/b" to
248 # exists before moving "archives/a/b/c.dir" -> "import/a/b"
249 my $import_target_parent_dir = dirname($import_assoc_dir);
250
251 if (-d $import_assoc_dir) {
252 # detected version from previous explode => remove it
253 &util::rm_r($import_assoc_dir);
254 }
255 else {
256 # First time => make sure parent directory exists to move
257 # "c.dir" (see above) into
258
259 &util::mk_all_dir($import_target_parent_dir);
260 }
261
262 &util::cp_r($archive_assoc_dir,$import_target_parent_dir)
263 }
264}
265
266
267sub remove_docoids
268{
269 my $self = shift @_;
270 my ($docids) = @_;
271
272 my $collect = $self->{'collect'};
273 my $gsdl_cgi = $self->{'gsdl_cgi'};
274 my $infodb_type = $self->{'infodbtype'};
275
276 # Derive the archives and import directories
277 my $site = $self->{'site'};
278 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
279
280 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
281
282 # Obtain the doc.xml path for the specified docID
283 my $arcinfo_doc_filename
284 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
285 $archive_dir);
286
287 foreach my $docid (@$docids) {
288
289 my $doc_rec
290 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
291 $docid);
292
293 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
294
295 # The $doc_xml_file is relative to the archives, so need to do
296 # a bit more work to make sure the right folder containing this
297 # is moved to the right place in the import folder
298
299 my $assoc_path = dirname($doc_xml_file);
300 my $archive_assoc_dir = &util::filename_cat($archive_dir,$assoc_path);
301
302 &util::rm_r($archive_assoc_dir)
303 }
304}
305
306
307sub explode_document
308{
309 my $self = shift @_;
310
311 my $username = $self->{'username'};
312 my $collect = $self->{'collect'};
313 my $gsdl_cgi = $self->{'gsdl_cgi'};
314 my $gsdl_home = $self->{'gsdlhome'};
315
316 # Authenticate user if it is enabled
317 if ($baseaction::authentication_enabled) {
318 # Ensure the user is allowed to edit this collection
319 &authenticate_user($gsdl_cgi, $username, $collect);
320 }
321
322 # Derive the archives dir
323 my $site = $self->{'site'};
324 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
325
326 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
327 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
328
329 # Make sure the collection isn't locked by someone else
330 $self->lock_collection($username, $collect);
331
332 # look up additional args
333 my $docid = $self->{'d'};
334 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
335 $self->unlock_collection($username, $collect);
336 $gsdl_cgi->generate_error("No docid (d=...) specified.");
337 }
338
339 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
340
341 my $orig_import_filenames = $self->docid_to_import_filenames($docid_root);
342 my $docid_keys = $self->import_filenames_to_docids($orig_import_filenames);
343 my $expanded_import_filenames = $self->docid_to_import_filenames(@$docid_keys);
344
345 $self->remove_import_filenames($expanded_import_filenames);
346 $self->move_docoids_to_import($docid_keys);
347
348 # Release the lock once it is done
349 $self->unlock_collection($username, $collect);
350
351 my $mess = "Base Doc ID: $docid_root\n-----\n";
352 $mess .= join("\n",@$expanded_import_filenames);
353
354 $gsdl_cgi->generate_ok_message($mess);
355
356}
357
358
359sub delete_document_entry
360{
361 my $self = shift @_;
362 my ($docid_root,$opt_onlyadd) = @_;
363
364 my $docid_keys = [];
365 if ((defined $opt_onlyadd) && ($opt_onlyadd==1)) {
366 # delete docoid archive folder
367 push(@$docid_keys,$docid_root);
368 }
369 else {
370 print STDERR "**** Not currently implemented for the general case!!\nDeleting 'archive' version only.";
371
372 push(@$docid_keys,$docid_root);
373
374 #my $orig_import_filenames = $self->docid_to_import_filenames($docid_root);
375 #$docid_keys = $self->import_filenames_to_docids($orig_import_filenames);
376 #my $expanded_import_filenames = $self->docid_to_import_filenames(@$docid_keys);
377
378 # need to remove only the files that are not
379
380 #$self->remove_import_filenames($expanded_import_filenames);
381 }
382
383 $self->remove_docoids($docid_keys);
384}
385
386
387sub delete_document
388{
389 my $self = shift @_;
390
391 my $username = $self->{'username'};
392 my $collect = $self->{'collect'};
393 my $gsdl_cgi = $self->{'gsdl_cgi'};
394 my $gsdl_home = $self->{'gsdlhome'};
395
396 # Authenticate user if it is enabled
397 if ($baseaction::authentication_enabled) {
398 # Ensure the user is allowed to edit this collection
399 &authenticate_user($gsdl_cgi, $username, $collect);
400 }
401
402 # Derive the archives dir
403 my $site = $self->{'site'};
404 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
405
406 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
407 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
408
409 # Make sure the collection isn't locked by someone else
410 $self->lock_collection($username, $collect);
411
412 # look up additional args
413 my $docid = $self->{'d'};
414 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
415 $self->unlock_collection($username, $collect);
416 $gsdl_cgi->generate_error("No docid (d=...) specified.");
417 }
418
419 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
420
421 my $onlyadd = $self->{'onlyadd'};
422
423 my $status = $self->delete_document_entry($docid_root,$onlyadd);
424
425 # Release the lock once it is done
426 $self->unlock_collection($username, $collect);
427
428 my $mess = "delete-document successful: Key[$docid_root]\n";
429 $gsdl_cgi->generate_ok_message($mess);
430
431}
432
433
434sub delete_document_array
435{
436 my $self = shift @_;
437
438 my $username = $self->{'username'};
439 my $collect = $self->{'collect'};
440 my $gsdl_cgi = $self->{'gsdl_cgi'};
441 my $gsdlhome = $self->{'gsdlhome'};
442
443 if ($baseaction::authentication_enabled) {
444 # Ensure the user is allowed to edit this collection
445 &authenticate_user($gsdl_cgi, $username, $collect);
446 }
447
448 my $site = $self->{'site'};
449 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
450
451 $gsdl_cgi->checked_chdir($collect_dir);
452
453 # Obtain the collect dir
454 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
455
456 # Make sure the collection isn't locked by someone else
457 $self->lock_collection($username, $collect);
458
459 # look up additional args
460
461 my $json_str = $self->{'json'};
462 my $doc_array = decode_json $json_str;
463
464 my $onlyadd = $self->{'onlyadd'};
465
466
467 my $global_status = 0;
468 my $global_mess = "";
469
470 my @all_docids = ();
471
472 foreach my $doc_array_rec ( @$doc_array ) {
473
474 my $docid = $doc_array_rec->{'docid'};
475
476 push(@all_docids,$docid);
477
478 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
479
480 my $status = $self->delete_document_entry($docid_root,$onlyadd);
481
482 if ($status != 0) {
483 # Catch error if set infodb entry failed
484 $global_status = $status;
485 $global_mess .= "Failed to delete document key: $docid\n";
486 $global_mess .= "Exit status: $status\n";
487 $global_mess .= "System Error Message: $!\n";
488 $global_mess .= "-" x 20;
489 }
490 }
491
492 if ($global_status != 0) {
493 $global_mess .= "PATH: $ENV{'PATH'}\n";
494 $gsdl_cgi->generate_error($global_mess);
495 }
496 else {
497 my $mess = "delete-document-array successful: Keys[ ".join(", ",@all_docids)."]\n";
498 $gsdl_cgi->generate_ok_message($mess);
499 }
500
501 # Release the lock once it is done
502 $self->unlock_collection($username, $collect);
503}
504
505
506
5071;
Note: See TracBrowser for help on using the repository browser.