source: main/trunk/greenstone2/perllib/cgiactions/explodeaction.pm@ 28211

Last change on this file since 28211 was 28211, checked in by ak19, 11 years ago

No more absolute paths in archiveinf-doc.gdb and archiveinf-src.gdb

File size: 14.0 KB
Line 
1###########################################################################
2#
3# explodeaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package explodeaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34use util;
35
36use JSON;
37
38use File::Basename;
39
40BEGIN {
41# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
42 require XML::Rules;
43}
44
45
46@explodeaction::ISA = ('baseaction');
47
48
49# 'a' for action, and 'c' for collection are also compulsorary, and
50# added in automatically by baseaction
51
52my $action_table =
53{
54 "explode-document" => { 'compulsory-args' => ["d"],
55 'optional-args' => [] },
56 "delete-document" => { 'compulsory-args' => ["d"],
57 'optional-args' => [ "onlyadd" ] },
58 "delete-document-array" => { 'compulsory-args' => ["json"],
59 'optional-args' => [ "onlyadd" ] }
60
61
62};
63
64
65sub new
66{
67 my $class = shift (@_);
68 my ($gsdl_cgi,$iis6_mode) = @_;
69
70 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
71
72 return bless $self, $class;
73}
74
75
76sub get_infodb_type
77{
78 my ($opt_site,$collect_home,$collect) = @_;
79
80 my $out = "STDERR";
81
82 $collect = &colcfg::use_collection($opt_site, $collect, $collect_home);
83
84 if ($collect eq "") {
85 print STDERR "Error: failed to find collection $collect in $collect_home\n";
86 print STDOUT "Content-type:text/plain\n\n";
87 print STDOUT "ERROR: Failed to find collection $collect\n";
88 exit 0;
89
90 }
91
92 # Read in the collection configuration file.
93 my $gs_mode = "gs2";
94 if ((defined $site) && ($site ne "")) { # GS3
95 $gs_mode = "gs3";
96 }
97 my $config_filename = &colcfg::get_collect_cfg_name($out, $gs_mode);
98 my $collectcfg = &colcfg::read_collection_cfg ($config_filename, $gs_mode);
99
100 return $collectcfg->{'infodbtype'};
101}
102
103
104sub docid_to_import_filenames
105{
106 my $self = shift @_;
107
108 my @docids = @_;
109
110 my $collect = $self->{'collect'};
111 my $gsdl_cgi = $self->{'gsdl_cgi'};
112 my $infodb_type = $self->{'infodbtype'};
113
114 # Derive the archives dir
115 my $site = $self->{'site'};
116 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
117 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
118 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
119
120 my $arcinfo_doc_filename
121 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
122 $archive_dir);
123
124 my %all_import_file_keys = ();
125
126 foreach my $docid (@docids) {
127 # Obtain the src and associated files specified docID
128
129 my $doc_rec
130 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
131 $docid);
132
133 my $src_files = $doc_rec->{'src-file'};
134 my $assoc_files = $doc_rec->{'assoc-file'};
135
136 if (defined $src_files) {
137 foreach my $ifile (@$src_files) {
138 $ifile = &util::placeholders_to_abspath($ifile);
139 $all_import_file_keys{$ifile} = 1;
140 }
141 }
142
143 if (defined $assoc_files) {
144 foreach my $ifile (@$assoc_files) {
145 $ifile = &util::placeholders_to_abspath($ifile);
146 $all_import_file_keys{$ifile} = 1;
147 }
148 }
149 }
150
151 my @all_import_files = keys %all_import_file_keys;
152
153 return \@all_import_files;
154}
155
156
157sub import_filenames_to_docids
158{
159 my $self = shift @_;
160 my ($import_filenames) = @_;
161
162 my $collect = $self->{'collect'};
163 my $gsdl_cgi = $self->{'gsdl_cgi'};
164 my $infodb_type = $self->{'infodbtype'};
165
166 # Derive the archives dir
167 my $site = $self->{'site'};
168 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
169 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
170 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
171
172 # Obtain the oids for the specified import filenames
173 my $arcinfo_src_filename
174 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-src",
175 $archive_dir);
176
177 my %all_oid_keys = ();
178
179 foreach my $ifile (@$import_filenames) {
180 $ifile = &util::upgrade_if_dos_filename($ifile);
181 $ifile = &util::abspath_to_placeholders($ifile);
182
183 print STDERR "*** looking up import filename key \"$ifile\"\n";
184
185 my $src_rec
186 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_src_filename,
187 $ifile);
188
189 my $oids = $src_rec->{'oid'};
190
191 foreach my $o (@$oids) {
192 $all_oid_keys{$o} = 1;
193 }
194 }
195
196 my @all_oids = keys %all_oid_keys;
197
198 return \@all_oids;
199}
200
201
202sub remove_import_filenames
203{
204 my $self = shift @_;
205 my ($expanded_import_filenames) = @_;
206
207 foreach my $f (@$expanded_import_filenames) {
208 # If this document has been exploded before then
209 # its original source files will have already been removed
210 if (-e $f) {
211 &util::rm($f);
212 }
213 }
214}
215
216sub move_docoids_to_import
217{
218 my $self = shift @_;
219 my ($docids) = @_;
220
221 my $collect = $self->{'collect'};
222 my $gsdl_cgi = $self->{'gsdl_cgi'};
223 my $infodb_type = $self->{'infodbtype'};
224
225 # Derive the archives and import directories
226 my $site = $self->{'site'};
227 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
228
229 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
230 my $import_dir = &util::filename_cat($collect_dir,$collect,"import");
231
232 # Obtain the doc.xml path for the specified docID
233 my $arcinfo_doc_filename
234 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
235 $archive_dir);
236
237 foreach my $docid (@$docids) {
238
239 my $doc_rec
240 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
241 $docid);
242
243 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
244
245 # The $doc_xml_file is relative to the archives, so need to do
246 # a bit more work to make sure the right folder containing this
247 # is moved to the right place in the import folder
248
249 my $assoc_path = dirname($doc_xml_file);
250 my $import_assoc_dir = &util::filename_cat($import_dir,$assoc_path);
251 my $archive_assoc_dir = &util::filename_cat($archive_dir,$assoc_path);
252
253 # If assoc_path involves more than one sub directory, then need to make
254 # sure the necessary directories exist in the import area also.
255 # For example, if assoc_path is "a/b/c.dir" then need "import/a/b" to
256 # exists before moving "archives/a/b/c.dir" -> "import/a/b"
257 my $import_target_parent_dir = dirname($import_assoc_dir);
258
259 if (-d $import_assoc_dir) {
260 # detected version from previous explode => remove it
261 &util::rm_r($import_assoc_dir);
262 }
263 else {
264 # First time => make sure parent directory exists to move
265 # "c.dir" (see above) into
266
267 &util::mk_all_dir($import_target_parent_dir);
268 }
269
270 &util::cp_r($archive_assoc_dir,$import_target_parent_dir)
271 }
272}
273
274
275sub remove_docoids
276{
277 my $self = shift @_;
278 my ($docids) = @_;
279
280 my $collect = $self->{'collect'};
281 my $gsdl_cgi = $self->{'gsdl_cgi'};
282 my $infodb_type = $self->{'infodbtype'};
283
284 # Derive the archives and import directories
285 my $site = $self->{'site'};
286 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
287
288 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
289
290 # Obtain the doc.xml path for the specified docID
291 my $arcinfo_doc_filename
292 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
293 $archive_dir);
294
295 foreach my $docid (@$docids) {
296
297 my $doc_rec
298 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
299 $docid);
300
301 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
302
303 # The $doc_xml_file is relative to the archives, so need to do
304 # a bit more work to make sure the right folder containing this
305 # is moved to the right place in the import folder
306
307 my $assoc_path = dirname($doc_xml_file);
308 my $archive_assoc_dir = &util::filename_cat($archive_dir,$assoc_path);
309
310 &util::rm_r($archive_assoc_dir)
311 }
312}
313
314
315sub explode_document
316{
317 my $self = shift @_;
318
319 my $username = $self->{'username'};
320 my $collect = $self->{'collect'};
321 my $gsdl_cgi = $self->{'gsdl_cgi'};
322 my $gsdl_home = $self->{'gsdlhome'};
323
324 # Authenticate user if it is enabled
325 if ($baseaction::authentication_enabled) {
326 # Ensure the user is allowed to edit this collection
327 &authenticate_user($gsdl_cgi, $username, $collect);
328 }
329
330 # Derive the archives dir
331 my $site = $self->{'site'};
332 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
333
334 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
335 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
336
337 # Make sure the collection isn't locked by someone else
338 $self->lock_collection($username, $collect);
339
340 # look up additional args
341 my $docid = $self->{'d'};
342 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
343 $self->unlock_collection($username, $collect);
344 $gsdl_cgi->generate_error("No docid (d=...) specified.");
345 }
346
347 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
348
349 my $orig_import_filenames = $self->docid_to_import_filenames($docid_root);
350 my $docid_keys = $self->import_filenames_to_docids($orig_import_filenames);
351 my $expanded_import_filenames = $self->docid_to_import_filenames(@$docid_keys);
352
353 $self->remove_import_filenames($expanded_import_filenames);
354 $self->move_docoids_to_import($docid_keys);
355
356 # Release the lock once it is done
357 $self->unlock_collection($username, $collect);
358
359 my $mess = "Base Doc ID: $docid_root\n-----\n";
360 $mess .= join("\n",@$expanded_import_filenames);
361
362 $gsdl_cgi->generate_ok_message($mess);
363
364}
365
366
367sub delete_document_entry
368{
369 my $self = shift @_;
370 my ($docid_root,$opt_onlyadd) = @_;
371
372 my $docid_keys = [];
373 if ((defined $opt_onlyadd) && ($opt_onlyadd==1)) {
374 # delete docoid archive folder
375 push(@$docid_keys,$docid_root);
376 }
377 else {
378 print STDERR "**** Not currently implemented for the general case!!\nDeleting 'archive' version only.";
379
380 push(@$docid_keys,$docid_root);
381
382 #my $orig_import_filenames = $self->docid_to_import_filenames($docid_root);
383 #$docid_keys = $self->import_filenames_to_docids($orig_import_filenames);
384 #my $expanded_import_filenames = $self->docid_to_import_filenames(@$docid_keys);
385
386 # need to remove only the files that are not
387
388 #$self->remove_import_filenames($expanded_import_filenames);
389 }
390
391 $self->remove_docoids($docid_keys);
392}
393
394
395sub delete_document
396{
397 my $self = shift @_;
398
399 my $username = $self->{'username'};
400 my $collect = $self->{'collect'};
401 my $gsdl_cgi = $self->{'gsdl_cgi'};
402 my $gsdl_home = $self->{'gsdlhome'};
403
404 # Authenticate user if it is enabled
405 if ($baseaction::authentication_enabled) {
406 # Ensure the user is allowed to edit this collection
407 &authenticate_user($gsdl_cgi, $username, $collect);
408 }
409
410 # Derive the archives dir
411 my $site = $self->{'site'};
412 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
413
414 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
415 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
416
417 # Make sure the collection isn't locked by someone else
418 $self->lock_collection($username, $collect);
419
420 # look up additional args
421 my $docid = $self->{'d'};
422 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
423 $self->unlock_collection($username, $collect);
424 $gsdl_cgi->generate_error("No docid (d=...) specified.");
425 }
426
427 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
428
429 my $onlyadd = $self->{'onlyadd'};
430
431 my $status = $self->delete_document_entry($docid_root,$onlyadd);
432
433 # Release the lock once it is done
434 $self->unlock_collection($username, $collect);
435
436 my $mess = "delete-document successful: Key[$docid_root]\n";
437 $gsdl_cgi->generate_ok_message($mess);
438
439}
440
441
442sub delete_document_array
443{
444 my $self = shift @_;
445
446 my $username = $self->{'username'};
447 my $collect = $self->{'collect'};
448 my $gsdl_cgi = $self->{'gsdl_cgi'};
449 my $gsdlhome = $self->{'gsdlhome'};
450
451 if ($baseaction::authentication_enabled) {
452 # Ensure the user is allowed to edit this collection
453 &authenticate_user($gsdl_cgi, $username, $collect);
454 }
455
456 my $site = $self->{'site'};
457 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
458
459 $gsdl_cgi->checked_chdir($collect_dir);
460
461 # Obtain the collect dir
462 ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
463
464 # Make sure the collection isn't locked by someone else
465 $self->lock_collection($username, $collect);
466
467 # look up additional args
468
469 my $json_str = $self->{'json'};
470 my $doc_array = decode_json $json_str;
471
472 my $onlyadd = $self->{'onlyadd'};
473
474
475 my $global_status = 0;
476 my $global_mess = "";
477
478 my @all_docids = ();
479
480 foreach my $doc_array_rec ( @$doc_array ) {
481
482 my $docid = $doc_array_rec->{'docid'};
483
484 push(@all_docids,$docid);
485
486 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
487
488 my $status = $self->delete_document_entry($docid_root,$onlyadd);
489
490 if ($status != 0) {
491 # Catch error if set infodb entry failed
492 $global_status = $status;
493 $global_mess .= "Failed to delete document key: $docid\n";
494 $global_mess .= "Exit status: $status\n";
495 $global_mess .= "System Error Message: $!\n";
496 $global_mess .= "-" x 20;
497 }
498 }
499
500 if ($global_status != 0) {
501 $global_mess .= "PATH: $ENV{'PATH'}\n";
502 $gsdl_cgi->generate_error($global_mess);
503 }
504 else {
505 my $mess = "delete-document-array successful: Keys[ ".join(", ",@all_docids)."]\n";
506 $gsdl_cgi->generate_ok_message($mess);
507 }
508
509 # Release the lock once it is done
510 $self->unlock_collection($username, $collect);
511}
512
513
514
5151;
Note: See TracBrowser for help on using the repository browser.