source: main/trunk/greenstone2/perllib/cgiactions/explodeaction.pm@ 28561

Last change on this file since 28561 was 28561, checked in by kjdon, 10 years ago

changing some util:: methods to FileUtils:: methods

File size: 14.3 KB
Line 
1###########################################################################
2#
3# explodeaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package explodeaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34use util;
35use FileUtils;
36
37use JSON;
38
39use File::Basename;
40
41BEGIN {
42# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
43 require XML::Rules;
44}
45
46
47@explodeaction::ISA = ('baseaction');
48
49
50# 'a' for action, and 'c' for collection are also compulsorary, and
51# added in automatically by baseaction
52
53my $action_table =
54{
55 "explode-document" => { 'compulsory-args' => ["d"],
56 'optional-args' => [] },
57 "delete-document" => { 'compulsory-args' => ["d"],
58 'optional-args' => [ "onlyadd" ] },
59 "delete-document-array" => { 'compulsory-args' => ["json"],
60 'optional-args' => [ "onlyadd" ] }
61
62
63};
64
65
66sub new
67{
68 my $class = shift (@_);
69 my ($gsdl_cgi,$iis6_mode) = @_;
70
71 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
72
73 return bless $self, $class;
74}
75
76
77sub get_infodb_type
78{
79 my ($opt_site,$collect_home,$collect) = @_;
80
81 my $out = "STDERR";
82
83 $collect = &colcfg::use_collection($opt_site, $collect, $collect_home);
84
85 if ($collect eq "") {
86 print STDERR "Error: failed to find collection $collect in $collect_home\n";
87 print STDOUT "Content-type:text/plain\n\n";
88 print STDOUT "ERROR: Failed to find collection $collect\n";
89 exit 0;
90
91 }
92
93 # Read in the collection configuration file.
94 my $gs_mode = "gs2";
95 if ((defined $site) && ($site ne "")) { # GS3
96 $gs_mode = "gs3";
97 }
98 my $config_filename = &colcfg::get_collect_cfg_name($out, $gs_mode);
99 my $collectcfg = &colcfg::read_collection_cfg ($config_filename, $gs_mode);
100
101 return $collectcfg->{'infodbtype'};
102}
103
104
105sub docid_to_import_filenames
106{
107 my $self = shift @_;
108
109 my @docids = @_;
110
111 my $collect = $self->{'collect'};
112 my $gsdl_cgi = $self->{'gsdl_cgi'};
113 my $infodb_type = $self->{'infodbtype'};
114
115 # Derive the archives dir
116 my $site = $self->{'site'};
117 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
118 my $archive_dir = &FileUtils::filenameConcatenate($collect_dir,$collect,"archives");
119 ##my $archive_dir = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},"archives");
120
121 my $arcinfo_doc_filename
122 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
123 $archive_dir);
124
125 my %all_import_file_keys = ();
126
127 foreach my $docid (@docids) {
128 # Obtain the src and associated files specified docID
129
130 my $doc_rec
131 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
132 $docid);
133
134 my $src_files = $doc_rec->{'src-file'};
135 my $assoc_files = $doc_rec->{'assoc-file'};
136
137 if (defined $src_files) {
138 foreach my $ifile (@$src_files) {
139 $ifile = &util::placeholders_to_abspath($ifile);
140 $all_import_file_keys{$ifile} = 1;
141 }
142 }
143
144 if (defined $assoc_files) {
145 foreach my $ifile (@$assoc_files) {
146 $ifile = &util::placeholders_to_abspath($ifile);
147 $all_import_file_keys{$ifile} = 1;
148 }
149 }
150 }
151
152 my @all_import_files = keys %all_import_file_keys;
153
154 return \@all_import_files;
155}
156
157
158sub import_filenames_to_docids
159{
160 my $self = shift @_;
161 my ($import_filenames) = @_;
162
163 my $collect = $self->{'collect'};
164 my $gsdl_cgi = $self->{'gsdl_cgi'};
165 my $infodb_type = $self->{'infodbtype'};
166
167 # Derive the archives dir
168 my $site = $self->{'site'};
169 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
170 my $archive_dir = &FileUtils::filenameConcatenate($collect_dir,$collect,"archives");
171 ##my $archive_dir = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},"archives");
172
173 # Obtain the oids for the specified import filenames
174 my $arcinfo_src_filename
175 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-src",
176 $archive_dir);
177
178 my %all_oid_keys = ();
179
180 foreach my $ifile (@$import_filenames) {
181 $ifile = &util::upgrade_if_dos_filename($ifile);
182 $ifile = &util::abspath_to_placeholders($ifile);
183
184 print STDERR "*** looking up import filename key \"$ifile\"\n";
185
186 my $src_rec
187 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_src_filename,
188 $ifile);
189
190 my $oids = $src_rec->{'oid'};
191
192 foreach my $o (@$oids) {
193 $all_oid_keys{$o} = 1;
194 }
195 }
196
197 my @all_oids = keys %all_oid_keys;
198
199 return \@all_oids;
200}
201
202
203sub remove_import_filenames
204{
205 my $self = shift @_;
206 my ($expanded_import_filenames) = @_;
207
208 foreach my $f (@$expanded_import_filenames) {
209 # If this document has been exploded before then
210 # its original source files will have already been removed
211 if (-e $f) {
212 &FileUtils::removeFiles($f);
213 }
214 }
215}
216
217sub move_docoids_to_import
218{
219 my $self = shift @_;
220 my ($docids) = @_;
221
222 my $collect = $self->{'collect'};
223 my $gsdl_cgi = $self->{'gsdl_cgi'};
224 my $infodb_type = $self->{'infodbtype'};
225
226 # Derive the archives and import directories
227 my $site = $self->{'site'};
228 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
229
230 my $archive_dir = &FileUtils::filenameConcatenate($collect_dir,$collect,"archives");
231 my $import_dir = &FileUtils::filenameConcatenate($collect_dir,$collect,"import");
232
233 # Obtain the doc.xml path for the specified docID
234 my $arcinfo_doc_filename
235 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
236 $archive_dir);
237
238 foreach my $docid (@$docids) {
239
240 my $doc_rec
241 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
242 $docid);
243
244 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
245
246 # The $doc_xml_file is relative to the archives, so need to do
247 # a bit more work to make sure the right folder containing this
248 # is moved to the right place in the import folder
249
250 my $assoc_path = dirname($doc_xml_file);
251 my $import_assoc_dir = &FileUtils::filenameConcatenate($import_dir,$assoc_path);
252 my $archive_assoc_dir = &FileUtils::filenameConcatenate($archive_dir,$assoc_path);
253
254 # If assoc_path involves more than one sub directory, then need to make
255 # sure the necessary directories exist in the import area also.
256 # For example, if assoc_path is "a/b/c.dir" then need "import/a/b" to
257 # exists before moving "archives/a/b/c.dir" -> "import/a/b"
258 my $import_target_parent_dir = dirname($import_assoc_dir);
259
260 if (-d $import_assoc_dir) {
261 # detected version from previous explode => remove it
262 &FileUtils::removeFilesRecursive($import_assoc_dir);
263 }
264 else {
265 # First time => make sure parent directory exists to move
266 # "c.dir" (see above) into
267
268 &FileUtils::makeAllDirectories($import_target_parent_dir);
269 }
270
271 &FileUtils::copyFilesRecursive($archive_assoc_dir,$import_target_parent_dir)
272 }
273}
274
275
276sub remove_docoids
277{
278 my $self = shift @_;
279 my ($docids) = @_;
280
281 my $collect = $self->{'collect'};
282 my $gsdl_cgi = $self->{'gsdl_cgi'};
283 my $infodb_type = $self->{'infodbtype'};
284
285 # Derive the archives and import directories
286 my $site = $self->{'site'};
287 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
288
289 my $archive_dir = &FileUtils::filenameConcatenate($collect_dir,$collect,"archives");
290
291 # Obtain the doc.xml path for the specified docID
292 my $arcinfo_doc_filename
293 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
294 $archive_dir);
295
296 foreach my $docid (@$docids) {
297
298 my $doc_rec
299 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
300 $docid);
301
302 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
303
304 # The $doc_xml_file is relative to the archives, so need to do
305 # a bit more work to make sure the right folder containing this
306 # is moved to the right place in the import folder
307
308 my $assoc_path = dirname($doc_xml_file);
309 my $archive_assoc_dir = &FileUtils::filenameConcatenate($archive_dir,$assoc_path);
310
311 &FileUtils::removeFilesRecursive($archive_assoc_dir)
312 }
313}
314
315
316sub explode_document
317{
318 my $self = shift @_;
319
320 my $username = $self->{'username'};
321 my $collect = $self->{'collect'};
322 my $gsdl_cgi = $self->{'gsdl_cgi'};
323 my $gsdl_home = $self->{'gsdlhome'};
324
325 # Authenticate user if it is enabled
326 if ($baseaction::authentication_enabled) {
327 # Ensure the user is allowed to edit this collection
328 &authenticate_user($gsdl_cgi, $username, $collect);
329 }
330
331 # Derive the archives dir
332 my $site = $self->{'site'};
333 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
334
335 my $archive_dir = &FileUtils::filenameConcatenate($collect_dir,$collect,"archives");
336 ##my $archive_dir = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},"archives");
337
338 # Make sure the collection isn't locked by someone else
339 $self->lock_collection($username, $collect);
340
341 # look up additional args
342 my $docid = $self->{'d'};
343 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
344 $self->unlock_collection($username, $collect);
345 $gsdl_cgi->generate_error("No docid (d=...) specified.");
346 }
347
348 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
349
350 my $orig_import_filenames = $self->docid_to_import_filenames($docid_root);
351 my $docid_keys = $self->import_filenames_to_docids($orig_import_filenames);
352 my $expanded_import_filenames = $self->docid_to_import_filenames(@$docid_keys);
353
354 $self->remove_import_filenames($expanded_import_filenames);
355 $self->move_docoids_to_import($docid_keys);
356
357 # Release the lock once it is done
358 $self->unlock_collection($username, $collect);
359
360 my $mess = "Base Doc ID: $docid_root\n-----\n";
361 $mess .= join("\n",@$expanded_import_filenames);
362
363 $gsdl_cgi->generate_ok_message($mess);
364
365}
366
367
368sub delete_document_entry
369{
370 my $self = shift @_;
371 my ($docid_root,$opt_onlyadd) = @_;
372
373 my $docid_keys = [];
374 if ((defined $opt_onlyadd) && ($opt_onlyadd==1)) {
375 # delete docoid archive folder
376 push(@$docid_keys,$docid_root);
377 }
378 else {
379 print STDERR "**** Not currently implemented for the general case!!\nDeleting 'archive' version only.";
380
381 push(@$docid_keys,$docid_root);
382
383 #my $orig_import_filenames = $self->docid_to_import_filenames($docid_root);
384 #$docid_keys = $self->import_filenames_to_docids($orig_import_filenames);
385 #my $expanded_import_filenames = $self->docid_to_import_filenames(@$docid_keys);
386
387 # need to remove only the files that are not
388
389 #$self->remove_import_filenames($expanded_import_filenames);
390 }
391
392 $self->remove_docoids($docid_keys);
393}
394
395
396sub delete_document
397{
398 my $self = shift @_;
399
400 my $username = $self->{'username'};
401 my $collect = $self->{'collect'};
402 my $gsdl_cgi = $self->{'gsdl_cgi'};
403 my $gsdl_home = $self->{'gsdlhome'};
404
405 # Authenticate user if it is enabled
406 if ($baseaction::authentication_enabled) {
407 # Ensure the user is allowed to edit this collection
408 &authenticate_user($gsdl_cgi, $username, $collect);
409 }
410
411 # Derive the archives dir
412 my $site = $self->{'site'};
413 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
414
415 my $archive_dir = &FileUtils::filenameConcatenate($collect_dir,$collect,"archives");
416 ##my $archive_dir = &FileUtils::filenameConcatenate($ENV{'GSDLCOLLECTDIR'},"archives");
417
418 # Make sure the collection isn't locked by someone else
419 $self->lock_collection($username, $collect);
420
421 # look up additional args
422 my $docid = $self->{'d'};
423 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
424 $self->unlock_collection($username, $collect);
425 $gsdl_cgi->generate_error("No docid (d=...) specified.");
426 }
427
428 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
429
430 my $onlyadd = $self->{'onlyadd'};
431
432 my $status = $self->delete_document_entry($docid_root,$onlyadd);
433
434 # Release the lock once it is done
435 $self->unlock_collection($username, $collect);
436
437 my $mess = "delete-document successful: Key[$docid_root]\n";
438 $gsdl_cgi->generate_ok_message($mess);
439
440}
441
442
443sub delete_document_array
444{
445 my $self = shift @_;
446
447 my $username = $self->{'username'};
448 my $collect = $self->{'collect'};
449 my $gsdl_cgi = $self->{'gsdl_cgi'};
450 my $gsdlhome = $self->{'gsdlhome'};
451
452 if ($baseaction::authentication_enabled) {
453 # Ensure the user is allowed to edit this collection
454 &authenticate_user($gsdl_cgi, $username, $collect);
455 }
456
457 my $site = $self->{'site'};
458 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
459
460 $gsdl_cgi->checked_chdir($collect_dir);
461
462 # Obtain the collect dir
463 ## my $collect_dir = &FileUtils::filenameConcatenate($gsdlhome, "collect");
464
465 # Make sure the collection isn't locked by someone else
466 $self->lock_collection($username, $collect);
467
468 # look up additional args
469
470 my $json_str = $self->{'json'};
471 my $doc_array = decode_json $json_str;
472
473 my $onlyadd = $self->{'onlyadd'};
474
475
476 my $global_status = 0;
477 my $global_mess = "";
478
479 my @all_docids = ();
480
481 foreach my $doc_array_rec ( @$doc_array ) {
482
483 my $docid = $doc_array_rec->{'docid'};
484
485 push(@all_docids,$docid);
486
487 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
488
489 my $status = $self->delete_document_entry($docid_root,$onlyadd);
490
491 if ($status != 0) {
492 # Catch error if set infodb entry failed
493 $global_status = $status;
494 $global_mess .= "Failed to delete document key: $docid\n";
495 $global_mess .= "Exit status: $status\n";
496 $global_mess .= "System Error Message: $!\n";
497 $global_mess .= "-" x 20;
498 }
499 }
500
501 if ($global_status != 0) {
502 $global_mess .= "PATH: $ENV{'PATH'}\n";
503 $gsdl_cgi->generate_error($global_mess);
504 }
505 else {
506 my $mess = "delete-document-array successful: Keys[ ".join(", ",@all_docids)."]\n";
507 $gsdl_cgi->generate_ok_message($mess);
508 }
509
510 # Release the lock once it is done
511 $self->unlock_collection($username, $collect);
512}
513
514
515
5161;
Note: See TracBrowser for help on using the repository browser.