source: main/trunk/greenstone2/perllib/cgiactions/explodeaction.pm@ 26567

Last change on this file since 26567 was 26567, checked in by ak19, 11 years ago

When a GS2 collection contains both collect.cfg and collectionConfig.xml (as advanced beatles does) the old code used to end up reading in the GS3 collectionConfig.xml instead of the GS2 collect.cfg and set the GS_mode to GS3. Now colcfg::get_collect_cfg_name takes the gs_mode (instead of determining this and returning it) and works out the collectcfg file name for the gs_mode. That means that the calling functions now need to work out the gs_mode. They do so by setting the gs_mode to gs3 if the site flag is present in the commandline, if not then it defaults to gs2. So from now on, the site flag must be specified for GS3 collections.

File size: 13.8 KB
Line 
1###########################################################################
2#
3# explodeaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package explodeaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34use util;
35
36use JSON;
37
38use File::Basename;
39
40BEGIN {
41# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
42 require XML::Rules;
43}
44
45
46@explodeaction::ISA = ('baseaction');
47
48
49# 'a' for action, and 'c' for collection are also compulsory, and
50# are added in automatically by baseaction
51
# Argument specification for every action this module implements, keyed
# by action name.  Each entry names the arguments that must be present
# ('compulsory-args') and those that may be present ('optional-args').
# The table is handed to the baseaction constructor by new().
my $action_table = {
    'explode-document' => {
        'compulsory-args' => ['d'],
        'optional-args'   => [],
    },
    'delete-document' => {
        'compulsory-args' => ['d'],
        'optional-args'   => ['onlyadd'],
    },
    'delete-document-array' => {
        'compulsory-args' => ['json'],
        'optional-args'   => ['onlyadd'],
    },
};
63
64
# Constructor.
#
#   $gsdl_cgi  - passed straight through to the baseaction constructor
#   $iis6_mode - passed straight through to the baseaction constructor
#
# Builds the object via baseaction (giving it this module's action table)
# and re-blesses the result into $class.
sub new
{
    my ($class, $gsdl_cgi, $iis6_mode) = @_;

    # Use the direct method-call form.  "new baseaction(...)" is indirect
    # object syntax, whose parsing relies on heuristics about which names
    # are known at compile time - and this package defines its own new().
    my $self = baseaction->new($action_table, $gsdl_cgi, $iis6_mode);

    return bless $self, $class;
}
74
75
# Look up the 'infodbtype' setting of a collection.
#
#   $opt_site     - site name; a defined, non-empty value selects "gs3"
#                   mode, anything else selects "gs2" mode
#   $collect_home - collect directory handed to colcfg::use_collection
#   $collect      - collection name
#
# Returns the 'infodbtype' entry of the collection configuration read via
# colcfg.  If colcfg::use_collection returns an empty string, a plain-text
# error response is printed to STDOUT and the process exits.
#
# This is a plain function, not a method (no $self argument).
sub get_infodb_type
{
    my ($opt_site, $collect_home, $collect) = @_;

    # colcfg is not among the modules loaded at the top of this file;
    # require it here so the colcfg:: calls below resolve no matter what
    # other modules happen to have loaded already.
    require colcfg;

    my $out = "STDERR";

    # use_collection's result replaces $collect, so remember the name that
    # was asked for: it is needed for the error message when the result
    # comes back empty.
    my $requested_collect = $collect;

    $collect = &colcfg::use_collection($opt_site, $collect, $collect_home);

    if ($collect eq "") {
        print STDERR "Error: failed to find collection $requested_collect in $collect_home\n";
        print STDOUT "Content-type:text/plain\n\n";
        print STDOUT "ERROR: Failed to find collection $requested_collect\n";
        exit 0;
    }

    # Read in the collection configuration file.  The mode is derived from
    # the site argument ($opt_site); the previous code tested an undeclared
    # $site variable, which does not compile under "use strict".
    my $gs_mode = "gs2";
    if ((defined $opt_site) && ($opt_site ne "")) { # GS3
        $gs_mode = "gs3";
    }
    my $config_filename = &colcfg::get_collect_cfg_name($out, $gs_mode);
    my $collectcfg = &colcfg::read_collection_cfg($config_filename, $gs_mode);

    return $collectcfg->{'infodbtype'};
}
102
103
# Collect the import file keys recorded for a list of document ids.
#
#   @docids - document ids to look up in the "archiveinf-doc" database of
#             this collection's archives directory
#
# For every id the 'src-file' and 'assoc-file' lists of its database
# record are gathered.  Returns a reference to an array holding each
# distinct entry once (in no particular order).
sub docid_to_import_filenames
{
    my ($self, @docids) = @_;

    my $gsdl_cgi    = $self->{'gsdl_cgi'};
    my $infodb_type = $self->{'infodbtype'};

    # archives directory: <collect dir for site>/<collection>/archives
    my $collect_dir = $gsdl_cgi->get_collection_dir($self->{'site'});
    my $archive_dir
        = &util::filename_cat($collect_dir, $self->{'collect'}, "archives");

    my $arcinfo_doc_filename
        = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
                                        $archive_dir);

    # The hash keys act as a set of file keys
    my %seen_file_key;

    for my $docid (@docids) {
        my $doc_rec
            = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
                                         $docid);

        # Source files first, then associated files; either list may be
        # absent from the record
        for my $field ('src-file', 'assoc-file') {
            my $file_keys = $doc_rec->{$field};
            next unless defined $file_keys;

            $seen_file_key{$_} = 1 for @$file_keys;
        }
    }

    return [ keys %seen_file_key ];
}
153
154
# Map import file keys back to the document ids generated from them.
#
#   $import_filenames - reference to an array of import file keys
#
# Each key is passed through util::upgrade_if_dos_filename and then looked
# up in the "archiveinf-src" database of this collection's archives
# directory; the 'oid' lists of the records found are merged.  Returns a
# reference to an array holding each distinct oid once (in no particular
# order).  The caller's array is left untouched.
sub import_filenames_to_docids
{
    my $self = shift @_;
    my ($import_filenames) = @_;

    my $collect     = $self->{'collect'};
    my $gsdl_cgi    = $self->{'gsdl_cgi'};
    my $infodb_type = $self->{'infodbtype'};

    # Derive the archives dir
    my $site        = $self->{'site'};
    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");

    # Obtain the oids for the specified import filenames
    my $arcinfo_src_filename
        = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-src",
                                        $archive_dir);

    my %all_oid_keys = ();

    foreach my $import_filename (@$import_filenames) {
        # foreach aliases the elements of the caller's array, so keep the
        # upgraded name in a separate variable instead of assigning back
        # to the loop variable (which would rewrite the caller's data).
        my $src_key = &util::upgrade_if_dos_filename($import_filename);

        print STDERR "*** looking import filename key \"$src_key\"\n";

        my $src_rec
            = &dbutil::read_infodb_entry($infodb_type, $arcinfo_src_filename,
                                         $src_key);
        my $oids = $src_rec->{'oid'};

        # A record without an 'oid' list contributes nothing
        next unless defined $oids;

        foreach my $o (@$oids) {
            $all_oid_keys{$o} = 1;
        }
    }

    my @all_oids = keys %all_oid_keys;

    return \@all_oids;
}
196
197
# Delete the given import files from disk.
#
#   $expanded_import_filenames - reference to an array of file paths
#
# Paths that do not exist are skipped: a document that has been exploded
# before will already have had its original source files removed.
sub remove_import_filenames
{
    my ($self, $expanded_import_filenames) = @_;

    for my $path (@$expanded_import_filenames) {
        next unless -e $path;

        &util::rm($path);
    }
}
211
# Copy the archive folders of the given documents into the import area.
#
#   $docids - reference to an array of document ids
#
# For every id the 'doc-file' entry of its "archiveinf-doc" record gives
# the location of doc.xml relative to the archives directory.  The folder
# holding that file is copied (util::cp_r) to the same relative location
# under the import directory, replacing any copy left there by an earlier
# explode.  Note that, despite the sub's name, the archives copy is left
# in place.
#
# Dies if a document has no 'doc-file' entry, or if that entry has no
# directory component (in which case the folder to replace/copy would be
# the whole import/archives directory).
sub move_docoids_to_import
{
    my $self = shift @_;
    my ($docids) = @_;

    my $collect     = $self->{'collect'};
    my $gsdl_cgi    = $self->{'gsdl_cgi'};
    my $infodb_type = $self->{'infodbtype'};

    # Derive the archives and import directories
    my $site        = $self->{'site'};
    my $collect_dir = $gsdl_cgi->get_collection_dir($site);

    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");
    my $import_dir  = &util::filename_cat($collect_dir, $collect, "import");

    # Database that maps a docID to its doc.xml path
    my $arcinfo_doc_filename
        = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
                                        $archive_dir);

    foreach my $docid (@$docids) {

        my $doc_rec
            = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
                                         $docid);

        my $doc_files    = (defined $doc_rec)   ? $doc_rec->{'doc-file'} : undef;
        my $doc_xml_file = (defined $doc_files) ? $doc_files->[0]        : undef;

        # Without this check dirname() croaks on undef with a message that
        # says nothing about the document concerned
        if ((!defined $doc_xml_file) || ($doc_xml_file eq "")) {
            die "move_docoids_to_import: no doc-file recorded for docid '$docid'";
        }

        # The $doc_xml_file is relative to the archives, so need to do
        # a bit more work to make sure the right folder containing this
        # is copied to the right place in the import folder

        my $assoc_path = dirname($doc_xml_file);

        # dirname() gives "." when there is no directory component; going
        # on would remove the import directory itself and copy the whole
        # archives directory
        if ($assoc_path eq ".") {
            die "move_docoids_to_import: doc-file '$doc_xml_file' of docid '$docid' has no folder of its own";
        }

        my $import_assoc_dir  = &util::filename_cat($import_dir, $assoc_path);
        my $archive_assoc_dir = &util::filename_cat($archive_dir, $assoc_path);

        # If assoc_path involves more than one sub directory, then need to
        # make sure the necessary directories exist in the import area also.
        # For example, if assoc_path is "a/b/c.dir" then need "import/a/b"
        # to exist before copying "archives/a/b/c.dir" -> "import/a/b"
        my $import_target_parent_dir = dirname($import_assoc_dir);

        if (-d $import_assoc_dir) {
            # detected version from previous explode => remove it
            &util::rm_r($import_assoc_dir);
        }
        else {
            # First time => make sure parent directory exists to copy
            # "c.dir" (see above) into
            &util::mk_all_dir($import_target_parent_dir);
        }

        &util::cp_r($archive_assoc_dir, $import_target_parent_dir);
    }
}
269
270
# Remove the archive folders of the given documents.
#
#   $docids - reference to an array of document ids
#
# For every id the 'doc-file' entry of its "archiveinf-doc" record gives
# the location of doc.xml relative to the archives directory; the folder
# holding that file is removed recursively (util::rm_r).
#
# Returns 0 once every document has been processed.  The value travels
# back through delete_document_entry to delete_document_array, which
# treats a non-zero status as a failure.
#
# Dies if a document has no 'doc-file' entry, or if that entry has no
# directory component (in which case the folder to remove would be the
# archives directory itself).
sub remove_docoids
{
    my $self = shift @_;
    my ($docids) = @_;

    my $collect     = $self->{'collect'};
    my $gsdl_cgi    = $self->{'gsdl_cgi'};
    my $infodb_type = $self->{'infodbtype'};

    # Derive the archives directory
    my $site        = $self->{'site'};
    my $collect_dir = $gsdl_cgi->get_collection_dir($site);

    my $archive_dir = &util::filename_cat($collect_dir, $collect, "archives");

    # Database that maps a docID to its doc.xml path
    my $arcinfo_doc_filename
        = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
                                        $archive_dir);

    foreach my $docid (@$docids) {

        my $doc_rec
            = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
                                         $docid);

        my $doc_files    = (defined $doc_rec)   ? $doc_rec->{'doc-file'} : undef;
        my $doc_xml_file = (defined $doc_files) ? $doc_files->[0]        : undef;

        # Without this check dirname() croaks on undef with a message that
        # says nothing about the document concerned
        if ((!defined $doc_xml_file) || ($doc_xml_file eq "")) {
            die "remove_docoids: no doc-file recorded for docid '$docid'";
        }

        # The $doc_xml_file is relative to the archives; the folder that
        # contains it is what gets removed

        my $assoc_path = dirname($doc_xml_file);

        # dirname() gives "." when there is no directory component; going
        # on would recursively remove the whole archives directory
        if ($assoc_path eq ".") {
            die "remove_docoids: doc-file '$doc_xml_file' of docid '$docid' has no folder of its own";
        }

        my $archive_assoc_dir = &util::filename_cat($archive_dir, $assoc_path);

        &util::rm_r($archive_assoc_dir);
    }

    # Explicit status: the value of a sub that ends in a foreach loop is
    # unspecified
    return 0;
}
309
310
# Action "explode-document".
#
# Reads the document id from the 'd' argument, strips any section suffix
# (everything from the first '.'), and then:
#   1. finds the import files the document came from,
#   2. finds every document generated from those files,
#   3. removes the import files of all those documents, and
#   4. copies the documents' archive folders into the import area.
# The collection is locked while this happens.  On success an OK message
# listing the base document id and the removed import files is generated.
#
# If no document id was supplied an error is generated instead.  If any of
# the steps dies, the lock is released before the error is re-thrown.
sub explode_document
{
    my $self = shift @_;

    my $username = $self->{'username'};
    my $collect  = $self->{'collect'};
    my $gsdl_cgi = $self->{'gsdl_cgi'};

    # Authenticate user if it is enabled
    if ($baseaction::authentication_enabled) {
        # Ensure the user is allowed to edit this collection
        # NOTE(review): authenticate_user is called as a plain function but
        # is not defined in this file; presumably it comes from baseaction -
        # confirm that it resolves from this package.
        &authenticate_user($gsdl_cgi, $username, $collect);
    }

    # Make sure the collection isn't locked by someone else
    $self->lock_collection($username, $collect);

    # look up additional args
    my $docid = $self->{'d'};
    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
        $self->unlock_collection($username, $collect);
        $gsdl_cgi->generate_error("No docid (d=...) specified.");

        # Never carry on without a document id, whether or not
        # generate_error returns control to us
        return;
    }

    my ($docid_root) = ($docid =~ m/^(.*?)(?:\..*)?$/);

    # Run the work inside eval so that a die in any step cannot leave the
    # collection locked
    my $expanded_import_filenames;
    my $completed = eval {
        my $orig_import_filenames
            = $self->docid_to_import_filenames($docid_root);
        my $docid_keys
            = $self->import_filenames_to_docids($orig_import_filenames);
        $expanded_import_filenames
            = $self->docid_to_import_filenames(@$docid_keys);

        $self->remove_import_filenames($expanded_import_filenames);
        $self->move_docoids_to_import($docid_keys);
        1;
    };
    my $failure = $@;   # saved before unlock_collection can overwrite $@

    # Release the lock once it is done
    $self->unlock_collection($username, $collect);

    if (!$completed) {
        die($failure || "explode_document: processing failed");
    }

    my $mess = "Base Doc ID: $docid_root\n-----\n";
    $mess .= join("\n", @$expanded_import_filenames);

    $gsdl_cgi->generate_ok_message($mess);
}
361
362
# Delete one document, identified by its top-level document id.
#
#   $docid_root  - document id without any section suffix
#   $opt_onlyadd - optional; when it is defined and numerically equal to 1
#                  only the archive folder is meant to be deleted
#
# Deleting a document in the general case (which would also involve its
# import files) has not been implemented, so in both cases only the
# archive folder of $docid_root is removed; without onlyadd a note to that
# effect is written to STDERR first.
#
# Returns whatever remove_docoids returns.
sub delete_document_entry
{
    my ($self, $docid_root, $opt_onlyadd) = @_;

    my $archive_only = (defined $opt_onlyadd) && ($opt_onlyadd == 1);

    if (!$archive_only) {
        print STDERR "**** Not currently implemented for the general case!!\nDeleting 'archive' version only.";
    }

    return $self->remove_docoids([ $docid_root ]);
}
389
390
# Action "delete-document".
#
# Reads the document id from the 'd' argument, strips any section suffix
# (everything from the first '.'), and deletes that document through
# delete_document_entry, passing along the optional 'onlyadd' argument.
# The collection is locked while this happens.  On success an OK message
# naming the deleted key is generated.
#
# If no document id was supplied an error is generated instead.  If the
# deletion dies, the lock is released before the error is re-thrown.
sub delete_document
{
    my $self = shift @_;

    my $username = $self->{'username'};
    my $collect  = $self->{'collect'};
    my $gsdl_cgi = $self->{'gsdl_cgi'};

    # Authenticate user if it is enabled
    if ($baseaction::authentication_enabled) {
        # Ensure the user is allowed to edit this collection
        # NOTE(review): authenticate_user is called as a plain function but
        # is not defined in this file; presumably it comes from baseaction -
        # confirm that it resolves from this package.
        &authenticate_user($gsdl_cgi, $username, $collect);
    }

    # Make sure the collection isn't locked by someone else
    $self->lock_collection($username, $collect);

    # look up additional args
    my $docid = $self->{'d'};
    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
        $self->unlock_collection($username, $collect);
        $gsdl_cgi->generate_error("No docid (d=...) specified.");

        # Never carry on without a document id, whether or not
        # generate_error returns control to us
        return;
    }

    my ($docid_root) = ($docid =~ m/^(.*?)(?:\..*)?$/);

    my $onlyadd = $self->{'onlyadd'};

    # Run the deletion inside eval so that a die cannot leave the
    # collection locked
    my $completed = eval {
        $self->delete_document_entry($docid_root, $onlyadd);
        1;
    };
    my $failure = $@;   # saved before unlock_collection can overwrite $@

    # Release the lock once it is done
    $self->unlock_collection($username, $collect);

    if (!$completed) {
        die($failure || "delete_document: deletion failed");
    }

    my $mess = "delete-document successful: Key[$docid_root]\n";
    $gsdl_cgi->generate_ok_message($mess);
}
436
437
# Action "delete-document-array".
#
# The 'json' argument must hold a JSON array of records, each with a
# 'docid' field.  Every document is deleted through delete_document_entry
# (after stripping any section suffix from its id), passing along the
# optional 'onlyadd' argument.  The collection is locked while the
# deletions run and unlocked before the response is generated.
#
# A failure of one entry (missing docid, non-zero status, or a die inside
# delete_document_entry) does not stop the others; all failures are
# collected and reported in a single error.  If nothing failed, an OK
# message listing the keys is generated.
sub delete_document_array
{
    my $self = shift @_;

    my $username = $self->{'username'};
    my $collect  = $self->{'collect'};
    my $gsdl_cgi = $self->{'gsdl_cgi'};

    if ($baseaction::authentication_enabled) {
        # Ensure the user is allowed to edit this collection
        # NOTE(review): authenticate_user is called as a plain function but
        # is not defined in this file; presumably it comes from baseaction -
        # confirm that it resolves from this package.
        &authenticate_user($gsdl_cgi, $username, $collect);
    }

    my $site        = $self->{'site'};
    my $collect_dir = $gsdl_cgi->get_collection_dir($site);

    $gsdl_cgi->checked_chdir($collect_dir);

    # Parse and validate the request before taking the lock: decode_json
    # dies on malformed input, which must not leave the collection locked
    my $json_str  = $self->{'json'};
    my $doc_array = eval { decode_json($json_str) };
    if (ref($doc_array) ne 'ARRAY') {
        $gsdl_cgi->generate_error("Invalid json argument: expected a JSON array of records with a 'docid' field.");
        return;
    }

    my $onlyadd = $self->{'onlyadd'};

    # Make sure the collection isn't locked by someone else
    $self->lock_collection($username, $collect);

    my $global_status = 0;
    my $global_mess   = "";

    my @all_docids = ();

    foreach my $doc_array_rec (@$doc_array) {

        my $docid = (ref($doc_array_rec) eq 'HASH')
            ? $doc_array_rec->{'docid'} : undef;

        if ((!defined $docid) || ref($docid)) {
            $global_status = 1;
            $global_mess .= "Failed to delete document: entry has no usable docid\n";
            $global_mess .= ("-" x 20) . "\n";
            next;
        }

        push(@all_docids, $docid);

        my ($docid_root) = ($docid =~ m/^(.*?)(?:\..*)?$/);

        # A die while deleting one document is recorded as a failure of
        # that document instead of aborting with the lock still held
        my $status;
        my $completed = eval {
            $status = $self->delete_document_entry($docid_root, $onlyadd);
            1;
        };
        my $failure   = $@;
        my $sys_error = "$!";

        if (!$completed) {
            $status = 1;
        }

        if ((defined $status) && ($status ne "") && ($status != 0)) {
            # Catch error if the deletion failed
            $global_status = $status;
            $global_mess .= "Failed to delete document key: $docid\n";
            $global_mess .= "Exit status: $status\n";
            $global_mess .= "System Error Message: $sys_error\n";
            if (!$completed) {
                chomp(my $reason = "$failure");
                $global_mess .= "Error: $reason\n";
            }
            $global_mess .= ("-" x 20) . "\n";
        }
    }

    # Release the lock before generating the response, as the other
    # actions in this module do
    $self->unlock_collection($username, $collect);

    if ($global_status != 0) {
        $global_mess .= "PATH: $ENV{'PATH'}\n";
        $gsdl_cgi->generate_error($global_mess);
    }
    else {
        my $mess = "delete-document-array successful: Keys[ ".join(", ",@all_docids)."]\n";
        $gsdl_cgi->generate_ok_message($mess);
    }
}
508
509
510
5111;
Note: See TracBrowser for help on using the repository browser.