root/gs2-extensions/parallel-building/trunk/src/perllib/cgiactions/explodeaction.pm @ 24626

Revision 24626, 13.7 KB (checked in by jmt12, 8 years ago)

An (almost) complete copy of the perllib directory from a (circa SEP2011) head checkout from Greenstone 2 trunk - in order to try and make merging in this extension a little easier later on (as there have been some major changes to buildcol.pl commited in the main trunk but not in the x64 branch)

Line 
1###########################################################################
2#
3# explodeaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr   te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package explodeaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34use util;
35
36use JSON;
37
38use File::Basename;
39
40BEGIN {
41#    unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
42    require XML::Rules;
43}
44
45
46@explodeaction::ISA = ('baseaction');
47
48
49# 'a' for action, and 'c' for collection are also compulsorary, and
50# added in automatically by baseaction
51
52my $action_table =
53{
54    "explode-document"          => { 'compulsory-args' => ["d"],
55                'optional-args'   => [] },
56    "delete-document"           => { 'compulsory-args' => ["d"],
57                'optional-args'   => [ "onlyadd" ] },
58    "delete-document-array"     => { 'compulsory-args' => ["json"],
59                'optional-args'   => [ "onlyadd" ] }
60               
61               
62};
63
64
65sub new
66{
67    my $class = shift (@_);
68    my ($gsdl_cgi,$iis6_mode) = @_;
69
70    my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
71
72    return bless $self, $class;
73}
74
75
76sub get_infodb_type
77{
78    my ($opt_site,$collect_home,$collect) = @_;
79
80    my $out = "STDERR";
81
82    $collect = &colcfg::use_collection($opt_site, $collect, $collect_home);
83
84    if ($collect eq "") {
85    print STDERR "Error: failed to find collection $collect in $collect_home\n";
86    print STDOUT "Content-type:text/plain\n\n";
87    print STDOUT "ERROR: Failed to find collection $collect\n";
88    exit 0;
89   
90    }
91
92    # Read in the collection configuration file.
93    my ($config_filename, $gs_mode) = &colcfg::get_collect_cfg_name($out);
94    my $collectcfg = &colcfg::read_collection_cfg ($config_filename, $gs_mode);
95
96    return $collectcfg->{'infodbtype'};
97}
98
99
100sub docid_to_import_filenames
101{
102    my $self = shift @_;
103
104    my @docids = @_;
105
106    my $collect   = $self->{'collect'};
107    my $gsdl_cgi  = $self->{'gsdl_cgi'};
108    my $infodb_type = $self->{'infodbtype'};
109
110    # Derive the archives dir   
111    my $site = $self->{'site'};
112    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
113    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
114    ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
115
116    my $arcinfo_doc_filename
117    = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
118                    $archive_dir);
119
120    my %all_import_file_keys = ();
121   
122    foreach my $docid (@docids) {
123    # Obtain the src and associated files specified docID
124   
125    my $doc_rec
126        = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
127                     $docid);
128   
129    my $src_files = $doc_rec->{'src-file'};
130    my $assoc_files = $doc_rec->{'assoc-file'};
131   
132    if (defined $src_files) {
133        foreach my $if (@$src_files) {
134        $all_import_file_keys{$if} = 1;
135        }
136    }
137
138    if (defined $assoc_files) {
139        foreach my $if (@$assoc_files) {
140        $all_import_file_keys{$if} = 1;
141        }
142    }
143    }
144
145    my @all_import_files = keys %all_import_file_keys;
146
147    return \@all_import_files;
148}
149
150
151sub import_filenames_to_docids
152{
153    my $self = shift @_;
154    my ($import_filenames) = @_;
155
156    my $collect   = $self->{'collect'};   
157    my $gsdl_cgi  = $self->{'gsdl_cgi'};
158    my $infodb_type = $self->{'infodbtype'};
159
160    # Derive the archives dir   
161    my $site = $self->{'site'};
162    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
163    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
164    ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
165
166    # Obtain the oids for the specified import filenames
167    my $arcinfo_src_filename
168    = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-src",
169                    $archive_dir);
170
171    my %all_oid_keys = ();
172
173    foreach my $if (@$import_filenames) {
174        $if = &util::upgrade_if_dos_filename($if);
175
176    print STDERR "*** looking import filename key \"$if\"\n";
177
178    my $src_rec
179        = &dbutil::read_infodb_entry($infodb_type, $arcinfo_src_filename,
180                     $if);
181    my $oids = $src_rec->{'oid'};
182
183    foreach my $o (@$oids) {
184        $all_oid_keys{$o} = 1;
185    }
186    }
187
188    my @all_oids = keys %all_oid_keys;
189
190    return \@all_oids;
191}
192
193
194sub remove_import_filenames
195{
196    my $self = shift @_;
197    my ($expanded_import_filenames) = @_;
198
199    foreach my $f (@$expanded_import_filenames) {
200    # If this document has been exploded before then
201    # its original source files will have already been removed 
202    if (-e $f) {
203        &util::rm($f);
204    }
205    }
206}
207
208sub move_docoids_to_import
209{
210    my $self = shift @_;
211    my ($docids) = @_;
212
213    my $collect   = $self->{'collect'};
214    my $gsdl_cgi  = $self->{'gsdl_cgi'};
215    my $infodb_type = $self->{'infodbtype'};
216
217    # Derive the archives and import directories
218    my $site = $self->{'site'};
219    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
220   
221    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
222    my $import_dir  = &util::filename_cat($collect_dir,$collect,"import");
223
224    # Obtain the doc.xml path for the specified docID
225    my $arcinfo_doc_filename
226    = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
227                    $archive_dir);
228
229    foreach my $docid (@$docids) {
230
231    my $doc_rec
232        = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
233                     $docid);
234
235    my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
236
237    # The $doc_xml_file is relative to the archives, so need to do
238    # a bit more work to make sure the right folder containing this
239    # is moved to the right place in the import folder
240
241    my $assoc_path = dirname($doc_xml_file);
242    my $import_assoc_dir = &util::filename_cat($import_dir,$assoc_path);
243    my $archive_assoc_dir = &util::filename_cat($archive_dir,$assoc_path);
244
245    # If assoc_path involves more than one sub directory, then need to make
246    # sure the necessary directories exist in the import area also.
247    # For example, if assoc_path is "a/b/c.dir" then need "import/a/b" to
248    # exists before moving "archives/a/b/c.dir" -> "import/a/b"
249    my $import_target_parent_dir = dirname($import_assoc_dir);
250
251    if (-d $import_assoc_dir) {
252        # detected version from previous explode => remove it
253        &util::rm_r($import_assoc_dir);
254    }
255    else {
256        # First time => make sure parent directory exists to move
257        # "c.dir" (see above) into
258       
259        &util::mk_all_dir($import_target_parent_dir);
260    }
261
262    &util::cp_r($archive_assoc_dir,$import_target_parent_dir)
263    }
264}
265
266
267sub remove_docoids
268{
269    my $self = shift @_;
270    my ($docids) = @_;
271
272    my $collect   = $self->{'collect'};
273    my $gsdl_cgi  = $self->{'gsdl_cgi'};
274    my $infodb_type = $self->{'infodbtype'};
275
276    # Derive the archives and import directories
277    my $site = $self->{'site'};
278    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
279   
280    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
281
282    # Obtain the doc.xml path for the specified docID
283    my $arcinfo_doc_filename
284    = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
285                    $archive_dir);
286
287    foreach my $docid (@$docids) {
288
289        my $doc_rec
290            = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
291                     $docid);
292
293        my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
294
295        # The $doc_xml_file is relative to the archives, so need to do
296        # a bit more work to make sure the right folder containing this
297        # is moved to the right place in the import folder
298
299        my $assoc_path = dirname($doc_xml_file);
300        my $archive_assoc_dir = &util::filename_cat($archive_dir,$assoc_path);
301
302        &util::rm_r($archive_assoc_dir)
303    }
304}
305
306
307sub explode_document
308{
309    my $self = shift @_;
310
311    my $username  = $self->{'username'};
312    my $collect   = $self->{'collect'};
313    my $gsdl_cgi  = $self->{'gsdl_cgi'};
314    my $gsdl_home  = $self->{'gsdlhome'};
315
316    # Authenticate user if it is enabled
317    if ($baseaction::authentication_enabled) {
318    # Ensure the user is allowed to edit this collection
319    &authenticate_user($gsdl_cgi, $username, $collect);
320    }
321   
322    # Derive the archives dir   
323    my $site = $self->{'site'};
324    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
325   
326    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
327    ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
328
329    # Make sure the collection isn't locked by someone else
330    $self->lock_collection($username, $collect);
331
332    # look up additional args
333    my $docid  = $self->{'d'};
334    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
335    $self->unlock_collection($username, $collect);
336    $gsdl_cgi->generate_error("No docid (d=...) specified.");
337    }
338
339    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
340
341    my $orig_import_filenames = $self->docid_to_import_filenames($docid_root);
342    my $docid_keys = $self->import_filenames_to_docids($orig_import_filenames);
343    my $expanded_import_filenames = $self->docid_to_import_filenames(@$docid_keys);
344
345    $self->remove_import_filenames($expanded_import_filenames);
346    $self->move_docoids_to_import($docid_keys);
347
348    # Release the lock once it is done
349    $self->unlock_collection($username, $collect);
350
351    my $mess = "Base Doc ID: $docid_root\n-----\n";
352    $mess .= join("\n",@$expanded_import_filenames);
353
354    $gsdl_cgi->generate_ok_message($mess);
355
356}
357
358
359sub delete_document_entry
360{
361    my $self = shift @_;
362    my ($docid_root,$opt_onlyadd) = @_;
363   
364    my $docid_keys = [];
365    if ((defined $opt_onlyadd) && ($opt_onlyadd==1)) {
366        # delete docoid archive folder
367        push(@$docid_keys,$docid_root);
368    }
369    else {
370        print STDERR "**** Not currently implemented for the general case!!\nDeleting 'archive' version only.";
371       
372        push(@$docid_keys,$docid_root);
373       
374        #my $orig_import_filenames = $self->docid_to_import_filenames($docid_root);
375        #$docid_keys = $self->import_filenames_to_docids($orig_import_filenames);
376        #my $expanded_import_filenames = $self->docid_to_import_filenames(@$docid_keys);
377       
378        # need to remove only the files that are not
379       
380        #$self->remove_import_filenames($expanded_import_filenames);
381    }
382   
383    $self->remove_docoids($docid_keys);
384}
385   
386
387sub delete_document
388{
389    my $self = shift @_;
390
391    my $username  = $self->{'username'};
392    my $collect   = $self->{'collect'};
393    my $gsdl_cgi  = $self->{'gsdl_cgi'};
394    my $gsdl_home  = $self->{'gsdlhome'};
395
396    # Authenticate user if it is enabled
397    if ($baseaction::authentication_enabled) {
398    # Ensure the user is allowed to edit this collection
399    &authenticate_user($gsdl_cgi, $username, $collect);
400    }
401   
402    # Derive the archives dir   
403    my $site = $self->{'site'};
404    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
405   
406    my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
407    ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
408
409    # Make sure the collection isn't locked by someone else
410    $self->lock_collection($username, $collect);
411
412    # look up additional args
413    my $docid  = $self->{'d'};
414    if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
415    $self->unlock_collection($username, $collect);
416    $gsdl_cgi->generate_error("No docid (d=...) specified.");
417    }
418
419    my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
420
421    my $onlyadd = $self->{'onlyadd'};
422
423    my $status = $self->delete_document_entry($docid_root,$onlyadd);   
424
425    # Release the lock once it is done
426    $self->unlock_collection($username, $collect);
427
428    my $mess = "delete-document successful: Key[$docid_root]\n";
429    $gsdl_cgi->generate_ok_message($mess);
430
431}
432
433
434sub delete_document_array
435{
436    my $self = shift @_;
437
438    my $username  = $self->{'username'};
439    my $collect   = $self->{'collect'};
440    my $gsdl_cgi  = $self->{'gsdl_cgi'};
441    my $gsdlhome  = $self->{'gsdlhome'};
442
443    if ($baseaction::authentication_enabled) {
444    # Ensure the user is allowed to edit this collection
445    &authenticate_user($gsdl_cgi, $username, $collect);
446    }
447
448    my $site = $self->{'site'};
449    my $collect_dir = $gsdl_cgi->get_collection_dir($site);
450   
451    $gsdl_cgi->checked_chdir($collect_dir);
452
453    # Obtain the collect dir
454    ## my $collect_dir = &util::filename_cat($gsdlhome, "collect");
455
456    # Make sure the collection isn't locked by someone else
457    $self->lock_collection($username, $collect);
458
459    # look up additional args
460   
461    my $json_str      = $self->{'json'};
462    my $doc_array = decode_json $json_str;
463
464    my $onlyadd = $self->{'onlyadd'};
465   
466   
467    my $global_status = 0;
468    my $global_mess = "";
469   
470    my @all_docids = ();
471   
472    foreach my $doc_array_rec ( @$doc_array ) {
473       
474        my $docid     = $doc_array_rec->{'docid'};
475       
476        push(@all_docids,$docid);
477       
478        my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
479         
480        my $status = $self->delete_document_entry($docid_root,$onlyadd);   
481       
482        if ($status != 0) {
483            # Catch error if set infodb entry failed
484            $global_status = $status;
485            $global_mess .= "Failed to delete document key: $docid\n";
486            $global_mess .= "Exit status: $status\n";
487            $global_mess .= "System Error Message: $!\n";
488            $global_mess .= "-" x 20;
489        }
490    }
491
492    if ($global_status != 0) {
493        $global_mess .= "PATH: $ENV{'PATH'}\n";
494        $gsdl_cgi->generate_error($global_mess);
495    }
496    else {
497        my $mess = "delete-document-array successful: Keys[ ".join(", ",@all_docids)."]\n";
498        $gsdl_cgi->generate_ok_message($mess);
499    }
500   
501    # Release the lock once it is done
502    $self->unlock_collection($username, $collect);
503}
504
505
506
5071;
Note: See TracBrowser for help on using the browser.