source: main/trunk/greenstone2/perllib/cgiactions/explodeaction.pm@ 23768

Last change on this file since 23768 was 23768, checked in by davidb, 13 years ago

Setting of the collect directory changed to be compliant with Greenstone 3 and its 'site' variable

File size: 8.4 KB
Line 
1###########################################################################
2#
3# explodeaction.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 2009 New Zealand Digital Library Project
9#
10# This program is free software; you can redistr te it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package explodeaction;
27
28use strict;
29
30use cgiactions::baseaction;
31
32use dbutil;
33use ghtml;
34use util;
35
36use File::Basename;
37
38BEGIN {
39# unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
40 require XML::Rules;
41}
42
43
44@explodeaction::ISA = ('baseaction');
45
46
47# 'a' for action, and 'c' for collection are also compulsorary, and
48# added in automatically by baseaction
49
50my $action_table =
51{
52 "explode" => { 'compulsory-args' => ["d"],
53 'optional-args' => [] }
54};
55
56
57sub new
58{
59 my $class = shift (@_);
60 my ($gsdl_cgi,$iis6_mode) = @_;
61
62 my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
63
64 return bless $self, $class;
65}
66
67
68sub get_infodb_type
69{
70 my ($opt_site,$collect_home,$collect) = @_;
71
72 my $out = "STDERR";
73
74 $collect = &colcfg::use_collection($opt_site, $collect, $collect_home);
75
76 if ($collect eq "") {
77 print STDERR "Error: failed to find collection $collect in $collect_home\n";
78 print STDOUT "Content-type:text/plain\n\n";
79 print STDOUT "ERROR: Failed to find collection $collect\n";
80 exit 0;
81
82 }
83
84 # Read in the collection configuration file.
85 my ($config_filename, $gs_mode) = &colcfg::get_collect_cfg_name($out);
86 my $collectcfg = &colcfg::read_collection_cfg ($config_filename, $gs_mode);
87
88 return $collectcfg->{'infodbtype'};
89}
90
91
92sub docid_to_import_filenames
93{
94 my $self = shift @_;
95
96 my @docids = @_;
97
98 my $collect = $self->{'collect'};
99 my $gsdl_cgi = $self->{'gsdl_cgi'};
100 my $infodb_type = $self->{'infodbtype'};
101
102 # Derive the archives dir
103 my $site = $self->{'site'};
104 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
105 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
106 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
107
108 my $arcinfo_doc_filename
109 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
110 $archive_dir);
111
112 my %all_import_file_keys = ();
113
114 foreach my $docid (@docids) {
115 # Obtain the src and associated files specified docID
116
117 my $doc_rec
118 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
119 $docid);
120
121 my $src_files = $doc_rec->{'src-file'};
122 my $assoc_files = $doc_rec->{'assoc-file'};
123
124 if (defined $src_files) {
125 foreach my $if (@$src_files) {
126 $all_import_file_keys{$if} = 1;
127 }
128 }
129
130 if (defined $assoc_files) {
131 foreach my $if (@$assoc_files) {
132 $all_import_file_keys{$if} = 1;
133 }
134 }
135 }
136
137 my @all_import_files = keys %all_import_file_keys;
138
139 return \@all_import_files;
140}
141
142
143sub import_filenames_to_docids
144{
145 my $self = shift @_;
146 my ($import_filenames) = @_;
147
148 my $collect = $self->{'collect'};
149 my $gsdl_cgi = $self->{'gsdl_cgi'};
150 my $infodb_type = $self->{'infodbtype'};
151
152 # Derive the archives dir
153 my $site = $self->{'site'};
154 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
155 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
156 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
157
158 # Obtain the oids for the specified import filenames
159 my $arcinfo_src_filename
160 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-src",
161 $archive_dir);
162
163 my %all_oid_keys = ();
164
165 foreach my $if (@$import_filenames) {
166 $if = &util::upgrade_if_dos_filename($if);
167
168 print STDERR "*** looking import filename key \"$if\"\n";
169
170 my $src_rec
171 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_src_filename,
172 $if);
173 my $oids = $src_rec->{'oid'};
174
175 foreach my $o (@$oids) {
176 $all_oid_keys{$o} = 1;
177 }
178 }
179
180 my @all_oids = keys %all_oid_keys;
181
182 return \@all_oids;
183}
184
185
186sub remove_import_filenames
187{
188 my $self = shift @_;
189 my ($expanded_import_filenames) = @_;
190
191 foreach my $f (@$expanded_import_filenames) {
192 # If this document has been exploded before then
193 # its original source files will have already been removed
194 if (-e $f) {
195 &util::rm($f);
196 }
197 }
198}
199
200sub move_docoids_to_import
201{
202 my $self = shift @_;
203 my ($docids) = @_;
204
205 my $collect = $self->{'collect'};
206 my $gsdl_cgi = $self->{'gsdl_cgi'};
207 my $infodb_type = $self->{'infodbtype'};
208
209 # Derive the archives and import directories
210 my $site = $self->{'site'};
211 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
212
213 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
214 my $import_dir = &util::filename_cat($collect_dir,$collect,"import");
215
216 # Obtain the doc.xml path for the specified docID
217 my $arcinfo_doc_filename
218 = &dbutil::get_infodb_file_path($infodb_type, "archiveinf-doc",
219 $archive_dir);
220
221 foreach my $docid (@$docids) {
222
223 my $doc_rec
224 = &dbutil::read_infodb_entry($infodb_type, $arcinfo_doc_filename,
225 $docid);
226
227 my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
228
229 # The $doc_xml_file is relative to the archives, so need to do
230 # a bit more work to make sure the right folder containing this
231 # is moved to the right place in the import folder
232
233 my $assoc_path = dirname($doc_xml_file);
234 my $import_assoc_dir = &util::filename_cat($import_dir,$assoc_path);
235 my $archive_assoc_dir = &util::filename_cat($archive_dir,$assoc_path);
236
237 # If assoc_path involves more than one sub directory, then need to make
238 # sure the necessary directories exist in the import area also.
239 # For example, if assoc_path is "a/b/c.dir" then need "import/a/b" to
240 # exists before moving "archives/a/b/c.dir" -> "import/a/b"
241 my $import_target_parent_dir = dirname($import_assoc_dir);
242
243 if (-d $import_assoc_dir) {
244 # detected version from previous explode => remove it
245 &util::rm_r($import_assoc_dir);
246 }
247 else {
248 # First time => make sure parent directory exists to move
249 # "c.dir" (see above) into
250
251 &util::mk_all_dir($import_target_parent_dir);
252 }
253
254 &util::cp_r($archive_assoc_dir,$import_target_parent_dir)
255 }
256}
257
258
259sub explode
260{
261 my $self = shift @_;
262
263 my $username = $self->{'username'};
264 my $collect = $self->{'collect'};
265 my $gsdl_cgi = $self->{'gsdl_cgi'};
266 my $gsdl_home = $self->{'gsdlhome'};
267
268 # Authenticate user if it is enabled
269 if ($baseaction::authentication_enabled) {
270 # Ensure the user is allowed to edit this collection
271 &authenticate_user($gsdl_cgi, $username, $collect);
272 }
273
274 # Derive the archives dir
275 my $site = $self->{'site'};
276 my $collect_dir = $gsdl_cgi->get_collection_dir($site);
277
278 my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
279 ##my $archive_dir = &util::filename_cat($ENV{'GSDLCOLLECTDIR'},"archives");
280
281 # Make sure the collection isn't locked by someone else
282 $self->lock_collection($username, $collect);
283
284 # look up additional args
285 my $docid = $self->{'d'};
286 if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
287 $self->unlock_collection($username, $collect);
288 $gsdl_cgi->generate_error("No docid (d=...) specified.");
289 }
290
291 my ($docid_root,$docid_secnum) = ($docid =~ m/^(.*?)(\..*)?$/);
292
293 my $orig_import_filenames = $self->docid_to_import_filenames($docid_root);
294 my $docid_keys = $self->import_filenames_to_docids($orig_import_filenames);
295 my $expanded_import_filenames = $self->docid_to_import_filenames(@$docid_keys);
296
297 $self->remove_import_filenames($expanded_import_filenames);
298 $self->move_docoids_to_import($docid_keys);
299
300 # Release the lock once it is done
301 $self->unlock_collection($username, $collect);
302
303 my $mess = "Base Doc ID: $docid_root\n-----\n";
304 $mess .= join("\n",@$expanded_import_filenames);
305
306 $gsdl_cgi->generate_ok_message($mess);
307
308}
309
310
311
3121;
Note: See TracBrowser for help on using the repository browser.