1 | ###########################################################################
|
---|
2 | #
|
---|
3 | # metadataaction.pm --
|
---|
4 | # A component of the Greenstone digital library software
|
---|
5 | # from the New Zealand Digital Library Project at the
|
---|
6 | # University of Waikato, New Zealand.
|
---|
7 | #
|
---|
8 | # Copyright (C) 2009 New Zealand Digital Library Project
|
---|
9 | #
|
---|
10 | # This program is free software; you can redistr te it and/or modify
|
---|
11 | # it under the terms of the GNU General Public License as published by
|
---|
12 | # the Free Software Foundation; either version 2 of the License, or
|
---|
13 | # (at your option) any later version.
|
---|
14 | #
|
---|
15 | # This program is distributed in the hope that it will be useful,
|
---|
16 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
17 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
18 | # GNU General Public License for more details.
|
---|
19 | #
|
---|
20 | # You should have received a copy of the GNU General Public License
|
---|
21 | # along with this program; if not, write to the Free Software
|
---|
22 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
23 | #
|
---|
24 | ###########################################################################
|
---|
25 |
|
---|
26 | package metadataaction;
|
---|
27 |
|
---|
28 | use strict;
|
---|
29 |
|
---|
30 | use cgiactions::baseaction;
|
---|
31 |
|
---|
32 | use dbutil;
|
---|
33 | use ghtml;
|
---|
34 |
|
---|
35 |
|
---|
36 | BEGIN {
|
---|
37 | # unshift (@INC, "$ENV{'GSDLHOME'}/perllib/cpan/perl-5.8");
|
---|
38 | require XML::Rules;
|
---|
39 | }
|
---|
40 |
|
---|
41 |
|
---|
42 | @metadataaction::ISA = ('baseaction');
|
---|
43 |
|
---|
44 |
|
---|
45 | my $action_table =
|
---|
46 | {
|
---|
47 | "get-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
|
---|
48 | 'optional-args' => [] },
|
---|
49 |
|
---|
50 | "get-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
|
---|
51 | 'optional-args' => [ "metapos" ] },
|
---|
52 |
|
---|
53 | "set-live-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
|
---|
54 | 'optional-args' => [ ] },
|
---|
55 |
|
---|
56 | "set-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
|
---|
57 | 'optional-args' => [ "metapos" ] },
|
---|
58 |
|
---|
59 | "set-archives-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
|
---|
60 | 'optional-args' => [ "metapos" ]
|
---|
61 |
|
---|
62 | },
|
---|
63 |
|
---|
64 | "set-import-metadata" => { 'compulsory-args' => [ "metaname", "metavalue" ],
|
---|
65 | 'optional-args' => [ "d", "f", "metamode" ]
|
---|
66 | # metamode can be "accumulate", "override", or "unique-id"
|
---|
67 | },
|
---|
68 |
|
---|
69 |
|
---|
70 | "remove-live-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
|
---|
71 | 'optional-args' => [ ] },
|
---|
72 |
|
---|
73 | "remove-metadata" => { 'compulsory-args' => [ "d", "metaname" ],
|
---|
74 | 'optional-args' => [ "metapos" ] },
|
---|
75 |
|
---|
76 | "insert-metadata" => { 'compulsory-args' => [ "d", "metaname", "metavalue" ],
|
---|
77 | 'optional-args' => [ ]
|
---|
78 | }
|
---|
79 | };
|
---|
80 |
|
---|
81 |
|
---|
82 | sub new
|
---|
83 | {
|
---|
84 | my $class = shift (@_);
|
---|
85 | my ($gsdl_cgi,$iis6_mode) = @_;
|
---|
86 |
|
---|
87 | my $self = new baseaction($action_table,$gsdl_cgi,$iis6_mode);
|
---|
88 |
|
---|
89 | return bless $self, $class;
|
---|
90 | }
|
---|
91 |
|
---|
92 |
|
---|
93 | sub get_live_metadata
|
---|
94 | {
|
---|
95 | my $self = shift @_;
|
---|
96 |
|
---|
97 | my $username = $self->{'username'};
|
---|
98 | my $collect = $self->{'collect'};
|
---|
99 | my $gsdl_cgi = $self->{'gsdl_cgi'};
|
---|
100 | my $gsdlhome = $self->{'gsdlhome'};
|
---|
101 |
|
---|
102 | # Note: Not sure why get_live_metadata doesn't need the authentication check
|
---|
103 |
|
---|
104 | # Obtain the collect dir
|
---|
105 | my $collect_dir = &util::filename_cat($gsdlhome, "collect");
|
---|
106 |
|
---|
107 | # Make sure the collection isn't locked by someone else
|
---|
108 | $self->lock_collection($username, $collect);
|
---|
109 |
|
---|
110 | # look up additional args
|
---|
111 | my $docid = $self->{'d'};
|
---|
112 | if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
|
---|
113 | $gsdl_cgi->generate_error("No docid (d=...) specified.");
|
---|
114 | }
|
---|
115 |
|
---|
116 | # Generate the dbkey
|
---|
117 | my $metaname = $self->{'metaname'};
|
---|
118 | my $dbkey = "$docid.$metaname";
|
---|
119 |
|
---|
120 | # To people who know $collect_tail please add some comments
|
---|
121 | # Obtain path to the database
|
---|
122 | my $collect_tail = $collect;
|
---|
123 | $collect_tail =~ s/^.*[\/|\\]//;
|
---|
124 | my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
|
---|
125 | my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", "live-$collect_tail", $index_text_directory);
|
---|
126 |
|
---|
127 | # Obtain the content of the key
|
---|
128 | my $cmd = "gdbmget $infodb_file_path $dbkey";
|
---|
129 | if (open(GIN,"$cmd |") == 0) {
|
---|
130 | # Catch error if gdbmget failed
|
---|
131 | my $mess = "Failed to get metadata key: $metaname\n";
|
---|
132 | $mess .= "$!\n";
|
---|
133 |
|
---|
134 | $gsdl_cgi->generate_error($mess);
|
---|
135 | }
|
---|
136 | else {
|
---|
137 | # Read everything in and concatenate them into $metavalue
|
---|
138 | my $metavalue = "";
|
---|
139 | my $line;
|
---|
140 | while (defined ($line=<GIN>)) {
|
---|
141 | $metavalue .= $line;
|
---|
142 | }
|
---|
143 | close(GIN);
|
---|
144 | chomp($metavalue); # Get rid off the tailing newlines
|
---|
145 | $gsdl_cgi->generate_ok_message("$metavalue");
|
---|
146 | }
|
---|
147 |
|
---|
148 | # Release the lock once it is done
|
---|
149 | $self->unlock_collection($username, $collect);
|
---|
150 | }
|
---|
151 |
|
---|
152 |
|
---|
153 | sub get_metadata
|
---|
154 | {
|
---|
155 | my $self = shift @_;
|
---|
156 |
|
---|
157 | my $username = $self->{'username'};
|
---|
158 | my $collect = $self->{'collect'};
|
---|
159 | my $gsdl_cgi = $self->{'gsdl_cgi'};
|
---|
160 | my $gsdlhome = $self->{'gsdlhome'};
|
---|
161 |
|
---|
162 | # Authenticate user if it is enabled
|
---|
163 | if ($baseaction::authentication_enabled) {
|
---|
164 | # Ensure the user is allowed to edit this collection
|
---|
165 | &authenticate_user($gsdl_cgi, $username, $collect);
|
---|
166 | }
|
---|
167 |
|
---|
168 | # Obtain the collect dir
|
---|
169 | my $collect_dir = &util::filename_cat($gsdlhome, "collect");
|
---|
170 |
|
---|
171 | # Make sure the collection isn't locked by someone else
|
---|
172 | $self->lock_collection($username, $collect);
|
---|
173 |
|
---|
174 | # look up additional args
|
---|
175 | my $docid = $self->{'d'};
|
---|
176 | my $metaname = $self->{'metaname'};
|
---|
177 | my $metapos = $self->{'metapos'};
|
---|
178 |
|
---|
179 | # To people who know $collect_tail please add some comments
|
---|
180 | # Obtain path to the database
|
---|
181 | my $collect_tail = $collect;
|
---|
182 | $collect_tail =~ s/^.*[\/\\]//;
|
---|
183 | my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
|
---|
184 | my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collect_tail, $index_text_directory);
|
---|
185 |
|
---|
186 | # Read the docid entry
|
---|
187 | my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $docid);
|
---|
188 | my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string);
|
---|
189 |
|
---|
190 | # Basically loop through and unescape_html the values
|
---|
191 | foreach my $k (keys %$doc_rec) {
|
---|
192 | my @escaped_v = ();
|
---|
193 | foreach my $v (@{$doc_rec->{$k}}) {
|
---|
194 | my $ev = &ghtml::unescape_html($v);
|
---|
195 | push(@escaped_v, $ev);
|
---|
196 | }
|
---|
197 | $doc_rec->{$k} = \@escaped_v;
|
---|
198 | }
|
---|
199 |
|
---|
200 | # Obtain the specified metadata value
|
---|
201 | $metapos = 0 if (!defined $metapos);
|
---|
202 | my $metavalue = $doc_rec->{$metaname}->[$metapos];
|
---|
203 | $gsdl_cgi->generate_ok_message("$metavalue");
|
---|
204 |
|
---|
205 | # Release the lock once it is done
|
---|
206 | $self->unlock_collection($username, $collect);
|
---|
207 | }
|
---|
208 |
|
---|
209 |
|
---|
210 | sub set_live_metadata
|
---|
211 | {
|
---|
212 | my $self = shift @_;
|
---|
213 |
|
---|
214 | my $username = $self->{'username'};
|
---|
215 | my $collect = $self->{'collect'};
|
---|
216 | my $gsdl_cgi = $self->{'gsdl_cgi'};
|
---|
217 | my $gsdlhome = $self->{'gsdlhome'};
|
---|
218 |
|
---|
219 | # don't user authenticate for now
|
---|
220 | if ($baseaction::authentication_enabled) {
|
---|
221 | # Ensure the user is allowed to edit this collection
|
---|
222 | &authenticate_user($gsdl_cgi, $username, $collect);
|
---|
223 | }
|
---|
224 |
|
---|
225 | # Obtain the collect dir
|
---|
226 | my $collect_dir = &util::filename_cat($gsdlhome, "collect");
|
---|
227 |
|
---|
228 | # Make sure the collection isn't locked by someone else
|
---|
229 | $self->lock_collection($username, $collect);
|
---|
230 |
|
---|
231 | # look up additional args
|
---|
232 | my $docid = $self->{'d'};
|
---|
233 | if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
|
---|
234 | $gsdl_cgi->generate_error("No docid (d=...) specified.");
|
---|
235 | }
|
---|
236 | my $metavalue = $self->{'metavalue'};
|
---|
237 |
|
---|
238 | # Generate the dbkey
|
---|
239 | my $metaname = $self->{'metaname'};
|
---|
240 | my $dbkey = "$docid.$metaname";
|
---|
241 |
|
---|
242 | # To people who know $collect_tail please add some comments
|
---|
243 | # Obtain path to the database
|
---|
244 | my $collect_tail = $collect;
|
---|
245 | $collect_tail =~ s/^.*[\/\\]//;
|
---|
246 | my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
|
---|
247 | my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", "live-$collect_tail", $index_text_directory);
|
---|
248 |
|
---|
249 | # Set the new value
|
---|
250 | my $cmd = "gdbmset \"$infodb_file_path\" \"$dbkey\" \"$metavalue\"";
|
---|
251 | my $status = system($cmd);
|
---|
252 | if ($status != 0) {
|
---|
253 | # Catch error if gdbmget failed
|
---|
254 | my $mess = "Failed to set metadata key: $dbkey\n";
|
---|
255 |
|
---|
256 | $mess .= "PATH: $ENV{'PATH'}\n";
|
---|
257 | $mess .= "cmd = $cmd\n";
|
---|
258 | $mess .= "Exit status: $status\n";
|
---|
259 | $mess .= "System Error Message: $!\n";
|
---|
260 |
|
---|
261 | $gsdl_cgi->generate_error($mess);
|
---|
262 | }
|
---|
263 | else {
|
---|
264 | $gsdl_cgi->generate_ok_message("set-live-metadata successful: Key[$metaname]=$metavalue");
|
---|
265 | }
|
---|
266 |
|
---|
267 | # Release the lock once it is done
|
---|
268 | $self->unlock_collection($username, $collect);
|
---|
269 | }
|
---|
270 |
|
---|
271 |
|
---|
272 | sub set_metadata
|
---|
273 | {
|
---|
274 | my $self = shift @_;
|
---|
275 |
|
---|
276 | my $username = $self->{'username'};
|
---|
277 | my $collect = $self->{'collect'};
|
---|
278 | my $gsdl_cgi = $self->{'gsdl_cgi'};
|
---|
279 | my $gsdlhome = $self->{'gsdlhome'};
|
---|
280 |
|
---|
281 | # don't user authenticate for now
|
---|
282 | if ($baseaction::authentication_enabled) {
|
---|
283 | # Ensure the user is allowed to edit this collection
|
---|
284 | &authenticate_user($gsdl_cgi, $username, $collect);
|
---|
285 | }
|
---|
286 |
|
---|
287 | # Obtain the collect dir
|
---|
288 | my $collect_dir = &util::filename_cat($gsdlhome, "collect");
|
---|
289 |
|
---|
290 | # Make sure the collection isn't locked by someone else
|
---|
291 | $self->lock_collection($username, $collect);
|
---|
292 |
|
---|
293 | # look up additional args
|
---|
294 | my $docid = $self->{'d'};
|
---|
295 | my $metaname = $self->{'metaname'};
|
---|
296 | my $metapos = $self->{'metapos'};
|
---|
297 | my $metavalue = $self->{'metavalue'};
|
---|
298 |
|
---|
299 | # To people who know $collect_tail please add some comments
|
---|
300 | # Obtain path to the database
|
---|
301 | my $collect_tail = $collect;
|
---|
302 | $collect_tail =~ s/^.*[\/\\]//;
|
---|
303 | my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
|
---|
304 | my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collect_tail, $index_text_directory);
|
---|
305 |
|
---|
306 | # Read the docid entry
|
---|
307 | my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $docid);
|
---|
308 | my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string);
|
---|
309 | foreach my $k (keys %$doc_rec) {
|
---|
310 | my @escaped_v = ();
|
---|
311 | foreach my $v (@{$doc_rec->{$k}}) {
|
---|
312 | if ($k eq "contains") {
|
---|
313 | # protect quotes in ".2;".3 etc
|
---|
314 | $v =~ s/\"/\\\"/g;
|
---|
315 | push(@escaped_v, $v);
|
---|
316 | }
|
---|
317 | else {
|
---|
318 | my $ev = &ghtml::unescape_html($v);
|
---|
319 | $ev =~ s/\"/\\\"/g;
|
---|
320 | push(@escaped_v, $ev);
|
---|
321 | }
|
---|
322 | }
|
---|
323 | $doc_rec->{$k} = \@escaped_v;
|
---|
324 | }
|
---|
325 | ## print STDERR "**** metavalue = $metavalue\n";
|
---|
326 |
|
---|
327 | # Protect the quotes
|
---|
328 | $metavalue =~ s/\"/\\\"/g;
|
---|
329 |
|
---|
330 | # Set the metadata value
|
---|
331 | if (defined $metapos) {
|
---|
332 | $doc_rec->{$metaname}->[$metapos] = $metavalue;
|
---|
333 | }
|
---|
334 | else {
|
---|
335 | $doc_rec->{$metaname} = [ $metavalue ];
|
---|
336 | }
|
---|
337 | ## print STDERR "**** metavalue = $metavalue\n";
|
---|
338 |
|
---|
339 | # Generate the record string
|
---|
340 | my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
|
---|
341 | ## print STDERR "**** ser dr\n$serialized_doc_rec\n\n\n";
|
---|
342 |
|
---|
343 | # Store it into GDBM
|
---|
344 | my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
|
---|
345 | my $status = system($cmd);
|
---|
346 | if ($status != 0) {
|
---|
347 | # Catch error if gdbmget failed
|
---|
348 | my $mess = "Failed to set metadata key: $docid\n";
|
---|
349 |
|
---|
350 | $mess .= "PATH: $ENV{'PATH'}\n";
|
---|
351 | $mess .= "cmd = $cmd\n";
|
---|
352 | $mess .= "Exit status: $status\n";
|
---|
353 | $mess .= "System Error Message: $!\n";
|
---|
354 |
|
---|
355 | $gsdl_cgi->generate_error($mess);
|
---|
356 | }
|
---|
357 | else {
|
---|
358 | my $mess = "set-document-metadata successful: Key[$docid]\n";
|
---|
359 | $mess .= " $metaname";
|
---|
360 | $mess .= "->[$metapos]" if (defined $metapos);
|
---|
361 | $mess .= " = $metavalue";
|
---|
362 |
|
---|
363 | $gsdl_cgi->generate_ok_message($mess);
|
---|
364 | }
|
---|
365 |
|
---|
366 | # Release the lock once it is done
|
---|
367 | $self->unlock_collection($username, $collect);
|
---|
368 | }
|
---|
369 |
|
---|
370 |
|
---|
371 | sub dxml_metadata
|
---|
372 | {
|
---|
373 | my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
|
---|
374 | my $metaname = $parser->{'parameters'}->{'metaname'};
|
---|
375 | my $metamode = $parser->{'parameters'}->{'metamode'};
|
---|
376 |
|
---|
377 | # Find the right metadata tag and checks if we are going to override it
|
---|
378 | # Note: This over writes the first metadata block it encountered. If there are multiple Sections in the doc.xml, it might not behave as you would expect
|
---|
379 | my $name_attr = $attrHash->{'name'};
|
---|
380 | if (($name_attr eq $metaname) && ($metamode eq "override")) {
|
---|
381 | # Get the value and override the current value
|
---|
382 | my $metavalue = $parser->{'parameters'}->{'metavalue'};
|
---|
383 | $attrHash->{'_content'} = $metavalue;
|
---|
384 |
|
---|
385 | # Don't want it to wipe out any other pieces of metadata
|
---|
386 | $parser->{'parameters'}->{'metamode'} = "done";
|
---|
387 | }
|
---|
388 |
|
---|
389 | # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
|
---|
390 | return [$tagname => $attrHash];
|
---|
391 | }
|
---|
392 |
|
---|
393 |
|
---|
394 | sub dxml_description
|
---|
395 | {
|
---|
396 | my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
|
---|
397 | my $metamode = $parser->{'parameters'}->{'metamode'};
|
---|
398 |
|
---|
399 | # Accumulate the metadata
|
---|
400 | # NOTE: This appends new metadata element to all description fields.
|
---|
401 | # If there are multiple Sections/SubSections, the new metadata block will get appended to all of them
|
---|
402 | if ($metamode eq "accumulate") {
|
---|
403 | # tack a new metadata tag on to the end of the <Metadata>+ block
|
---|
404 | my $metaname = $parser->{'parameters'}->{'metaname'};
|
---|
405 | my $metavalue = $parser->{'parameters'}->{'metavalue'};
|
---|
406 |
|
---|
407 | my $metadata_attr = { '_content' => $metavalue,
|
---|
408 | 'name' => $metaname,
|
---|
409 | 'mode' => "accumulate" };
|
---|
410 |
|
---|
411 | my $append_metadata = [ "Metadata" => $metadata_attr ];
|
---|
412 | my $description_content = $attrHash->{'_content'};
|
---|
413 |
|
---|
414 | push(@$description_content," ", $append_metadata ,"\n ");
|
---|
415 | }
|
---|
416 |
|
---|
417 |
|
---|
418 | # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
|
---|
419 | return [$tagname => $attrHash];
|
---|
420 | }
|
---|
421 |
|
---|
422 |
|
---|
423 | sub edit_xml_file
|
---|
424 | {
|
---|
425 | my $self = shift @_;
|
---|
426 | my ($gsdl_cgi, $filename, $rules, $options) = @_;
|
---|
427 |
|
---|
428 | # use XML::Rules to add it in (read in and out again)
|
---|
429 | my $parser = XML::Rules->new(rules => $rules,
|
---|
430 | style => 'filter' );
|
---|
431 |
|
---|
432 | my $xml_in = "";
|
---|
433 | if (!open(MIN,"<$filename")) {
|
---|
434 | $gsdl_cgi->generate_error("Unable to read in $filename: $!");
|
---|
435 | }
|
---|
436 | else {
|
---|
437 | # Read all the text in
|
---|
438 | my $line;
|
---|
439 | while (defined ($line=<MIN>)) {
|
---|
440 | $xml_in .= $line;
|
---|
441 | }
|
---|
442 | close(MIN);
|
---|
443 |
|
---|
444 | # Matched lines will get handled by the call backs
|
---|
445 | my $xml_out = "";
|
---|
446 | $parser->filter($xml_in,\$xml_out, $options);
|
---|
447 |
|
---|
448 | if (!open(MOUT,">$filename")) {
|
---|
449 | $gsdl_cgi->generate_error("Unable to write out to $filename: $!");
|
---|
450 | }
|
---|
451 | else {
|
---|
452 | print MOUT $xml_out;
|
---|
453 | close(MOUT);
|
---|
454 | }
|
---|
455 | }
|
---|
456 | }
|
---|
457 |
|
---|
458 |
|
---|
459 | sub edit_doc_xml
|
---|
460 | {
|
---|
461 | my $self = shift @_;
|
---|
462 | my ($gsdl_cgi, $doc_xml_filename, $metaname, $metavalue, $metapos) = @_;
|
---|
463 |
|
---|
464 | # use XML::Rules to add it in (read in and out again)
|
---|
465 | # Set the call back functions
|
---|
466 | my @rules =
|
---|
467 | ( _default => 'raw',
|
---|
468 | 'Metadata' => \&dxml_metadata,
|
---|
469 | 'Description' => \&dxml_description );
|
---|
470 |
|
---|
471 | # Sets the parameters
|
---|
472 | my $options = { 'metaname' => $metaname,
|
---|
473 | 'metapos' => $metapos,
|
---|
474 | 'metavalue' => $metavalue };
|
---|
475 | $self->edit_xml_file($gsdl_cgi,$doc_xml_filename,\@rules,$options);
|
---|
476 | }
|
---|
477 |
|
---|
478 |
|
---|
479 | sub set_archives_metadata
|
---|
480 | {
|
---|
481 | my $self = shift @_;
|
---|
482 |
|
---|
483 | my $username = $self->{'username'};
|
---|
484 | my $collect = $self->{'collect'};
|
---|
485 | my $gsdl_cgi = $self->{'gsdl_cgi'};
|
---|
486 | my $gsdlhome = $self->{'gsdlhome'};
|
---|
487 |
|
---|
488 | # don't user authenticate for now
|
---|
489 | if ($baseaction::authentication_enabled) {
|
---|
490 | # Ensure the user is allowed to edit this collection
|
---|
491 | $self->authenticate_user($username, $collect);
|
---|
492 | }
|
---|
493 |
|
---|
494 | # Obtain the collect and archive dir
|
---|
495 | my $collect_dir = &util::filename_cat($gsdlhome, "collect");
|
---|
496 | my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
|
---|
497 |
|
---|
498 | # Make sure the collection isn't locked by someone else
|
---|
499 | $self->lock_collection($username, $collect);
|
---|
500 |
|
---|
501 | # look up additional args
|
---|
502 | my $docid = $self->{'d'};
|
---|
503 | my $metaname = $self->{'metaname'};
|
---|
504 | my $metavalue = $self->{'metavalue'};
|
---|
505 | my $metapos = $self->{'metapos'};
|
---|
506 | $metapos = 0 if (!defined $metapos);
|
---|
507 |
|
---|
508 | # Obtain the doc.xml path for the specified docID
|
---|
509 | my $arcinfo_doc_filename = &dbutil::get_infodb_file_path("gdbm", "archiveinf-doc", $archive_dir);
|
---|
510 | my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $arcinfo_doc_filename, $docid);
|
---|
511 | my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string);
|
---|
512 | my $doc_xml_file = $doc_rec->{'doc-file'}->[0];
|
---|
513 |
|
---|
514 | # The $doc_xml_file is relative to the archives, and now let's get the full path
|
---|
515 | my $archives_dir = &util::filename_cat($collect_dir,$collect,"archives");
|
---|
516 | my $doc_xml_filename = &util::filename_cat($archives_dir,$doc_xml_file);
|
---|
517 |
|
---|
518 | # Edit the doc.xml file with the specified metadata name, value and position.
|
---|
519 | # TODO: there is a potential problem here as this edit_doc_xml function
|
---|
520 | # is assuming the simple doc.xml situation where there is only one Section and no SubSections.
|
---|
521 | # Running import.pl -groupsize will cause this to have multiple sections in one doc.xml
|
---|
522 | $self->edit_doc_xml($gsdl_cgi,$doc_xml_filename,
|
---|
523 | $metaname,$metavalue,$metapos);
|
---|
524 |
|
---|
525 | # Release the lock once it is done
|
---|
526 | $self->unlock_collection($username, $collect);
|
---|
527 | }
|
---|
528 |
|
---|
529 |
|
---|
530 | sub mxml_metadata
|
---|
531 | {
|
---|
532 | my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
|
---|
533 | my $metaname = $parser->{'parameters'}->{'metaname'};
|
---|
534 | my $metamode = $parser->{'parameters'}->{'metamode'};
|
---|
535 |
|
---|
536 | # Report error if we don't see FileName tag before this
|
---|
537 | die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
|
---|
538 |
|
---|
539 | # Don't do anything if we are not in the right FileSet
|
---|
540 | my $file_regexp = $parser->{'parameters'}->{'current_file'};
|
---|
541 | return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/));
|
---|
542 |
|
---|
543 | # Find the right metadata tag and checks if we are going to override it
|
---|
544 | # Note: This over writes the first metadata block it encountered even if it doesn't belong to the source file we specified
|
---|
545 | my $name_attr = $attrHash->{'name'};
|
---|
546 | if (($name_attr eq $metaname) && ($metamode eq "override")) {
|
---|
547 | # Get the value and override the current value
|
---|
548 | my $metavalue = $parser->{'parameters'}->{'metavalue'};
|
---|
549 | $attrHash->{'_content'} = $metavalue;
|
---|
550 |
|
---|
551 | # Don't want it to wipe out any other pieces of metadata
|
---|
552 | $parser->{'parameters'}->{'metamode'} = "done";
|
---|
553 | }
|
---|
554 |
|
---|
555 | # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
|
---|
556 | return [$tagname => $attrHash];
|
---|
557 | }
|
---|
558 |
|
---|
559 |
|
---|
560 | sub mxml_description
|
---|
561 | {
|
---|
562 | my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
|
---|
563 | my $metamode = $parser->{'parameters'}->{'metamode'};
|
---|
564 |
|
---|
565 | # Failed... Report error if we don't see FileName tag before this
|
---|
566 | die "Fatel Error: Unexpected metadata.xml structure. Undefind current_file, possiblely encountered Description before FileName" if (!defined($parser->{'parameters'}->{'current_file'}));
|
---|
567 |
|
---|
568 | # Don't do anything if we are not in the right FileSet
|
---|
569 | my $file_regexp = $parser->{'parameters'}->{'current_file'};
|
---|
570 | return [$tagname => $attrHash] if (!($parser->{'parameters'}->{'src_file'} =~ /$file_regexp/));
|
---|
571 |
|
---|
572 | # Accumulate the metadata block to the end of the description block
|
---|
573 | # Note: This adds metadata block to all description blocks, so if there are
|
---|
574 | # multiple FileSets, it will add to all of them
|
---|
575 | if ($metamode eq "accumulate") {
|
---|
576 | # tack a new metadata tag on to the end of the <Metadata>+ block
|
---|
577 | my $metaname = $parser->{'parameters'}->{'metaname'};
|
---|
578 | my $metavalue = $parser->{'parameters'}->{'metavalue'};
|
---|
579 |
|
---|
580 | my $metadata_attr = { '_content' => $metavalue,
|
---|
581 | 'name' => $metaname,
|
---|
582 | 'mode' => "accumulate" };
|
---|
583 |
|
---|
584 | my $append_metadata = [ "Metadata" => $metadata_attr ];
|
---|
585 | my $description_content = $attrHash->{'_content'};
|
---|
586 |
|
---|
587 | push(@$description_content," ", $append_metadata ,"\n ");
|
---|
588 | }
|
---|
589 |
|
---|
590 | # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
|
---|
591 | return [$tagname => $attrHash];
|
---|
592 | }
|
---|
593 |
|
---|
594 |
|
---|
595 | sub mxml_filename
|
---|
596 | {
|
---|
597 | my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
|
---|
598 |
|
---|
599 | # Store the filename of the Current Fileset
|
---|
600 | # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
|
---|
601 | # FileName tag must come before Description tag
|
---|
602 | $parser->{'parameters'}->{'current_file'} = $attrHash->{'_content'};
|
---|
603 |
|
---|
604 | # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
|
---|
605 | return [$tagname => $attrHash];
|
---|
606 | }
|
---|
607 |
|
---|
608 |
|
---|
609 | sub mxml_fileset
|
---|
610 | {
|
---|
611 | my ($tagname, $attrHash, $contextArray, $parentDataArray, $parser) = @_;
|
---|
612 |
|
---|
613 | # Initilise the current_file
|
---|
614 | # Note: According to http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd
|
---|
615 | # FileName tag must come before Description tag
|
---|
616 | $parser->{'parameters'}->{'current_file'} = "";
|
---|
617 |
|
---|
618 | # RAW is [$tagname => $attrHash] not $tagname => $attrHash!!
|
---|
619 | return [$tagname => $attrHash];
|
---|
620 | }
|
---|
621 |
|
---|
622 |
|
---|
623 | sub edit_metadata_xml
|
---|
624 | {
|
---|
625 | my $self = shift @_;
|
---|
626 | my ($gsdl_cgi, $metadata_xml_filename, $metaname, $metavalue, $metamode, $src_file) = @_;
|
---|
627 |
|
---|
628 | # Set the call-back functions for the metadata tags
|
---|
629 | my @rules =
|
---|
630 | ( _default => 'raw',
|
---|
631 | 'FileName' => \&mxml_filename,
|
---|
632 | 'Metadata' => \&mxml_metadata,
|
---|
633 | 'Description' => \&mxml_description,
|
---|
634 | 'FileSet' => \&mxml_fileset);
|
---|
635 |
|
---|
636 | # use XML::Rules to add it in (read in and out again)
|
---|
637 | my $parser = XML::Rules->new(rules => \@rules,
|
---|
638 | style => 'filter',
|
---|
639 | output_encoding => 'utf8');
|
---|
640 |
|
---|
641 | my $xml_in = "";
|
---|
642 | if (!open(MIN,"<$metadata_xml_filename")) {
|
---|
643 | $gsdl_cgi->generate_error("Unable to read in $metadata_xml_filename: $!");
|
---|
644 | }
|
---|
645 | else {
|
---|
646 | # Read them in
|
---|
647 | my $line;
|
---|
648 | while (defined ($line=<MIN>)) {
|
---|
649 | $xml_in .= $line;
|
---|
650 | }
|
---|
651 | close(MIN);
|
---|
652 |
|
---|
653 | # Filter with the call-back functions
|
---|
654 | my $xml_out = "";
|
---|
655 | $parser->filter($xml_in,\$xml_out, { metaname => $metaname,
|
---|
656 | metavalue => $metavalue,
|
---|
657 | metamode => $metamode,
|
---|
658 | src_file => $src_file,
|
---|
659 | current_file => undef} );
|
---|
660 |
|
---|
661 | if (!open(MOUT,">$metadata_xml_filename")) {
|
---|
662 | $gsdl_cgi->generate_error("Unable to write out to $metadata_xml_filename: $!");
|
---|
663 | }
|
---|
664 | else {
|
---|
665 | # Some wise person please find out how to keep the DTD and encode lines in after it gets filtered by this XML::Rules
|
---|
666 | # At the moment, I will just hack it!
|
---|
667 | my $header_with_utf8_dtd = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n<!DOCTYPE DirectoryMetadata SYSTEM \"http://greenstone.org/dtd/DirectoryMetadata/1.0/DirectoryMetadata.dtd\">";
|
---|
668 | $xml_out =~ s/\<\?xml\sversion\=\"1.0\"\?\>/$header_with_utf8_dtd/;
|
---|
669 | print MOUT $xml_out;
|
---|
670 | close(MOUT);
|
---|
671 | }
|
---|
672 | }
|
---|
673 | }
|
---|
674 |
|
---|
675 |
|
---|
676 | sub set_import_metadata
|
---|
677 | {
|
---|
678 | my $self = shift @_;
|
---|
679 |
|
---|
680 | my $username = $self->{'username'};
|
---|
681 | my $collect = $self->{'collect'};
|
---|
682 | my $gsdl_cgi = $self->{'gsdl_cgi'};
|
---|
683 | my $gsdlhome = $self->{'gsdlhome'};
|
---|
684 |
|
---|
685 | # don't user authenticate for now
|
---|
686 | if ($baseaction::authentication_enabled) {
|
---|
687 | # Ensure the user is allowed to edit this collection
|
---|
688 | $self->authenticate_user($username, $collect);
|
---|
689 | }
|
---|
690 |
|
---|
691 | # Obtain the collect and archive dir
|
---|
692 | my $collect_dir = &util::filename_cat($gsdlhome, "collect");
|
---|
693 | my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
|
---|
694 |
|
---|
695 | # Make sure the collection isn't locked by someone else
|
---|
696 | $self->lock_collection($username, $collect);
|
---|
697 |
|
---|
698 | # look up additional args
|
---|
699 | # want either d= or f=
|
---|
700 | my $docid = $self->{'d'};
|
---|
701 | my $import_file = $self->{'f'};
|
---|
702 | if ((!defined $docid) && (!defined $import_file)) {
|
---|
703 | $gsdl_cgi->generate_error("No docid (d=...) or import file (f=) specified.");
|
---|
704 | }
|
---|
705 |
|
---|
706 | # Get the parameters and set default mode to "accumulate"
|
---|
707 | my $metaname = $self->{'metaname'};
|
---|
708 | my $metavalue = $self->{'metavalue'};
|
---|
709 | my $metamode = $self->{'metamode'};
|
---|
710 | if ((!defined $metamode) || ($metamode =~ m/^\s*$/)) {
|
---|
711 | # make "accumulate" the default (less destructive, as won't actually
|
---|
712 | # delete any existing values)
|
---|
713 | $metamode = "accumulate";
|
---|
714 | }
|
---|
715 |
|
---|
716 | # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
|
---|
717 | # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
|
---|
718 | my $metadata_xml_file;
|
---|
719 | my $import_filename = undef;
|
---|
720 | if (defined $docid) {
|
---|
721 | my $arcinfo_doc_filename = &dbutil::get_infodb_file_path("gdbm", "archiveinf-doc", $archive_dir);
|
---|
722 | my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $arcinfo_doc_filename, $docid);
|
---|
723 | my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string);
|
---|
724 |
|
---|
725 | # This now stores the full pathname
|
---|
726 | $import_filename = $doc_rec->{'src-file'}->[0];
|
---|
727 | }
|
---|
728 | else {
|
---|
729 | $import_filename = &util::filename_cat($collect_dir,$collect,$import_file);
|
---|
730 | }
|
---|
731 |
|
---|
732 | # figure out correct metadata.xml file [?]
|
---|
733 | # Assuming the metadata.xml file is next to the source file
|
---|
734 | # Note: This will not work if it is using the inherited metadata from the parent folder
|
---|
735 | my ($import_tailname, $import_dirname)
|
---|
736 | = File::Basename::fileparse($import_filename);
|
---|
737 | my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
|
---|
738 |
|
---|
739 | # Edit the metadata.xml
|
---|
740 | # Modified by Jeffrey from DL Consulting
|
---|
741 | # Handle the case where there is one metadata.xml file for multiple FileSets
|
---|
742 | # The XML filter needs to know whether it is in the right FileSet
|
---|
743 | # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
|
---|
744 | # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
|
---|
745 | $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
|
---|
746 | $metaname, $metavalue, $metamode, $import_tailname);
|
---|
747 |
|
---|
748 | # Release the lock once it is done
|
---|
749 | $self->unlock_collection($username, $collect);
|
---|
750 | }
|
---|
751 |
|
---|
752 |
|
---|
753 | sub remove_live_metadata
|
---|
754 | {
|
---|
755 | my $self = shift @_;
|
---|
756 |
|
---|
757 | my $username = $self->{'username'};
|
---|
758 | my $collect = $self->{'collect'};
|
---|
759 | my $gsdl_cgi = $self->{'gsdl_cgi'};
|
---|
760 | my $gsdlhome = $self->{'gsdlhome'};
|
---|
761 |
|
---|
762 | if ($baseaction::authentication_enabled) {
|
---|
763 | # Ensure the user is allowed to edit this collection
|
---|
764 | &authenticate_user($gsdl_cgi, $username, $collect);
|
---|
765 | }
|
---|
766 |
|
---|
767 | # Obtain the collect dir
|
---|
768 | my $collect_dir = &util::filename_cat($gsdlhome, "collect");
|
---|
769 |
|
---|
770 | # Make sure the collection isn't locked by someone else
|
---|
771 | $self->lock_collection($username, $collect);
|
---|
772 |
|
---|
773 | # look up additional args
|
---|
774 | my $docid = $self->{'d'};
|
---|
775 | if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
|
---|
776 | $gsdl_cgi->generate_error("No docid (d=...) specified.");
|
---|
777 | }
|
---|
778 |
|
---|
779 | # Generate the dbkey
|
---|
780 | my $metaname = $self->{'metaname'};
|
---|
781 | my $dbkey = "$docid.$metaname";
|
---|
782 |
|
---|
783 | # To people who know $collect_tail please add some comments
|
---|
784 | # Obtain the live gdbm_db path
|
---|
785 | my $collect_tail = $collect;
|
---|
786 | $collect_tail =~ s/^.*[\/\\]//;
|
---|
787 | my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
|
---|
788 | my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", "live-$collect_tail", $index_text_directory);
|
---|
789 |
|
---|
790 | # Remove the key
|
---|
791 | my $cmd = "gdbmdel \"$infodb_file_path\" \"$dbkey\"";
|
---|
792 | my $status = system($cmd);
|
---|
793 | if ($status != 0) {
|
---|
794 | # Catch error if gdbmdel failed
|
---|
795 | my $mess = "Failed to set metadata key: $dbkey\n";
|
---|
796 |
|
---|
797 | $mess .= "PATH: $ENV{'PATH'}\n";
|
---|
798 | $mess .= "cmd = $cmd\n";
|
---|
799 | $mess .= "Exit status: $status\n";
|
---|
800 | $mess .= "System Error Message: $!\n";
|
---|
801 |
|
---|
802 | $gsdl_cgi->generate_error($mess);
|
---|
803 | }
|
---|
804 | else {
|
---|
805 | $gsdl_cgi->generate_ok_message("DB remove successful: Key[$metaname]");
|
---|
806 | }
|
---|
807 |
|
---|
808 | }
|
---|
809 |
|
---|
810 |
|
---|
811 | sub remove_metadata
|
---|
812 | {
|
---|
813 | my $self = shift @_;
|
---|
814 |
|
---|
815 | my $username = $self->{'username'};
|
---|
816 | my $collect = $self->{'collect'};
|
---|
817 | my $gsdl_cgi = $self->{'gsdl_cgi'};
|
---|
818 | my $gsdlhome = $self->{'gsdlhome'};
|
---|
819 |
|
---|
820 | # don't user authenticate for now
|
---|
821 | if ($baseaction::authentication_enabled) {
|
---|
822 | # Ensure the user is allowed to edit this collection
|
---|
823 | &authenticate_user($gsdl_cgi, $username, $collect);
|
---|
824 | }
|
---|
825 |
|
---|
826 | # Obtain the collect dir
|
---|
827 | my $collect_dir = &util::filename_cat($gsdlhome, "collect");
|
---|
828 |
|
---|
829 | # Make sure the collection isn't locked by someone else
|
---|
830 | $self->lock_collection($username, $collect);
|
---|
831 |
|
---|
832 | # look up additional args
|
---|
833 | my $docid = $self->{'d'};
|
---|
834 | if ((!defined $docid) || ($docid =~ m/^\s*$/)) {
|
---|
835 | $gsdl_cgi->generate_error("No docid (d=...) specified.");
|
---|
836 | }
|
---|
837 | my $metaname = $self->{'metaname'};
|
---|
838 | my $metapos = $self->{'metapos'};
|
---|
839 |
|
---|
840 | # To people who know $collect_tail please add some comments
|
---|
841 | # Obtain the path to the database
|
---|
842 | my $collect_tail = $collect;
|
---|
843 | $collect_tail =~ s/^.*[\/\\]//;
|
---|
844 | my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
|
---|
845 | my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collect_tail, $index_text_directory);
|
---|
846 |
|
---|
847 | # Read the docid entry
|
---|
848 | my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $docid);
|
---|
849 | my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string);
|
---|
850 |
|
---|
851 | # Basically loop through and unescape_html the values
|
---|
852 | foreach my $k (keys %$doc_rec) {
|
---|
853 | my @escaped_v = ();
|
---|
854 | foreach my $v (@{$doc_rec->{$k}}) {
|
---|
855 | if ($k eq "contains") {
|
---|
856 | # protect quotes in ".2;".3 etc
|
---|
857 | $v =~ s/\"/\\\"/g;
|
---|
858 | push(@escaped_v, $v);
|
---|
859 | }
|
---|
860 | else {
|
---|
861 | my $ev = &ghtml::unescape_html($v);
|
---|
862 | $ev =~ s/\"/\\\"/g;
|
---|
863 | push(@escaped_v, $ev);
|
---|
864 | }
|
---|
865 | }
|
---|
866 | $doc_rec->{$k} = \@escaped_v;
|
---|
867 | }
|
---|
868 |
|
---|
869 | # Check to make sure the key does exist
|
---|
870 | if (!defined ($doc_rec->{$metaname})) {
|
---|
871 | $gsdl_cgi->generate_error("No metadata field \"" . $metaname . "\" in the specified document: [" . $docid . "]");
|
---|
872 | }
|
---|
873 |
|
---|
874 | # Obtain the specified metadata pos
|
---|
875 | $metapos = 0 if (!defined $metapos);
|
---|
876 |
|
---|
877 | # consider check key is defined before deleting?
|
---|
878 | # Loop through the metadata array and ignore the specified position
|
---|
879 | my $filtered_metadata = [];
|
---|
880 | my $num_metadata_vals = scalar(@{$doc_rec->{$metaname}});
|
---|
881 | for (my $i=0; $i<$num_metadata_vals; $i++) {
|
---|
882 | my $metavalue = shift(@{$doc_rec->{$metaname}});
|
---|
883 |
|
---|
884 | if ($i != $metapos) {
|
---|
885 | push(@$filtered_metadata,$metavalue)
|
---|
886 | }
|
---|
887 | }
|
---|
888 | $doc_rec->{$metaname} = $filtered_metadata;
|
---|
889 |
|
---|
890 | # Turn the record back to string
|
---|
891 | my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
|
---|
892 |
|
---|
893 | # Store it back to the database
|
---|
894 | my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
|
---|
895 | my $status = system($cmd);
|
---|
896 | if ($status != 0) {
|
---|
897 | my $mess = "Failed to set metadata key: $docid\n";
|
---|
898 |
|
---|
899 | $mess .= "PATH: $ENV{'PATH'}\n";
|
---|
900 | $mess .= "cmd = $cmd\n";
|
---|
901 | $mess .= "Exit status: $status\n";
|
---|
902 | $mess .= "System Error Message: $!\n";
|
---|
903 |
|
---|
904 | $gsdl_cgi->generate_error($mess);
|
---|
905 | }
|
---|
906 | else {
|
---|
907 | my $mess = "DB set (with item deleted) successful: Key[$docid]\n";
|
---|
908 | $mess .= " $metaname";
|
---|
909 | $mess .= "->[$metapos]" if (defined $metapos);
|
---|
910 |
|
---|
911 | $gsdl_cgi->generate_ok_message($mess);
|
---|
912 | }
|
---|
913 | }
|
---|
914 |
|
---|
915 |
|
---|
916 | # Was trying to reused the codes, but the functions need to be broken down more before they can be reused, otherwise there will be too much overhead and duplicate process...
|
---|
917 | sub insert_metadata
|
---|
918 | {
|
---|
919 | my $self = shift @_;
|
---|
920 |
|
---|
921 | my $username = $self->{'username'};
|
---|
922 | my $collect = $self->{'collect'};
|
---|
923 | my $gsdl_cgi = $self->{'gsdl_cgi'};
|
---|
924 | my $gsdlhome = $self->{'gsdlhome'};
|
---|
925 |
|
---|
926 | # If the import metadata and gdbm database have been updated, we need to insert some notification to warn user that the the text they see at the moment is not indexed and require a rebuild.
|
---|
927 | my $rebuild_pending_macro = "_rebuildpendingmessage_";
|
---|
928 |
|
---|
929 | # don't user authenticate for now
|
---|
930 | if ($baseaction::authentication_enabled) {
|
---|
931 | # Ensure the user is allowed to edit this collection
|
---|
932 | $self->authenticate_user($username, $collect);
|
---|
933 | }
|
---|
934 |
|
---|
935 | # Obtain the collect and archive dir
|
---|
936 | my $collect_dir = &util::filename_cat($gsdlhome, "collect");
|
---|
937 | my $archive_dir = &util::filename_cat($collect_dir,$collect,"archives");
|
---|
938 |
|
---|
939 | # Make sure the collection isn't locked by someone else
|
---|
940 | $self->lock_collection($username, $collect);
|
---|
941 |
|
---|
942 | # Check additional args
|
---|
943 | my $docid = $self->{'d'};
|
---|
944 | if (!defined($docid)) {
|
---|
945 | $gsdl_cgi->generate_error("No document id is specified: d=...");
|
---|
946 | }
|
---|
947 | my $metaname = $self->{'metaname'};
|
---|
948 | if (!defined($metaname)) {
|
---|
949 | $gsdl_cgi->generate_error("No metaname is specified: metadataname=...");
|
---|
950 | }
|
---|
951 | my $metavalue = $self->{'metavalue'};
|
---|
952 | if (!defined($metavalue) || $metavalue eq "") {
|
---|
953 | $gsdl_cgi->generate_error("No metavalue or empty metavalue is specified: metadataname=...");
|
---|
954 | }
|
---|
955 | # make "accumulate" the default (less destructive, as won't actually
|
---|
956 | # delete any existing values)
|
---|
957 | my $metamode = "accumulate";
|
---|
958 |
|
---|
959 | #=======================================================================#
|
---|
960 | # set_import_metadata [START]
|
---|
961 | #=======================================================================#
|
---|
962 | # Obtain where the metadata.xml is from the archiveinfo-doc.gdb file
|
---|
963 | # If the doc oid is not specified, we assume the metadata.xml is next to the specified "f"
|
---|
964 | my $metadata_xml_file;
|
---|
965 | my $arcinfo_doc_filename = &dbutil::get_infodb_file_path("gdbm", "archiveinf-doc", $archive_dir);
|
---|
966 | my $archive_doc_rec_string = &dbutil::read_infodb_entry("gdbm", $arcinfo_doc_filename, $docid);
|
---|
967 | my $archive_doc_rec = &dbutil::convert_infodb_string_to_hash($archive_doc_rec_string);
|
---|
968 |
|
---|
969 | # This now stores the full pathname
|
---|
970 | my $import_filename = $archive_doc_rec->{'src-file'}->[0];
|
---|
971 |
|
---|
972 | # figure out correct metadata.xml file [?]
|
---|
973 | # Assuming the metadata.xml file is next to the source file
|
---|
974 | # Note: This will not work if it is using the inherited metadata from the parent folder
|
---|
975 | my ($import_tailname, $import_dirname)
|
---|
976 | = File::Basename::fileparse($import_filename);
|
---|
977 | my $metadata_xml_filename = &util::filename_cat($import_dirname,"metadata.xml");
|
---|
978 |
|
---|
979 | # Shane's escape characters
|
---|
980 | $metavalue = pack "U0C*", unpack "C*", $metavalue;
|
---|
981 | $metavalue =~ s/\,/,/g;
|
---|
982 | $metavalue =~ s/\:/:/g;
|
---|
983 | $metavalue =~ s/\|/|/g;
|
---|
984 | $metavalue =~ s/\(/(/g;
|
---|
985 | $metavalue =~ s/\)/)/g;
|
---|
986 | $metavalue =~ s/\[/[/g;
|
---|
987 | $metavalue =~ s/\\/\/g;
|
---|
988 | $metavalue =~ s/\]/]/g;
|
---|
989 | $metavalue =~ s/\{/{/g;
|
---|
990 | $metavalue =~ s/\}/}/g;
|
---|
991 | $metavalue =~ s/\"/"/g;
|
---|
992 | $metavalue =~ s/\`/`/g;
|
---|
993 | $metavalue =~ s/\n/_newline_/g;
|
---|
994 |
|
---|
995 | # Edit the metadata.xml
|
---|
996 | # Modified by Jeffrey from DL Consulting
|
---|
997 | # Handle the case where there is one metadata.xml file for multiple FileSets
|
---|
998 | # The XML filter needs to know whether it is in the right FileSet
|
---|
999 | # TODO: This doesn't fix the problem where the metadata.xml is not next to the src file.
|
---|
1000 | # TODO: This doesn't handle the common metadata (where FileName doesn't point to a single file)
|
---|
1001 | $self->edit_metadata_xml($gsdl_cgi, $metadata_xml_filename,
|
---|
1002 | $metaname, $metavalue, $metamode, $import_tailname);
|
---|
1003 | #=======================================================================#
|
---|
1004 | # set_import_metadata [END]
|
---|
1005 | #=======================================================================#
|
---|
1006 |
|
---|
1007 |
|
---|
1008 | #=======================================================================#
|
---|
1009 | # set_metadata (accumulate version) [START]
|
---|
1010 | #=======================================================================#
|
---|
1011 | # To people who know $collect_tail please add some comments
|
---|
1012 | # Obtain path to the database
|
---|
1013 | my $collect_tail = $collect;
|
---|
1014 | $collect_tail =~ s/^.*[\/\\]//;
|
---|
1015 | my $index_text_directory = &util::filename_cat($collect_dir,$collect,"index","text");
|
---|
1016 | my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collect_tail, $index_text_directory);
|
---|
1017 |
|
---|
1018 | # Read the docid entry
|
---|
1019 | my $doc_rec_string = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $docid);
|
---|
1020 | my $doc_rec = &dbutil::convert_infodb_string_to_hash($doc_rec_string);
|
---|
1021 | foreach my $k (keys %$doc_rec) {
|
---|
1022 | my @escaped_v = ();
|
---|
1023 | foreach my $v (@{$doc_rec->{$k}}) {
|
---|
1024 | if ($k eq "contains") {
|
---|
1025 | # protect quotes in ".2;".3 etc
|
---|
1026 | $v =~ s/\"/\\\"/g;
|
---|
1027 | push(@escaped_v, $v);
|
---|
1028 | }
|
---|
1029 | else {
|
---|
1030 | my $ev = &ghtml::unescape_html($v);
|
---|
1031 | $ev =~ s/\"/\\\"/g;
|
---|
1032 | push(@escaped_v, $ev);
|
---|
1033 | }
|
---|
1034 | }
|
---|
1035 | $doc_rec->{$k} = \@escaped_v;
|
---|
1036 | }
|
---|
1037 |
|
---|
1038 | # Protect the quotes
|
---|
1039 | $metavalue =~ s/\"/\\\"/g;
|
---|
1040 |
|
---|
1041 | # Adds the pending macro
|
---|
1042 | my $macro_metavalue = $rebuild_pending_macro . $metavalue;
|
---|
1043 |
|
---|
1044 | # If the metadata doesn't exist, create a new one
|
---|
1045 | if (!defined($doc_rec->{$metaname})){
|
---|
1046 | $doc_rec->{$metaname} = [ $macro_metavalue ];
|
---|
1047 | }
|
---|
1048 | # Else, let's acculumate the values
|
---|
1049 | else {
|
---|
1050 | push(@{$doc_rec->{$metaname}},$macro_metavalue);
|
---|
1051 | }
|
---|
1052 |
|
---|
1053 | # Generate the record string
|
---|
1054 | my $serialized_doc_rec = &dbutil::convert_infodb_hash_to_string($doc_rec);
|
---|
1055 |
|
---|
1056 | # Store it into GDBM
|
---|
1057 | my $cmd = "gdbmset \"$infodb_file_path\" \"$docid\" \"$serialized_doc_rec\"";
|
---|
1058 | my $status = system($cmd);
|
---|
1059 | if ($status != 0) {
|
---|
1060 | # Catch error if gdbmget failed
|
---|
1061 | my $mess = "Failed to set metadata key: $docid\n";
|
---|
1062 |
|
---|
1063 | $mess .= "PATH: $ENV{'PATH'}\n";
|
---|
1064 | $mess .= "cmd = $cmd\n";
|
---|
1065 | $mess .= "Exit status: $status\n";
|
---|
1066 | $mess .= "System Error Message: $!\n";
|
---|
1067 |
|
---|
1068 | $gsdl_cgi->generate_error($mess);
|
---|
1069 | }
|
---|
1070 | else {
|
---|
1071 | my $mess = "insert-metadata successful: Key[$docid]\n";
|
---|
1072 | $mess .= " [In metadata.xml] $metaname";
|
---|
1073 | $mess .= " = $metavalue\n";
|
---|
1074 | $mess .= " [In database] $metaname";
|
---|
1075 | $mess .= " = $macro_metavalue\n";
|
---|
1076 | $mess .= " The new text has not been indexed, rebuilding collection is required\n";
|
---|
1077 | $gsdl_cgi->generate_ok_message($mess);
|
---|
1078 | }
|
---|
1079 | #=======================================================================#
|
---|
1080 | # set_metadata (accumulate version) [END]
|
---|
1081 | #=======================================================================#
|
---|
1082 |
|
---|
1083 | # Release the lock once it is done
|
---|
1084 | $self->unlock_collection($username, $collect);
|
---|
1085 | }
|
---|
1086 |
|
---|
1087 | 1;
|
---|