source: main/trunk/greenstone2/perllib/plugins/BasePlugin.pm@ 23347

Last change on this file since 23347 was 23347, checked in by davidb, 13 years ago

Tidy up of debugging statements for handline filename encodings, plus finishing off the 'deduce_filename_encoding' routine

  • Property svn:keywords set to Author Date Id Revision
File size: 42.2 KB
Line 
1###########################################################################
2#
3# BasePlugin.pm -- base class for all the import plugins
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999-2005 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26package BasePlugin;
27
28use strict;
29no strict 'subs';
30no strict 'refs'; # allow filehandles to be variables and viceversa
31
32use File::Basename;
33use Encode;
34
35use encodings;
36use unicode;
37use doc;
38use ghtml;
39use gsprintf 'gsprintf';
40
41use PrintInfo;
42
43BEGIN {
44 @BasePlugin::ISA = ( 'PrintInfo' );
45}
46
47# the different methods that can be applied when renaming
48# imported documents and their associated files
49our $file_rename_method_list =
50 [ { 'name' => "url",
51 'desc' => "{BasePlugin.rename_method.url}" },
52 { 'name' => "base64",
53 'desc' => "{BasePlugin.rename_method.base64}" },
54 { 'name' => "none",
55 'desc' => "{BasePlugin.rename_method.none}",
56 'hiddengli' => "yes" } ];
57
58our $encoding_list =
59 [ { 'name' => "ascii",
60 'desc' => "{BasePlugin.encoding.ascii}" },
61 { 'name' => "utf8",
62 'desc' => "{BasePlugin.encoding.utf8}" },
63 { 'name' => "unicode",
64 'desc' => "{BasePlugin.encoding.unicode}" } ];
65
66
67my $e = $encodings::encodings;
68foreach my $enc (sort {$e->{$a}->{'name'} cmp $e->{$b}->{'name'}} keys (%$e))
69{
70 my $hashEncode =
71 {'name' => $enc,
72 'desc' => $e->{$enc}->{'name'}};
73
74 push(@{$encoding_list},$hashEncode);
75}
76
77our $encoding_plus_auto_list =
78 [ { 'name' => "auto",
79 'desc' => "{BasePlugin.filename_encoding.auto}" },
80 { 'name' => "auto-language-analysis",
81 'desc' => "{BasePlugin.filename_encoding.auto_language_analysis}" }, # textcat
82 { 'name' => "auto-filesystem-encoding",
83 'desc' => "{BasePlugin.filename_encoding.auto_filesystem_encoding}" }, # locale
84 { 'name' => "auto-fl",
85 'desc' => "{BasePlugin.filename_encoding.auto_fl}" }, # locale followed by textcat
86 { 'name' => "auto-lf",
87 'desc' => "{BasePlugin.filename_encoding.auto_lf}" } ]; # texcat followed by locale
88
89push(@{$encoding_plus_auto_list},@{$encoding_list});
90
91our $oidtype_list =
92 [ { 'name' => "auto",
93 'desc' => "{BasePlugin.OIDtype.auto}" },
94 { 'name' => "hash",
95 'desc' => "{import.OIDtype.hash}" },
96 { 'name' => "assigned",
97 'desc' => "{import.OIDtype.assigned}" },
98 { 'name' => "incremental",
99 'desc' => "{import.OIDtype.incremental}" },
100 { 'name' => "dirname",
101 'desc' => "{import.OIDtype.dirname}" } ];
102
103my $arguments =
104 [ { 'name' => "process_exp",
105 'desc' => "{BasePlugin.process_exp}",
106 'type' => "regexp",
107 'deft' => "",
108 'reqd' => "no" },
109 { 'name' => "no_blocking",
110 'desc' => "{BasePlugin.no_blocking}",
111 'type' => "flag",
112 'reqd' => "no"},
113 { 'name' => "block_exp",
114 'desc' => "{BasePlugin.block_exp}",
115 'type' => "regexp",
116 'deft' => "",
117 'reqd' => "no" },
118 { 'name' => "store_original_file",
119 'desc' => "{BasePlugin.store_original_file}",
120 'type' => "flag",
121 'reqd' => "no" },
122 { 'name' => "associate_ext",
123 'desc' => "{BasePlugin.associate_ext}",
124 'type' => "string",
125 'reqd' => "no" },
126 { 'name' => "associate_tail_re",
127 'desc' => "{BasePlugin.associate_tail_re}",
128 'type' => "string",
129 'reqd' => "no" },
130 { 'name' => "OIDtype",
131 'desc' => "{import.OIDtype}",
132 'type' => "enum",
133 'list' => $oidtype_list,
134 # leave default empty so we can tell if its been set or not - if not set will use option from import.pl
135 'deft' => "auto",
136 'reqd' => "no" },
137 { 'name' => "OIDmetadata",
138 'desc' => "{import.OIDmetadata}",
139 'type' => "metadata",
140 'deft' => "dc.Identifier",
141 'reqd' => "no" },
142# { 'name' => "use_as_doc_identifier",
143# 'desc' => "{BasePlugin.use_as_doc_identifier}",
144# 'type' => "string",
145# 'reqd' => "no" ,
146# 'deft' => "" } ,
147 { 'name' => "no_cover_image",
148 'desc' => "{BasePlugin.no_cover_image}",
149 'type' => "flag",
150 'reqd' => "no" },
151 { 'name' => "filename_encoding",
152 'desc' => "{BasePlugin.filename_encoding}",
153 'type' => "enum",
154 'deft' => "auto",
155 'list' => $encoding_plus_auto_list,
156 'reqd' => "no" },
157 { 'name' => "smart_block",
158 'desc' => "{common.deprecated}. {BasePlugin.smart_block}",
159 'type' => "flag",
160 'reqd' => "no",
161 'hiddengli' => "yes" }, # deprecated, but leave in for old collections
162 { 'name' => "file_rename_method",
163 'desc' => "{BasePlugin.file_rename_method}",
164 'type' => "enum",
165 'deft' => &get_default_file_rename_method(), # by default rename imported files and assoc files using this encoding
166 'list' => $file_rename_method_list,
167 'reqd' => "no"
168 }
169
170 ];
171
172
173my $options = { 'name' => "BasePlugin",
174 'desc' => "{BasePlugin.desc}",
175 'abstract' => "yes",
176 'inherits' => "no",
177 'args' => $arguments };
178
179sub new {
180
181 my ($class) = shift (@_);
182 my ($pluginlist,$inputargs,$hashArgOptLists,$auxiliary) = @_;
183 push(@$pluginlist, $class);
184
185 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
186 push(@{$hashArgOptLists->{"OptList"}},$options);
187
188 my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists,$auxiliary);
189
190 if ($self->{'info_only'}) {
191 # don't worry about any options etc
192 return bless $self, $class;
193 }
194
195 if ($self->{'smart_block'}) {
196 print STDERR "WARNING: -smart_block option has been deprecated and is no longer useful\n";
197 }
198 $self->{'smart_block'} = undef;
199
200 my $plugin_name = (defined $pluginlist->[0]) ? $pluginlist->[0] : $class;
201 $self->{'plugin_type'} = $plugin_name;
202
203 # remove ex. from OIDmetadata
204 $self->{'OIDmetadata'} =~ s/^ex\.// if defined $self->{'OIDmetadata'};
205 $self->{'num_processed'} = 0;
206 $self->{'num_not_processed'} = 0;
207 $self->{'num_blocked'} = 0;
208 $self->{'num_archives'} = 0;
209 $self->{'cover_image'} = 1; # cover image is on by default
210 $self->{'cover_image'} = 0 if ($self->{'no_cover_image'});
211 $self->{'can_process_directories'} = 0;
212 #$self->{'option_list'} = $hashArgOptLists->{"OptList"};
213
214 my $associate_ext = $self->{'associate_ext'};
215 if ((defined $associate_ext) && ($associate_ext ne "")) {
216
217 my $associate_tail_re = $self->{'associate_tail_re'};
218 if ((defined $associate_tail_re) && ($associate_tail_re ne "")) {
219 my $outhandle = $self->{'outhandle'};
220 print $outhandle "Warning: can only specify 'associate_ext' or 'associate_tail_re'\n";
221 print $outhandle " defaulting to 'associate_tail_re'\n";
222 }
223 else {
224 my @exts = split(/,/,$associate_ext);
225
226 my @exts_bracketed = map { $_ = "(?:\\.$_)" } @exts;
227 my $associate_tail_re = join("|",@exts_bracketed);
228 $self->{'associate_tail_re'} = $associate_tail_re;
229 }
230
231 delete $self->{'associate_ext'};
232 }
233
234 return bless $self, $class;
235
236}
237
238sub merge_inheritance
239{
240 my $self = {};
241 my @child_selfs = @_;
242
243 foreach my $child_self (@child_selfs) {
244 foreach my $key (keys %$child_self) {
245 if (defined $self->{$key}) {
246 if ($self->{$key} ne $child_self->{$key}) {
247# print STDERR "Warning: Conflicting value in multiple inheritance for '$key'\n";
248# print STDERR "Existing stored value = $self->{$key}\n";
249# print STDERR "New (child) value = $child_self->{$key}\n";
250# print STDERR "Keeping existing value\n";
251 # Existing value seems to be option specified in collect.cfg
252
253 ### $self->{$key} = $child_self->{$key};
254
255 }
256 else {
257## print STDERR "****Info: Value $self->{$key} for $key already defined through multiple inheritance as the same value\n";
258 }
259
260 }
261 else {
262 $self->{$key} = $child_self->{$key};
263 }
264 }
265 }
266
267 return $self;
268}
269
270# initialize BasePlugin options
271# if init() is overridden in a sub-class, remember to call BasePlugin::init()
272sub init {
273 my $self = shift (@_);
274 my ($verbosity, $outhandle, $failhandle) = @_;
275
276 # verbosity is passed through from the processor
277 $self->{'verbosity'} = $verbosity;
278
279 # as are the outhandle and failhandle
280 $self->{'outhandle'} = $outhandle if defined $outhandle;
281 $self->{'failhandle'} = $failhandle;
282# $self->SUPER::init(@_);
283
284 # set process_exp and block_exp to defaults unless they were
285 # explicitly set
286
287 if ((!$self->is_recursive()) and
288 (!defined $self->{'process_exp'}) || ($self->{'process_exp'} eq "")) {
289
290 $self->{'process_exp'} = $self->get_default_process_exp ();
291 if ($self->{'process_exp'} eq "") {
292 warn ref($self) . " Warning: Non-recursive plugin has no process_exp\n";
293 }
294 }
295
296 if ((!defined $self->{'block_exp'}) || ($self->{'block_exp'} eq "")) {
297 $self->{'block_exp'} = $self->get_default_block_exp ();
298 }
299
300}
301
302sub begin {
303 my $self = shift (@_);
304 my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
305
306 if ($self->{'OIDtype'} eq "auto") {
307 # hasn't been set in the plugin, use the processor values
308 $self->{'OIDtype'} = $processor->{'OIDtype'};
309 $self->{'OIDmetadata'} = $processor->{'OIDmetadata'};
310 }
311 if ($self->{'OIDtype'} eq "hash") {
312 # should we hash on the file or on the doc xml??
313 $self->{'OIDtype'} = $self->get_oid_hash_type();
314 if ($self->{'OIDtype'} !~ /^(hash_on_file|hash_on_ga_xml)$/) {
315 $self->{'OIDtype'} = "hash_on_file";
316 }
317 }
318}
319
320# This is called once if removeold is set with import.pl. Most plugins will do
321# nothing but if a plugin does any stuff outside of creating doc obj, then
322# it may need to clear something.
323sub remove_all {
324 my $self = shift (@_);
325 my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
326}
327
328# This is called per document for docs that have been deleted from the
329# collection. Most plugins will do nothing
330# but if a plugin does any stuff outside of creating doc obj, then it may need
331# to clear something.
332sub remove_one {
333 my $self = shift (@_);
334
335 my ($file, $oids, $archivedir) = @_;
336 return 0 if $self->can_process_this_file($file);
337 return undef;
338}
339
340sub end {
341 # potentially called at the end of each plugin pass
342 # import.pl only has one plugin pass, but buildcol.pl has multiple ones
343
344 my ($self) = shift (@_);
345}
346
347sub deinit {
348 # called only once, after all plugin passes have been done
349
350 my ($self) = @_;
351}
352
353# default hashing type is to hash on the original file (or converted file)
354# override this to return hash_on_ga_xml for filetypes where hashing on the
355# file is no good eg video
356sub get_oid_hash_type {
357
358 my $self = shift (@_);
359
360 return "hash_on_file";
361}
362
363
364# this function should be overridden to return 1
365# in recursive plugins
366sub is_recursive {
367 my $self = shift (@_);
368
369 return 0;
370}
371
372sub get_default_block_exp {
373 my $self = shift (@_);
374
375 return "";
376}
377
378sub get_default_process_exp {
379 my $self = shift (@_);
380
381 return "";
382}
383
384# rename imported files and assoc files using URL encoding by default
385# as this will work for most plugins and give more legible filenames
386sub get_default_file_rename_method() {
387 my $self = shift (@_);
388 return "url";
389}
390
391# returns this plugin's active (possibly user-selected) file_rename_method
392sub get_file_rename_method() {
393 my $self = shift (@_);
394 my $rename_method = $self->{'file_rename_method'};
395 if($rename_method) {
396 return $rename_method;
397 } else {
398 return $self->get_default_file_rename_method();
399 }
400}
401
402# default implementation is to do nothing
403sub store_block_files {
404
405 my $self =shift (@_);
406 my ($filename_full_path, $block_hash) = @_;
407
408}
409
410# put files to block into hash
411sub use_block_expressions {
412
413 my $self =shift (@_);
414 my ($filename_full_path, $block_hash) = @_;
415
416 if ($self->{'block_exp'} ne "" && $filename_full_path =~ /$self->{'block_exp'}/) {
417 $block_hash->{'file_blocks'}->{$filename_full_path} = 1;
418 }
419
420}
421
422#default implementation is to block a file with same name as this, but extension jpg or JPG, if cover_images is on.
423sub block_cover_image
424{
425 my $self =shift;
426 my ($filename, $block_hash) = @_;
427
428 if ($self->{'cover_image'}) {
429 my $coverfile = $filename;
430 $coverfile =~ s/\.[^\\\/\.]+$/\.jpg/;
431 if (!-e $coverfile) {
432 $coverfile =~ s/jpg$/JPG/;
433 }
434 if (-e $coverfile) {
435 $block_hash->{'file_blocks'}->{$coverfile} = 1;
436 }
437 }
438
439 return;
440}
441
442
443# discover all the files that should be blocked by this plugin
444# check the args ...
445sub file_block_read {
446
447 my $self = shift (@_);
448 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $gli) = @_;
449 # Keep track of filenames with same root but different extensions
450 # Used to support -associate_ext and the more generalised
451 # -associate_tail_re
452 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
453
454 if (!-d $filename_full_path) {
455 $block_hash->{'all_files'}->{$file} = 1;
456 }
457
458 my $associate_tail_re = $self->{'associate_tail_re'};
459 if ((defined $associate_tail_re) && ($associate_tail_re ne "")) {
460 my ($file_prefix,$file_ext)
461 = &util::get_prefix_and_tail_by_regex($filename_full_path,$associate_tail_re);
462 if ((defined $file_prefix) && (defined $file_ext)) {
463 my $shared_fileroot = $block_hash->{'shared_fileroot'};
464 if (!defined $shared_fileroot->{$file_prefix}) {
465 my $file_prefix_rec = { 'tie_to' => undef,
466 'exts' => {} };
467 $shared_fileroot->{$file_prefix} = $file_prefix_rec;
468 }
469
470 my $file_prefix_rec = $shared_fileroot->{$file_prefix};
471
472 if ($self->can_process_this_file($filename_full_path)) {
473 # This is the document the others should be tied to
474 $file_prefix_rec->{'tie_to'} = $file_ext;
475 }
476 else {
477 if ($file_ext =~ m/$associate_tail_re$/) {
478 # this file should be associated to the main one
479 $file_prefix_rec->{'exts'}->{$file_ext} = 1;
480 }
481 }
482
483 }
484 }
485
486 # check block expressions
487 $self->use_block_expressions($filename_full_path, $block_hash) unless $self->{'no_blocking'};
488
489 # now check whether we are actually processing this
490 if (!-f $filename_full_path || !$self->can_process_this_file($filename_full_path)) {
491 return undef; # can't recognise
492 }
493
494 # if we have a block_exp, then this overrides the normal 'smart' blocking
495 $self->store_block_files($filename_full_path, $block_hash) unless ($self->{'no_blocking'} || $self->{'block_exp'} ne "");
496
497 # block the cover image if there is one
498 if ($self->{'cover_image'}) {
499 $self->block_cover_image($filename_full_path, $block_hash);
500 }
501
502 return 1;
503}
504
505# plugins that rely on more than process_exp (eg XML plugins) can override this method
506sub can_process_this_file {
507 my $self = shift(@_);
508 my ($filename) = @_;
509
510 if (-d $filename && !$self->{'can_process_directories'}) {
511 return 0;
512 }
513
514 if ($self->{'process_exp'} ne "" && $filename =~ /$self->{'process_exp'}/) {
515 return 1;
516 }
517 return 0;
518
519}
520
521# just converts path as is to utf8.
522sub filepath_to_utf8 {
523 my $self = shift (@_);
524 my ($file, $file_encoding) = @_;
525 my $filemeta = $file;
526
527 my $filename_encoding = $self->{'filename_encoding'}; # filename encoding setting
528
529 # Whenever filename-encoding is set to any of the auto settings, we
530 # check if the filename is already in UTF8. If it is, then we're done.
531 if($filename_encoding =~ m/auto/) {
532 if(&unicode::check_is_utf8($filemeta))
533 {
534 $filename_encoding = "utf8";
535 return $filemeta;
536 }
537 }
538
539 # Auto setting, but filename is not utf8
540 if ($filename_encoding eq "auto")
541 {
542 # try textcat
543 $filename_encoding = $self->textcat_encoding($filemeta);
544
545 # check the locale next
546 $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined";
547
548
549 # now try the encoding of the document, if available
550 if ($filename_encoding eq "undefined" && defined $file_encoding) {
551 $filename_encoding = $file_encoding;
552 }
553
554 }
555
556 elsif ($filename_encoding eq "auto-language-analysis")
557 {
558 $filename_encoding = $self->textcat_encoding($filemeta);
559
560 # now try the encoding of the document, if available
561 if ($filename_encoding eq "undefined" && defined $file_encoding) {
562 $filename_encoding = $file_encoding;
563 }
564 }
565
566 elsif ($filename_encoding eq "auto-filesystem-encoding")
567 {
568 # try locale
569 $filename_encoding = $self->locale_encoding();
570 }
571
572 elsif ($filename_encoding eq "auto-fl")
573 {
574 # filesystem-encoding (locale) then language-analysis (textcat)
575 $filename_encoding = $self->locale_encoding();
576
577 # try textcat
578 $filename_encoding = $self->textcat_encoding($filemeta) if $filename_encoding eq "undefined";
579
580 # else assume filename encoding is encoding of file content, if that's available
581 if ($filename_encoding eq "undefined" && defined $file_encoding) {
582 $filename_encoding = $file_encoding;
583 }
584 }
585
586 elsif ($filename_encoding eq "auto-lf")
587 {
588 # language-analysis (textcat) then filesystem-encoding (locale)
589 $filename_encoding = $self->textcat_encoding($filemeta);
590
591 # guess filename encoding from encoding of file content, if available
592 if ($filename_encoding eq "undefined" && defined $file_encoding) {
593 $filename_encoding = $file_encoding;
594 }
595
596 # try locale
597 $filename_encoding = $self->locale_encoding() if $filename_encoding eq "undefined";
598 }
599
600 # if still undefined, use utf8 as fallback
601 if ($filename_encoding eq "undefined") {
602 $filename_encoding = "utf8";
603 }
604
605 #print STDERR "**** UTF8 encoding the filename $filemeta ";
606
607 # if the filename encoding is set to utf8 but it isn't utf8 already--such as when
608 # 1. the utf8 fallback is used, or 2. if the system locale is used and happens to
609 # be always utf8 (in which case the filename's encoding is also set as utf8 even
610 # though the filename need not be if it originates from another system)--in such
611 # cases attempt to make the filename utf8 to match.
612 if($filename_encoding eq "utf8" && !&unicode::check_is_utf8($filemeta)) {
613 &unicode::ensure_utf8(\$filemeta);
614 }
615
616 # convert non-unicode encodings to utf8
617 if ($filename_encoding !~ m/(?:ascii|utf8|unicode)/) {
618 $filemeta = &unicode::unicode2utf8(
619 &unicode::convert2unicode($filename_encoding, \$filemeta)
620 );
621 }
622
623 #print STDERR " from encoding $filename_encoding -> $filemeta\n";
624 return $filemeta;
625}
626
627# gets the filename with no path, converts to utf8, and then dm safes it.
628# filename_encoding set by user
629sub filename_to_utf8_metadata
630{
631 my $self = shift (@_);
632 my ($file, $file_encoding) = @_;
633
634 my $outhandle = $self->{'outhandle'};
635
636 print $outhandle "****!!!!**** BasePlugin::filename_to_utf8_metadata now deprecated\n";
637 my ($cpackage,$cfilename,$cline,$csubr,$chas_args,$cwantarray) = caller(0);
638 print $outhandle "Calling method: $cfilename:$cline $cpackage->$csubr\n";
639
640
641 my ($filemeta) = $file =~ /([^\\\/]+)$/; # getting the tail of the filepath (skips all string parts containing slashes upto the end)
642 $filemeta = $self->filepath_to_utf8($filemeta, $file_encoding);
643
644 return $filemeta;
645}
646
647sub locale_encoding {
648 my $self = shift(@_);
649
650 if (!defined $self->{'filesystem_encoding'}) {
651 $self->{'filesystem_encoding'} = $self->get_filesystem_encoding();
652 $self->{'filesystem_encoding'} = "undefined" if !defined $self->{'filesystem_encoding'};
653 }
654
655 #print STDERR "*** filename encoding determined based on locale: " . $self->{'filesystem_encoding'} . "\n";
656 return $self->{'filesystem_encoding'}; # can be the string "undefined"
657}
658
659sub textcat_encoding {
660 my $self = shift(@_);
661 my ($filemeta) = @_;
662
663 # analyse filenames without extensions and digits (and trimmed of
664 # surrounding whitespace), so that irrelevant chars don't confuse
665 # textcat
666 my $strictfilemeta = $filemeta;
667 $strictfilemeta =~ s/\.[^\.]+$//g;
668 $strictfilemeta =~ s/\d//g;
669 $strictfilemeta =~ s/^\s*//g;
670 $strictfilemeta =~ s/\s*$//g;
671
672 my $filename_encoding = $self->encoding_from_language_analysis($strictfilemeta);
673 if(!defined $filename_encoding) {
674 $filename_encoding = "undefined";
675 }
676
677 return $filename_encoding; # can be the string "undefined"
678}
679
680# performs textcat
681sub encoding_from_language_analysis {
682 my $self = shift(@_);
683 my ($text) = @_;
684
685 my $outhandle = $self->{'outhandle'};
686 my $best_encoding = undef;
687
688 # get the language/encoding of the textstring using textcat
689 require textcat; # Only load the textcat module if it is required
690 $self->{'textcat'} = new textcat() unless defined($self->{'textcat'});
691 my $results = $self->{'textcat'}->classify_cached_filename(\$text);
692
693
694 if (scalar @$results < 0) {
695 return undef;
696 }
697
698 # We have some results, we choose the first
699 my ($language, $encoding) = $results->[0] =~ /^([^-]*)(?:-(.*))?$/;
700
701 $best_encoding = $encoding;
702 if (!defined $best_encoding) {
703 return undef;
704 }
705
706 if (defined $best_encoding && $best_encoding =~ m/^iso_8859/ && &unicode::check_is_utf8($text)) {
707 # the text is valid utf8, so assume that's the real encoding (since textcat is based on probabilities)
708 $best_encoding = 'utf8';
709 }
710
711
712 # check for equivalents where textcat doesn't have some encodings...
713 # eg MS versions of standard encodings
714 if (defined $best_encoding && $best_encoding =~ /^iso_8859_(\d+)/) {
715 my $iso = $1; # which variant of the iso standard?
716 # iso-8859 sets don't use chars 0x80-0x9f, windows codepages do
717 if ($text =~ /[\x80-\x9f]/) {
718 # Western Europe
719 if ($iso == 1 or $iso == 15) { $best_encoding = 'windows_1252' }
720 elsif ($iso == 2) {$best_encoding = 'windows_1250'} # Central Europe
721 elsif ($iso == 5) {$best_encoding = 'windows_1251'} # Cyrillic
722 elsif ($iso == 6) {$best_encoding = 'windows_1256'} # Arabic
723 elsif ($iso == 7) {$best_encoding = 'windows_1253'} # Greek
724 elsif ($iso == 8) {$best_encoding = 'windows_1255'} # Hebrew
725 elsif ($iso == 9) {$best_encoding = 'windows_1254'} # Turkish
726 }
727 }
728
729 if (defined $best_encoding && $best_encoding !~ /^(ascii|utf8|unicode)$/ &&
730 !defined $encodings::encodings->{$best_encoding})
731 {
732 if ($self->{'verbosity'}) {
733 gsprintf($outhandle, "BasePlugin: {ReadTextFile.unsupported_encoding}\n", $text, $best_encoding, "undef");
734 }
735 $best_encoding = undef;
736 }
737
738 return $best_encoding;
739}
740
741# uses locale
742sub get_filesystem_encoding {
743
744 my $self = shift(@_);
745
746 my $outhandle = $self->{'outhandle'};
747 my $filesystem_encoding = undef;
748
749 eval {
750 use POSIX qw(locale_h);
751
752 # With only one parameter, setlocale retrieves the
753 # current value
754 my $current_locale = setlocale(LC_CTYPE);
755
756 if ($current_locale =~ m/^.*\.(.*?)$/) {
757 my $char_encoding = lc($1);
758 if ($char_encoding =~ m/^(iso)(8859)(\d{1,2})$/) {
759 $char_encoding = "$1\_$2\_$3";
760 }
761
762 $char_encoding =~ s/-/_/g;
763 $char_encoding =~ s/^utf_8$/utf8/;
764
765 if ($char_encoding =~ m/^\d+$/) {
766 if (defined $encodings::encodings->{"windows_$char_encoding"}) {
767 $char_encoding = "windows_$char_encoding";
768 }
769 elsif (defined $encodings::encodings->{"dos_$char_encoding"}) {
770 $char_encoding = "dos_$char_encoding";
771 }
772 }
773
774 if (($char_encoding =~ m/(?:ascii|utf8|unicode)/)
775 || (defined $encodings::encodings->{$char_encoding})) {
776 $filesystem_encoding = $char_encoding;
777 }
778 else {
779 print $outhandle "Warning: Unsupported character encoding '$char_encoding' from locale '$current_locale'\n";
780 }
781 }
782
783
784 };
785 if ($@) {
786 print $outhandle "$@\n";
787 print $outhandle "Warning: Unable to establish locale. Will assume filesystem is UTF-8\n";
788
789 }
790 return $filesystem_encoding;
791}
792
793
794
795sub deduce_filename_encoding
796{
797 my $self = shift (@_);
798 my ($file,$metadata,$plugin_filename_encoding) = @_;
799
800 my $gs_filename_encoding = $metadata->{"gs.filename_encoding"};
801 my $deduced_filename_encoding = undef;
802
803 # Start by looking for manually assigned metadata
804 if (defined $gs_filename_encoding) {
805 if (ref ($gs_filename_encoding) eq "ARRAY") {
806 my $outhandle = $self->{'outhandle'};
807
808 $deduced_filename_encoding = $gs_filename_encoding->[0];
809
810 my $num_vals = scalar(@$gs_filename_encoding);
811 if ($num_vals>1) {
812 print $outhandle "Warning: gs.filename_encoding multiply defined for $file\n";
813 print $outhandle " Selecting first value: $deduced_filename_encoding\n";
814 }
815 }
816 else {
817 $deduced_filename_encoding = $gs_filename_encoding;
818 }
819 }
820
821 if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {
822 # Look to see if plugin specifies this value
823
824 if (defined $plugin_filename_encoding) {
825 # First look to see if we're using any of the "older" (i.e. deprecated auto-... plugin options)
826 if ($plugin_filename_encoding =~ m/^auto-.*$/) {
827 my $outhandle = $self->{'outhandle'};
828 print $outhandle "Warning: $plugin_filename_encoding is no longer supported\n";
829 print $outhandle " default to 'auto'\n";
830 $self->{'filename_encoding'} = $plugin_filename_encoding = "auto";
831 }
832
833 if ($plugin_filename_encoding ne "auto") {
834 # We've been given a specific filenamne encoding
835 # => so use it!
836 $deduced_filename_encoding = $plugin_filename_encoding;
837 }
838 }
839 }
840
841 if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {
842 # See if we can determine the file system encoding through locale
843 $deduced_filename_encoding = $self->locale_encoding();
844
845 # if locale shows us filesystem is utf8, check to see filename is consistent
846 # => if not, then we have an "alien" filename on our hands
847
848 if ($deduced_filename_encoding =~ m/^utf-?8$/i) {
849 if (!&unicode::check_is_utf8($file)) {
850 # "alien" filename, so revert
851 $deduced_filename_encoding = undef;
852 }
853 }
854 }
855
856
857# if (!defined $deduced_filename_encoding || ($deduced_filename_encoding =~ m/^\s*$/)) {
858# # Last chance, apply textcat to deduce filename encoding
859# $deduced_filename_encoding = $self->textcat_encoding($file);
860# }
861
862 if ($self->{'verbosity'}>3) {
863 my $outhandle = $self->{'outhandle'};
864
865 if (defined $deduced_filename_encoding) {
866 print $outhandle " Deduced filename encoding as: $deduced_filename_encoding\n";
867 }
868 else {
869 print $outhandle " No filename encoding deduced\n";
870 }
871 }
872
873 return $deduced_filename_encoding;
874}
875
876
877
878
879# Notionally written to be called once for each document, it is however safe to
880# call multiple times (as in the case of ImagePlugin) which calls this later on
881# after the original image has potentially been converted to a *new* source image
882# format (e.g. TIFF to PNG)
883
884sub set_Source_metadata {
885 my $self = shift (@_);
886 my ($doc_obj, $raw_file, $filename_encoding) = @_;
887
888 # 1. Sets the filename (Source) for display encoded as Unicode if possible,
889 # and (as a fallback) using %xx if not for non-ascii chars
890 # 2. Sets the url ref (SourceFile) to the URL encoded version
891 # of filename for generated files
892
893 my $top_section = $doc_obj->get_top_section();
894
895 # UTF-8 version of filename
896 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
897 print STDERR "****** Setting Source Metadata given: $raw_file\n";
898 }
899
900 my $url_encoded_filename;
901 if (defined $filename_encoding) {
902 # => Generate a pretty print version of filename that is mapped to Unicode
903
904 # Use filename_encoding to map raw filename to a Perl unicode-aware string
905 $url_encoded_filename = decode($filename_encoding,$raw_file);
906 }
907 else {
908 # otherwise generate %xx encoded version of filename for char > 127
909 $url_encoded_filename = &unicode::raw_filename_to_url_encoded($raw_file);
910 }
911
912 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
913 print STDERR "***** saving Source as: $url_encoded_filename\n";
914 }
915
916
917 # Source is the UTF8 display name - not necessarily the name of the file on the system
918 $doc_obj->set_utf8_metadata_element($top_section, "Source", $url_encoded_filename);
919
920 my $renamed_raw_file = &util::rename_file($raw_file, $self->{'file_rename_method'});
921 # If using URL encoding, then SourceFile is the url-reference to url-encoded
922 # renamed_raw_url: it's a url that refers to the actual file on the system
923 my $renamed_raw_url = &unicode::filename_to_url($renamed_raw_file);
924
925 $doc_obj->set_utf8_metadata_element($top_section, "SourceFile",
926 $renamed_raw_url);
927
928 if ((defined $ENV{"DEBUG_UNICODE"}) && ($ENV{"DEBUG_UNICODE"})) {
929 print STDERR "***** saving SourceFile as: $renamed_raw_url\n";
930 }
931}
932
933# this should be called by all plugins to set the oid of the doc obj, rather
934# than calling doc_obj->set_OID directly
935sub add_OID {
936 my $self = shift (@_);
937 my ($doc_obj) = @_;
938
939 $doc_obj->set_OIDtype($self->{'OIDtype'}, $self->{'OIDmetadata'});
940
941 # see if there is a plugin specific set_OID function
942 if (defined ($self->can('set_OID'))) {
943 $self->set_OID(@_); # pass through doc_obj and any extra arguments
944 }
945 else {
946 # use the default set_OID() in doc.pm
947 $doc_obj->set_OID();
948 }
949
950}
951
952# The BasePlugin read_into_doc_obj() function. This function does all the
953# right things to make general options work for a given plugin. It doesn't do anything with the file other than setting reads in
954# a file and sets up a slew of metadata all saved in doc_obj, which
955# it then returns as part of a tuple (process_status,doc_obj)
956#
957# Much of this functionality used to reside in read, but it was broken
958# down into a supporting routine to make the code more flexible.
959#
960# recursive plugins (e.g. RecPlug) and specialized plugins like those
961# capable of processing many documents within a single file (e.g.
962# GMLPlug) will normally want to implement their own version of
963# read_into_doc_obj()
964#
965# Note that $base_dir might be "" and that $file might
966# include directories
967
968# currently blocking has been done before it gets here - does this affect secondary plugin stuff??
969sub read_into_doc_obj {
970 my $self = shift (@_);
971 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
972
973 my $outhandle = $self->{'outhandle'};
974
975 # should we move this to read? What about secondary plugins?
976 print STDERR "<Processing n='$file' p='$self->{'plugin_type'}'>\n" if ($gli);
977 print $outhandle "$self->{'plugin_type'} processing $file\n"
978 if $self->{'verbosity'} > 1;
979
980 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
981
982 # create a new document
983 my $doc_obj = new doc ($filename_full_path, "indexed_doc", $self->{'file_rename_method'});
984 my $top_section = $doc_obj->get_top_section();
985
986 $doc_obj->add_utf8_metadata($top_section, "Plugin", "$self->{'plugin_type'}");
987 $doc_obj->add_utf8_metadata($top_section, "FileSize", (-s $filename_full_path));
988
989
990 my $plugin_filename_encoding = $self->{'filename_encoding'};
991 my $filename_encoding = $self->deduce_filename_encoding($file,$metadata,$plugin_filename_encoding);
992 $self->set_Source_metadata($doc_obj,$filename_no_path,$filename_encoding);
993
994 # plugin specific stuff - what args do we need here??
995 unless (defined ($self->process($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli))) {
996 print STDERR "<ProcessingError n='$file'>\n" if ($gli);
997 return -1;
998 }
999
1000 # include any metadata passed in from previous plugins
1001 # note that this metadata is associated with the top level section
1002 my $section = $doc_obj->get_top_section();
1003 # can we merge these two methods??
1004 $self->add_associated_files($doc_obj, $filename_full_path);
1005 $self->extra_metadata ($doc_obj, $section, $metadata);
1006 $self->auto_extract_metadata($doc_obj);
1007
1008 # if we haven't found any Title so far, assign one
1009 # this was shifted to here from inside read()
1010 $self->title_fallback($doc_obj,$section,$filename_no_path);
1011
1012 $self->add_OID($doc_obj);
1013
1014 $self->post_process_doc_obj($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli);
1015 return (1,$doc_obj);
1016}
1017
1018sub post_process_doc_obj {
1019 my $self = shift (@_);
1020 my ($pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
1021
1022 return 1;
1023}
1024
1025sub add_dummy_text {
1026 my $self = shift(@_);
1027 my ($doc_obj, $section) = @_;
1028
1029 # add NoText metadata so we can hide this dummy text in format statements
1030 $doc_obj->add_metadata($section, "NoText", "1");
1031 $doc_obj->add_text($section, &gsprintf::lookup_string("{BasePlugin.dummy_text}",1));
1032
1033}
1034
1035# does nothing. Can be overridden by subclass
1036sub auto_extract_metadata {
1037 my $self = shift(@_);
1038 my ($doc_obj) = @_;
1039}
1040
1041# adds cover image, associate_file options stuff. Should be called by sub class
1042# read_into_doc_obj
1043sub add_associated_files {
1044 my $self = shift(@_);
1045 # whatis filename??
1046 my ($doc_obj, $filename) = @_;
1047
1048 # add in the cover image
1049 if ($self->{'cover_image'}) {
1050 $self->associate_cover_image($doc_obj, $filename);
1051 }
1052 # store the original (used for eg TextPlugin to store the original for OAI)
1053 if ($self->{'store_original_file'}) {
1054 $self->associate_source_file($doc_obj, $filename);
1055 }
1056
1057
1058}
1059
1060# implement this if you are extracting metadata for other documents
1061sub metadata_read {
1062 my $self = shift (@_);
1063 my ($pluginfo, $base_dir, $file, $block_hash,
1064 $extrametakeys, $extrametadata, $extrametafile,
1065 $processor, $gli, $aux) = @_;
1066
1067 # can we process this file??
1068 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
1069 return undef unless $self->can_process_this_file($filename_full_path);
1070
1071 return 1; # we recognise the file, but don't actually do anything with it
1072}
1073
1074
1075# The BasePlugin read() function. This function calls read_into_doc_obj()
1076# to ensure all the right things to make general options work for a
1077# given plugin are done. It then calls the process() function which
1078# does all the work specific to a plugin (like the old read functions
1079# used to do). Most plugins should define their own process() function
1080# and let this read() function keep control.
1081#
1082# recursive plugins (e.g. RecPlug) and specialized plugins like those
1083# capable of processing many documents within a single file (e.g.
1084# GMLPlug) might want to implement their own version of read(), but
1085# more likely need to implement their own version of read_into_doc_obj()
1086#
1087# Return number of files processed, undef if can't recognise, -1 if can't
1088# process
1089
1090sub read {
1091 my $self = shift (@_);
1092 my ($pluginfo, $base_dir, $file, $block_hash, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
1093
1094 # can we process this file??
1095 my ($filename_full_path, $filename_no_path) = &util::get_full_filenames($base_dir, $file);
1096
1097 return undef unless $self->can_process_this_file($filename_full_path);
1098
1099 my ($process_status,$doc_obj) = $self->read_into_doc_obj(@_);
1100
1101 if ((defined $process_status) && ($process_status == 1)) {
1102
1103 # process the document
1104 $processor->process($doc_obj);
1105
1106 $self->{'num_processed'} ++;
1107 undef $doc_obj;
1108 }
1109 # delete any temp files that we may have created
1110 $self->clean_up_after_doc_obj_processing();
1111
1112
1113 # if process_status == 1, then the file has been processed.
1114 return $process_status;
1115
1116}
1117
1118# returns undef if file is rejected by the plugin
1119sub process {
1120 my $self = shift (@_);
1121 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj, $gli) = @_;
1122
1123 gsprintf(STDERR, "BasePlugin::process {common.must_be_implemented}\n") && die "\n";
1124
1125 return undef; # never gets here
1126}
1127
1128# overwrite this method to delete any temp files that we have created
1129sub clean_up_after_doc_obj_processing {
1130 my $self = shift(@_);
1131
1132}
1133
1134# write_file -- used by ConvertToPlug, for example in post processing
1135#
1136# where should this go, is here the best place??
1137sub utf8_write_file {
1138 my $self = shift (@_);
1139 my ($textref, $filename) = @_;
1140
1141 if (!open (FILE, ">:utf8", $filename)) {
1142 gsprintf(STDERR, "ConvertToPlug::write_file {ConvertToPlug.could_not_open_for_writing} ($!)\n", $filename);
1143 die "\n";
1144 }
1145 print FILE $$textref;
1146
1147 close FILE;
1148}
1149
1150
1151sub filename_based_title
1152{
1153 my $self = shift (@_);
1154 my ($file) = @_;
1155
1156 my $file_derived_title = $file;
1157 $file_derived_title =~ s/_/ /g;
1158 $file_derived_title =~ s/\.[^.]+$//;
1159
1160 return $file_derived_title;
1161}
1162
1163
1164sub title_fallback
1165{
1166 my $self = shift (@_);
1167 my ($doc_obj,$section,$file) = @_;
1168
1169 if (!defined $doc_obj->get_metadata_element ($section, "Title")
1170 || $doc_obj->get_metadata_element($section, "Title") eq "") {
1171
1172 my $source_file = $doc_obj->get_metadata_element($section, "Source");
1173 my $file_derived_title;
1174 if (defined $source_file) {
1175 $file_derived_title = $self->filename_based_title($source_file);
1176 }
1177 else {
1178 # pp = pretty print
1179 my $pp_file = (defined $source_file) ? $source_file : $file;
1180
1181 my $raw_title = $self->filename_based_title($file);
1182 my $file_derived_title = &unicode::raw_filename_to_url_encoded($raw_title);
1183 }
1184
1185
1186 if (!defined $doc_obj->get_metadata_element ($section, "Title")) {
1187 $doc_obj->add_utf8_metadata ($section, "Title", $file_derived_title);
1188 }
1189 else {
1190 $doc_obj->set_utf8_metadata_element ($section, "Title", $file_derived_title);
1191 }
1192 }
1193
1194}
1195
1196# add any extra metadata that's been passed around from one
1197# plugin to another.
1198# extra_metadata uses add_utf8_metadata so it expects metadata values
1199# to already be in utf8
1200sub extra_metadata {
1201 my $self = shift (@_);
1202 my ($doc_obj, $cursection, $metadata) = @_;
1203
1204 my $associate_tail_re = $self->{'associate_tail_re'};
1205
1206 foreach my $field (keys(%$metadata)) {
1207 # $metadata->{$field} may be an array reference
1208 if ($field eq "gsdlassocfile_tobe") {
1209 # 'gsdlassocfile_tobe' is artificially introduced metadata
1210 # that is used to signal that certain additional files should
1211 # be tied to this document. Useful in situations where a
1212 # metadata pass in the plugin pipeline works out some files
1213 # need to be associated with a document, but the document hasn't
1214 # been formed yet.
1215 my $equiv_form = "";
1216 foreach my $gaf (@{$metadata->{$field}}) {
1217 my ($full_filename,$mimetype) = ($gaf =~ m/^(.*):(.*):$/);
1218 my ($tail_filename) = ($full_filename =~ /^.*[\/\\](.+?)$/);
1219
1220 # we need to make sure the filename is valid utf-8 - we do
1221 # this by url or base64 encoding it
1222 # $tail_filename is the name that we store the file as
1223 $tail_filename = &util::rename_file($tail_filename, $self->{'file_rename_method'});
1224 $doc_obj->associate_file($full_filename,$tail_filename,$mimetype);
1225 $doc_obj->associate_source_file($full_filename);
1226 # If the filename is url_encoded, we need to encode the % signs
1227 # in the filename, so that it works in a url
1228 my $url_tail_filename = &unicode::filename_to_url($tail_filename);
1229 # work out extended tail extension (i.e. matching tail re)
1230
1231 my ($file_prefix,$file_extended_ext)
1232 = &util::get_prefix_and_tail_by_regex($tail_filename,$associate_tail_re);
1233 my ($pre_doc_ext) = ($file_extended_ext =~ m/^(.*)\..*$/);
1234 my ($doc_ext) = ($tail_filename =~ m/^.*\.(.*)$/);
1235 my $start_doclink = "<a href=\"_httpprefix_/collect/[collection]/index/assoc/{Or}{[parent(Top):assocfilepath],[assocfilepath]}/$url_tail_filename\">";
1236 my $srcicon = "_icon".$doc_ext."_";
1237 my $end_doclink = "</a>";
1238
1239 my $assoc_form = "$start_doclink\{If\}{$srcicon,$srcicon,$doc_ext\}$end_doclink";
1240
1241 if (defined $pre_doc_ext && $pre_doc_ext ne "") {
1242 # for metadata such as [mp3._edited] [mp3._full] ...
1243 $doc_obj->add_utf8_metadata ($cursection, "$doc_ext.$pre_doc_ext", $assoc_form);
1244 }
1245
1246 # for multiple metadata such as [mp3.assoclink]
1247 $doc_obj->add_utf8_metadata ($cursection, "$doc_ext.assoclink", $assoc_form);
1248
1249 $equiv_form .= " $assoc_form";
1250 }
1251 $doc_obj->add_utf8_metadata ($cursection, "equivlink", $equiv_form);
1252 }
1253 elsif ($field eq "gsdlzipfilename") {
1254 # special case for when files have come out of a zip. source_path
1255 # (used for archives dbs and keeping track for incremental import)
1256 # must be set to the zip file name
1257 my $zip_filename = $metadata->{$field};
1258 # overwrite the source_path
1259 $doc_obj->{'source_path'} = $zip_filename;
1260 # and set the metadata
1261 $zip_filename = &util::filename_within_collection($zip_filename);
1262 $zip_filename = $doc_obj->encode_filename($zip_filename, $self->{'file_rename_method'});
1263 $doc_obj->add_utf8_metadata ($cursection, $field, $zip_filename);
1264 }
1265 elsif (ref ($metadata->{$field}) eq "ARRAY") {
1266 map {
1267 $doc_obj->add_utf8_metadata ($cursection, $field, $_);
1268 } @{$metadata->{$field}};
1269 } else {
1270 $doc_obj->add_utf8_metadata ($cursection, $field, $metadata->{$field});
1271 }
1272 }
1273}
1274
1275
1276sub compile_stats {
1277 my $self = shift(@_);
1278 my ($stats) = @_;
1279
1280 $stats->{'num_processed'} += $self->{'num_processed'};
1281 $stats->{'num_not_processed'} += $self->{'num_not_processed'};
1282 $stats->{'num_archives'} += $self->{'num_archives'};
1283
1284}
1285sub associate_source_file {
1286 my $self = shift(@_);
1287
1288 my ($doc_obj, $filename) = @_;
1289 my $cursection = $doc_obj->get_top_section();
1290 my $assocfilename = $doc_obj->get_assocfile_from_sourcefile();
1291
1292 $doc_obj->associate_file($filename, $assocfilename, undef, $cursection);
1293 $doc_obj->add_utf8_metadata ($cursection, "srclink_file", $doc_obj->get_sourcefile());
1294}
1295
1296sub associate_cover_image {
1297 my $self = shift(@_);
1298 my ($doc_obj, $filename) = @_;
1299
1300 $filename =~ s/\.[^\\\/\.]+$/\.jpg/;
1301 if (exists $self->{'covers_missing_cache'}->{$filename}) {
1302 # don't stat() for existence eg for multiple document input files
1303 # (eg SplitPlug)
1304 return;
1305 }
1306
1307 my $top_section=$doc_obj->get_top_section();
1308
1309 if (-e $filename) {
1310 $doc_obj->associate_source_file($filename);
1311 $doc_obj->associate_file($filename, "cover.jpg", "image/jpeg");
1312 $doc_obj->add_utf8_metadata($top_section, "hascover", 1);
1313 } else {
1314 my $upper_filename = $filename;
1315 $upper_filename =~ s/jpg$/JPG/;
1316 if (-e $upper_filename) {
1317 $doc_obj->associate_source_file($upper_filename);
1318 $doc_obj->associate_file($upper_filename, "cover.jpg",
1319 "image/jpeg");
1320 $doc_obj->add_utf8_metadata($top_section, "hascover", 1);
1321 } else {
1322 # file doesn't exist, so record the fact that it's missing so
1323 # we don't stat() again (stat is slow)
1324 $self->{'covers_missing_cache'}->{$filename} = 1;
1325 }
1326 }
1327
1328}
1329
1330
1331# Overridden by exploding plugins (eg. ISISPlug)
1332sub clean_up_after_exploding
1333{
1334 my $self = shift(@_);
1335}
1336
1337
1338
13391;
Note: See TracBrowser for help on using the repository browser.