1 | ###########################################################################
|
---|
2 | #
|
---|
3 | # BaseMediaConverter - helper plugin that provide base functionality for
|
---|
4 | # image/video conversion using ImageMagick/ffmpeg
|
---|
5 | #
|
---|
6 | # A component of the Greenstone digital library software
|
---|
7 | # from the New Zealand Digital Library Project at the
|
---|
8 | # University of Waikato, New Zealand.
|
---|
9 | #
|
---|
10 | # Copyright (C) 2008 New Zealand Digital Library Project
|
---|
11 | #
|
---|
12 | # This program is free software; you can redistribute it and/or modify
|
---|
13 | # it under the terms of the GNU General Public License as published by
|
---|
14 | # the Free Software Foundation; either version 2 of the License, or
|
---|
15 | # (at your option) any later version.
|
---|
16 | #
|
---|
17 | # This program is distributed in the hope that it will be useful,
|
---|
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
20 | # GNU General Public License for more details.
|
---|
21 | #
|
---|
22 | # You should have received a copy of the GNU General Public License
|
---|
23 | # along with this program; if not, write to the Free Software
|
---|
24 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
25 | #
|
---|
26 | ###########################################################################
|
---|
27 | package BaseMediaConverter;
|
---|
28 |
|
---|
29 | use PrintInfo;
|
---|
30 |
|
---|
31 | use convertutil;
|
---|
32 |
|
---|
33 | use strict;
|
---|
34 | no strict 'refs'; # allow filehandles to be variables and viceversa
|
---|
35 | use gsprintf 'gsprintf';
|
---|
36 |
|
---|
37 | BEGIN {
|
---|
38 | @BaseMediaConverter::ISA = ('PrintInfo');
|
---|
39 | }
|
---|
40 |
|
---|
41 | my $arguments = [
|
---|
42 | { 'name' => "enable_cache",
|
---|
43 | 'desc' => "{BaseMediaConverter.enable_cache}",
|
---|
44 | 'type' => "flag",
|
---|
45 | 'reqd' => "no",
|
---|
46 | }
|
---|
47 |
|
---|
48 | ];
|
---|
49 |
|
---|
50 | my $options = { 'name' => "BaseMediaConverter",
|
---|
51 | 'desc' => "{BaseMediaConverter.desc}",
|
---|
52 | 'abstract' => "yes",
|
---|
53 | 'inherits' => "yes",
|
---|
54 | 'args' => $arguments };
|
---|
55 |
|
---|
56 | sub new {
|
---|
57 | my ($class) = shift (@_);
|
---|
58 | my ($pluginlist,$inputargs,$hashArgOptLists,$auxiliary) = @_;
|
---|
59 | push(@$pluginlist, $class);
|
---|
60 |
|
---|
61 | push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
|
---|
62 | push(@{$hashArgOptLists->{"OptList"}},$options);
|
---|
63 |
|
---|
64 | my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists, $auxiliary);
|
---|
65 |
|
---|
66 | return bless $self, $class;
|
---|
67 | }
|
---|
68 |
|
---|
69 | sub begin {
|
---|
70 | my $self = shift (@_);
|
---|
71 | my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
|
---|
72 |
|
---|
73 | # Save base_dir for use in file cache
|
---|
74 | $self->{'base_dir'} = $base_dir;
|
---|
75 | }
|
---|
76 |
|
---|
77 |
|
---|
78 | # This method exists to prevent duplication of long filepaths within the cache dir.
|
---|
79 | # Given the file location where media or other product files to be cached are generated
|
---|
80 | # (such as the pages of a PDF file converted to paged images), this subroutine returns
|
---|
81 | # the relative path the document is to be stored as, within the cache subdirectory.
|
---|
82 | # e.g. collect/<collection>/cached/<fileroot/numbered-filename.ext>
|
---|
83 | # will return fileroot/numbered-filename.ext
|
---|
84 | # e.g.#2 collect/<collection>/import/<fileroot.ext> will return fileroot.ext
|
---|
85 | sub get_cache_filename_for_location
|
---|
86 | {
|
---|
87 | my $self = shift @_;
|
---|
88 | my ($filename, $orig_file_root, $base_dir, $collect_dir) = @_;
|
---|
89 |
|
---|
90 | my $cached_dir = &FileUtils::filenameConcatenate($collect_dir,"cached");
|
---|
91 |
|
---|
92 | # hashmap of regex match to replacement regex
|
---|
93 | my %prefixre_to_replacere_map;
|
---|
94 |
|
---|
95 | my $orig_file_root_replacere = "";
|
---|
96 | if(defined $orig_file_root) {
|
---|
97 | $orig_file_root_replacere = "/$orig_file_root";
|
---|
98 | }
|
---|
99 |
|
---|
100 | # populate hashmap with regex matcher to regex replacement search
|
---|
101 | # https://stackoverflow.com/questions/41334509/can-i-use-qq-or-q-instead-of-qw-in-the-following-perl-script
|
---|
102 | # https://perldoc.perl.org/perlop#Regexp-Quote-Like-Operators
|
---|
103 | $prefixre_to_replacere_map{qr/^$base_dir(.*?)$/} = "";
|
---|
104 | $prefixre_to_replacere_map{qr/^$cached_dir(.*?)$/} = qr/[\/\\][^\/\\]+([\/\\][^\/\\]*)$/;
|
---|
105 | $prefixre_to_replacere_map{qr/^$collect_dir\/(tmp\/.*?)$/} = "";
|
---|
106 | $prefixre_to_replacere_map{qr/^$collect_dir(.*?)$/} = "";
|
---|
107 | $prefixre_to_replacere_map{qr@^$ENV{'GSDLHOME'}/tmp/[^/]*(.*?)$@} = $orig_file_root_replacere; # prefix this
|
---|
108 | #$prefixre_to_replacere_map{qr/^$ENV{'GSDLHOME'}(.*?)$/} = ""; # Not found a real-world use for this yet
|
---|
109 | #$prefixre_to_replacere_map{qr//} = qr//; # dummy template line to add additional regex match processing rules
|
---|
110 |
|
---|
111 | my $file;
|
---|
112 |
|
---|
113 | # https://stackoverflow.com/questions/3033/whats-the-safest-way-to-iterate-through-the-keys-of-a-perl-hash
|
---|
114 | # https://stackoverflow.com/questions/383528/how-can-i-sort-a-hashs-keys-naturally
|
---|
115 | # http://www.java2s.com/Code/Perl/Hash/SortHashbyKeysinReverseOrder.htm
|
---|
116 | foreach my $key (reverse sort keys %prefixre_to_replacere_map) {
|
---|
117 | # reverse sort as we want to try matching more specific "$coll_dir/$subdir" before $coll_dir
|
---|
118 |
|
---|
119 | #print STDERR "@@@ Key and value: $key\n\t $prefixre_to_replacere_map{$key}\n";
|
---|
120 |
|
---|
121 | my $prefixre = $key;
|
---|
122 | my $replacere = $prefixre_to_replacere_map{$key};
|
---|
123 |
|
---|
124 | ($file) = ($filename =~ m/$prefixre/);
|
---|
125 | if (!defined $file || $file eq $filename) {
|
---|
126 | #print STDERR "\t#### No match\n"; # keep looping looking for the next match
|
---|
127 | next;
|
---|
128 | } else {
|
---|
129 | #print STDERR "\t#### Found match. Applying: $replacere\n";
|
---|
130 | if($replacere eq $orig_file_root_replacere) {
|
---|
131 | $file = $orig_file_root_replacere.$file if $file;
|
---|
132 | } else {
|
---|
133 | $file =~ s/$replacere/$1/;
|
---|
134 | }
|
---|
135 | last; # found and processed a match
|
---|
136 | }
|
---|
137 | }
|
---|
138 |
|
---|
139 | # No matches found, reset $file to $filename
|
---|
140 | if(!defined $file) {
|
---|
141 | $file = $filename;
|
---|
142 | }
|
---|
143 |
|
---|
144 | $file =~ s/^\/|\\//; # get rid of leading slash from relative filename
|
---|
145 | $file =~ s@^(\.(\/|\/))*@@; # get rid of any ./ at the start
|
---|
146 | print STDERR "\t@@@ Final cache tail file is: $file\n";
|
---|
147 |
|
---|
148 | return $file
|
---|
149 | }
|
---|
150 |
|
---|
151 | sub init_cache_for_file
|
---|
152 | {
|
---|
153 | my $self = shift @_;
|
---|
154 | my ($filename, $orig_file_root) = @_;
|
---|
155 |
|
---|
156 | my $verbosity = $self->{'verbosity'};
|
---|
157 | my $outhandle = $self->{'outhandle'};
|
---|
158 | my $base_dir = $self->{'base_dir'};
|
---|
159 |
|
---|
160 | my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
|
---|
161 | $collect_dir =~ s/\\/\//g; # Work in Unix style world
|
---|
162 |
|
---|
163 | # Work out relative filename within 'base_dir'
|
---|
164 | $filename =~ s/\\/\//g;
|
---|
165 | $base_dir =~ s/\\/\//g;
|
---|
166 |
|
---|
167 | my $file = $self->get_cache_filename_for_location($filename, $orig_file_root, $base_dir, $collect_dir);
|
---|
168 |
|
---|
169 | # Setup cached_dir and file_root
|
---|
170 |
|
---|
171 | my ($file_root, $dirname, $suffix)
|
---|
172 | = &File::Basename::fileparse($file, "\\.[^\\.]+\$");
|
---|
173 | #= &File::Basename::fileparse($conv_file, "\\.[^\\.]+\$");
|
---|
174 |
|
---|
175 | # if dirname is in collections tmp area, remove collect_dir prefix
|
---|
176 | $dirname =~ s/^$collect_dir//;
|
---|
177 |
|
---|
178 | if ($ENV{'GSDLOS'} eq "windows") {
|
---|
179 | # if dirname starts with Windows drive letter, strip it off
|
---|
180 | $dirname =~ s/^[a-z]:\///i;
|
---|
181 | }
|
---|
182 |
|
---|
183 | my $base_output_dir = &FileUtils::filenameConcatenate($collect_dir,"cached",$dirname);
|
---|
184 |
|
---|
185 | if (!-e $base_output_dir ) {
|
---|
186 | print $outhandle "Creating directory $base_output_dir\n"
|
---|
187 | if ($verbosity>2);
|
---|
188 |
|
---|
189 | &FileUtils::makeAllDirectories($base_output_dir);
|
---|
190 | }
|
---|
191 |
|
---|
192 |
|
---|
193 | #print STDERR "@@@@ base_output_dir: $base_output_dir\n";
|
---|
194 | #print STDERR "@@@@ file_root: $file_root\n";
|
---|
195 |
|
---|
196 | my $output_dir = &FileUtils::filenameConcatenate($base_output_dir,$file_root);
|
---|
197 |
|
---|
198 | if (!-e $output_dir) {
|
---|
199 | print $outhandle "Creating directory $output_dir\n"
|
---|
200 | if ($verbosity>2);
|
---|
201 |
|
---|
202 | &FileUtils::makeAllDirectories($output_dir);
|
---|
203 | }
|
---|
204 |
|
---|
205 | $self->{'cached_dir'} = $output_dir;
|
---|
206 | $self->{'cached_file_root'} = $file_root;
|
---|
207 | }
|
---|
208 |
|
---|
209 |
|
---|
210 |
|
---|
211 | sub run_general_cmd
|
---|
212 | {
|
---|
213 | my $self = shift @_;
|
---|
214 | my ($command,$print_info) = @_;
|
---|
215 |
|
---|
216 |
|
---|
217 | if (!defined $print_info->{'verbosity'}) {
|
---|
218 | $print_info->{'verbosity'} = $self->{'verbosity'};
|
---|
219 | }
|
---|
220 |
|
---|
221 | if (!defined $print_info->{'outhandle'}) {
|
---|
222 | $print_info->{'outhandle'} = $self->{'outhandle'};
|
---|
223 | }
|
---|
224 |
|
---|
225 |
|
---|
226 | return &convertutil::run_general_cmd(@_);
|
---|
227 | }
|
---|
228 |
|
---|
229 |
|
---|
230 | sub regenerate_general_cmd
|
---|
231 | {
|
---|
232 | my $self = shift @_;
|
---|
233 | my ($command,$ifilename,$ofilename,$print_info) = @_;
|
---|
234 |
|
---|
235 | if (!defined $print_info->{'verbosity'}) {
|
---|
236 | $print_info->{'verbosity'} = $self->{'verbosity'};
|
---|
237 | }
|
---|
238 |
|
---|
239 | if (!defined $print_info->{'outhandle'}) {
|
---|
240 | $print_info->{'outhandle'} = $self->{'outhandle'};
|
---|
241 | }
|
---|
242 |
|
---|
243 | return &convertutil::regenerate_general_cmd(@_);
|
---|
244 | }
|
---|
245 |
|
---|
246 |
|
---|
247 |
|
---|
248 | sub run_uncached_general_cmd
|
---|
249 | {
|
---|
250 | my $self = shift @_;
|
---|
251 |
|
---|
252 | my ($command,$ifilename,$ofilename,$print_info) = @_;
|
---|
253 |
|
---|
254 | return $self->run_general_cmd($command,$print_info);
|
---|
255 | }
|
---|
256 |
|
---|
257 |
|
---|
258 |
|
---|
259 | sub run_cached_general_cmd
|
---|
260 | {
|
---|
261 | my $self = shift @_;
|
---|
262 |
|
---|
263 | my ($command,$ifilename,$ofilename,$print_info) = @_;
|
---|
264 |
|
---|
265 | if (!defined $print_info->{'verbosity'}) {
|
---|
266 | $print_info->{'verbosity'} = $self->{'verbosity'};
|
---|
267 | }
|
---|
268 |
|
---|
269 | if (!defined $print_info->{'outhandle'}) {
|
---|
270 | $print_info->{'outhandle'} = $self->{'outhandle'};
|
---|
271 | }
|
---|
272 |
|
---|
273 | return &convertutil::run_cached_general_cmd(@_);
|
---|
274 | }
|
---|
275 |
|
---|
276 |
|
---|
277 |
|
---|
278 | sub autorun_general_cmd
|
---|
279 | {
|
---|
280 | my $self = shift @_;
|
---|
281 |
|
---|
282 | my ($command,$ifilename,$ofilename,$print_info) = @_;
|
---|
283 |
|
---|
284 | my $result;
|
---|
285 | my $regenerated;
|
---|
286 | my $had_error;
|
---|
287 |
|
---|
288 | if ($self->{'enable_cache'}) {
|
---|
289 | ($regenerated,$result,$had_error)
|
---|
290 | = $self->run_cached_general_cmd($command,$ifilename,$ofilename,$print_info);
|
---|
291 | }
|
---|
292 | else {
|
---|
293 | $regenerated = 1; # always true for a command that is always run
|
---|
294 | ($result,$had_error)
|
---|
295 | = $self->run_general_cmd($command,$print_info);
|
---|
296 | }
|
---|
297 |
|
---|
298 | return ($regenerated,$result,$had_error);
|
---|
299 | }
|
---|
300 |
|
---|
301 |
|
---|
302 | #
|
---|
303 | 1;
|
---|