source: main/trunk/greenstone2/perllib/plugins/BaseMediaConverter.pm

Last change on this file was 38478, checked in by anupama, 5 months ago

Related to the previous 2 commits. Removed or commented out most debug statements, only one remains (the final filepath within the cache dir for a file that has been determined by the new subroutine). Adjusted the new subroutine's description.

  • Property svn:executable set to *
File size: 9.2 KB
Line 
1###########################################################################
2#
3# BaseMediaConverter - helper plugin that provide base functionality for
4# image/video conversion using ImageMagick/ffmpeg
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2008 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27package BaseMediaConverter;
28
29use PrintInfo;
30
31use convertutil;
32
33use strict;
34no strict 'refs'; # allow filehandles to be variables and viceversa
35use gsprintf 'gsprintf';
36
37BEGIN {
38 @BaseMediaConverter::ISA = ('PrintInfo');
39}
40
41my $arguments = [
42 { 'name' => "enable_cache",
43 'desc' => "{BaseMediaConverter.enable_cache}",
44 'type' => "flag",
45 'reqd' => "no",
46 }
47
48 ];
49
50my $options = { 'name' => "BaseMediaConverter",
51 'desc' => "{BaseMediaConverter.desc}",
52 'abstract' => "yes",
53 'inherits' => "yes",
54 'args' => $arguments };
55
56sub new {
57 my ($class) = shift (@_);
58 my ($pluginlist,$inputargs,$hashArgOptLists,$auxiliary) = @_;
59 push(@$pluginlist, $class);
60
61 push(@{$hashArgOptLists->{"ArgList"}},@{$arguments});
62 push(@{$hashArgOptLists->{"OptList"}},$options);
63
64 my $self = new PrintInfo($pluginlist, $inputargs, $hashArgOptLists, $auxiliary);
65
66 return bless $self, $class;
67}
68
69sub begin {
70 my $self = shift (@_);
71 my ($pluginfo, $base_dir, $processor, $maxdocs) = @_;
72
73 # Save base_dir for use in file cache
74 $self->{'base_dir'} = $base_dir;
75}
76
77
78# This method exists to prevent duplication of long filepaths within the cache dir.
79# Given the file location where media or other product files to be cached are generated
80# (such as the pages of a PDF file converted to paged images), this subroutine returns
81# the relative path the document is to be stored as, within the cache subdirectory.
82# e.g. collect/<collection>/cached/<fileroot/numbered-filename.ext>
83# will return fileroot/numbered-filename.ext
84# e.g.#2 collect/<collection>/import/<fileroot.ext> will return fileroot.ext
85sub get_cache_filename_for_location
86{
87 my $self = shift @_;
88 my ($filename, $orig_file_root, $base_dir, $collect_dir) = @_;
89
90 my $cached_dir = &FileUtils::filenameConcatenate($collect_dir,"cached");
91
92 # hashmap of regex match to replacement regex
93 my %prefixre_to_replacere_map;
94
95 my $orig_file_root_replacere = "";
96 if(defined $orig_file_root) {
97 $orig_file_root_replacere = "/$orig_file_root";
98 }
99
100 # populate hashmap with regex matcher to regex replacement search
101 # https://stackoverflow.com/questions/41334509/can-i-use-qq-or-q-instead-of-qw-in-the-following-perl-script
102 # https://perldoc.perl.org/perlop#Regexp-Quote-Like-Operators
103 $prefixre_to_replacere_map{qr/^$base_dir(.*?)$/} = "";
104 $prefixre_to_replacere_map{qr/^$cached_dir(.*?)$/} = qr/[\/\\][^\/\\]+([\/\\][^\/\\]*)$/;
105 $prefixre_to_replacere_map{qr/^$collect_dir\/(tmp\/.*?)$/} = "";
106 $prefixre_to_replacere_map{qr/^$collect_dir(.*?)$/} = "";
107 $prefixre_to_replacere_map{qr@^$ENV{'GSDLHOME'}/tmp/[^/]*(.*?)$@} = $orig_file_root_replacere; # prefix this
108 #$prefixre_to_replacere_map{qr/^$ENV{'GSDLHOME'}(.*?)$/} = ""; # Not found a real-world use for this yet
109 #$prefixre_to_replacere_map{qr//} = qr//; # dummy template line to add additional regex match processing rules
110
111 my $file;
112
113 # https://stackoverflow.com/questions/3033/whats-the-safest-way-to-iterate-through-the-keys-of-a-perl-hash
114 # https://stackoverflow.com/questions/383528/how-can-i-sort-a-hashs-keys-naturally
115 # http://www.java2s.com/Code/Perl/Hash/SortHashbyKeysinReverseOrder.htm
116 foreach my $key (reverse sort keys %prefixre_to_replacere_map) {
117 # reverse sort as we want to try matching more specific "$coll_dir/$subdir" before $coll_dir
118
119 #print STDERR "@@@ Key and value: $key\n\t $prefixre_to_replacere_map{$key}\n";
120
121 my $prefixre = $key;
122 my $replacere = $prefixre_to_replacere_map{$key};
123
124 ($file) = ($filename =~ m/$prefixre/);
125 if (!defined $file || $file eq $filename) {
126 #print STDERR "\t#### No match\n"; # keep looping looking for the next match
127 next;
128 } else {
129 #print STDERR "\t#### Found match. Applying: $replacere\n";
130 if($replacere eq $orig_file_root_replacere) {
131 $file = $orig_file_root_replacere.$file if $file;
132 } else {
133 $file =~ s/$replacere/$1/;
134 }
135 last; # found and processed a match
136 }
137 }
138
139 # No matches found, reset $file to $filename
140 if(!defined $file) {
141 $file = $filename;
142 }
143
144 $file =~ s/^\/|\\//; # get rid of leading slash from relative filename
145 $file =~ s@^(\.(\/|\/))*@@; # get rid of any ./ at the start
146 print STDERR "\t@@@ Final cache tail file is: $file\n";
147
148 return $file
149}
150
151sub init_cache_for_file
152{
153 my $self = shift @_;
154 my ($filename, $orig_file_root) = @_;
155
156 my $verbosity = $self->{'verbosity'};
157 my $outhandle = $self->{'outhandle'};
158 my $base_dir = $self->{'base_dir'};
159
160 my $collect_dir = $ENV{'GSDLCOLLECTDIR'};
161 $collect_dir =~ s/\\/\//g; # Work in Unix style world
162
163 # Work out relative filename within 'base_dir'
164 $filename =~ s/\\/\//g;
165 $base_dir =~ s/\\/\//g;
166
167 my $file = $self->get_cache_filename_for_location($filename, $orig_file_root, $base_dir, $collect_dir);
168
169 # Setup cached_dir and file_root
170
171 my ($file_root, $dirname, $suffix)
172 = &File::Basename::fileparse($file, "\\.[^\\.]+\$");
173 #= &File::Basename::fileparse($conv_file, "\\.[^\\.]+\$");
174
175 # if dirname is in collections tmp area, remove collect_dir prefix
176 $dirname =~ s/^$collect_dir//;
177
178 if ($ENV{'GSDLOS'} eq "windows") {
179 # if dirname starts with Windows drive letter, strip it off
180 $dirname =~ s/^[a-z]:\///i;
181 }
182
183 my $base_output_dir = &FileUtils::filenameConcatenate($collect_dir,"cached",$dirname);
184
185 if (!-e $base_output_dir ) {
186 print $outhandle "Creating directory $base_output_dir\n"
187 if ($verbosity>2);
188
189 &FileUtils::makeAllDirectories($base_output_dir);
190 }
191
192
193 #print STDERR "@@@@ base_output_dir: $base_output_dir\n";
194 #print STDERR "@@@@ file_root: $file_root\n";
195
196 my $output_dir = &FileUtils::filenameConcatenate($base_output_dir,$file_root);
197
198 if (!-e $output_dir) {
199 print $outhandle "Creating directory $output_dir\n"
200 if ($verbosity>2);
201
202 &FileUtils::makeAllDirectories($output_dir);
203 }
204
205 $self->{'cached_dir'} = $output_dir;
206 $self->{'cached_file_root'} = $file_root;
207}
208
209
210
211sub run_general_cmd
212{
213 my $self = shift @_;
214 my ($command,$print_info) = @_;
215
216
217 if (!defined $print_info->{'verbosity'}) {
218 $print_info->{'verbosity'} = $self->{'verbosity'};
219 }
220
221 if (!defined $print_info->{'outhandle'}) {
222 $print_info->{'outhandle'} = $self->{'outhandle'};
223 }
224
225
226 return &convertutil::run_general_cmd(@_);
227}
228
229
230sub regenerate_general_cmd
231{
232 my $self = shift @_;
233 my ($command,$ifilename,$ofilename,$print_info) = @_;
234
235 if (!defined $print_info->{'verbosity'}) {
236 $print_info->{'verbosity'} = $self->{'verbosity'};
237 }
238
239 if (!defined $print_info->{'outhandle'}) {
240 $print_info->{'outhandle'} = $self->{'outhandle'};
241 }
242
243 return &convertutil::regenerate_general_cmd(@_);
244}
245
246
247
248sub run_uncached_general_cmd
249{
250 my $self = shift @_;
251
252 my ($command,$ifilename,$ofilename,$print_info) = @_;
253
254 return $self->run_general_cmd($command,$print_info);
255}
256
257
258
259sub run_cached_general_cmd
260{
261 my $self = shift @_;
262
263 my ($command,$ifilename,$ofilename,$print_info) = @_;
264
265 if (!defined $print_info->{'verbosity'}) {
266 $print_info->{'verbosity'} = $self->{'verbosity'};
267 }
268
269 if (!defined $print_info->{'outhandle'}) {
270 $print_info->{'outhandle'} = $self->{'outhandle'};
271 }
272
273 return &convertutil::run_cached_general_cmd(@_);
274}
275
276
277
278sub autorun_general_cmd
279{
280 my $self = shift @_;
281
282 my ($command,$ifilename,$ofilename,$print_info) = @_;
283
284 my $result;
285 my $regenerated;
286 my $had_error;
287
288 if ($self->{'enable_cache'}) {
289 ($regenerated,$result,$had_error)
290 = $self->run_cached_general_cmd($command,$ifilename,$ofilename,$print_info);
291 }
292 else {
293 $regenerated = 1; # always true for a command that is always run
294 ($result,$had_error)
295 = $self->run_general_cmd($command,$print_info);
296 }
297
298 return ($regenerated,$result,$had_error);
299}
300
301
302#
3031;
Note: See TracBrowser for help on using the repository browser.