source: main/trunk/greenstone2/perllib/cpan/Mojo/Asset/File.pm@ 32205

Last change on this file since 32205 was 32205, checked in by ak19, 6 years ago

First set of commits to do with implementing the new 'paged_html' output option of PDFPlugin that uses xpdftools' new pdftohtml. So far tested only on Linux (64 bit), but things work there so I'm optimistically committing the changes since they work. 2. Committing the pre-built Linux binaries of XPDFtools for both 32 and 64 bit built by the XPDF group. 2. To use the correct bitness variant of xpdftools, setup.bash now exports the BITNESS env var, consulted by gsConvert.pl. 3. All the perl code changes to do with using xpdf tools' pdftohtml to generate paged_html and feed it in the desired form into GS(3): gsConvert.pl, PDFPlugin.pm and its parent ConvertBinaryPFile.pm have been modified to make it all work. xpdftools' pdftohtml generates a folder containing an html file and a screenshot for each page in a PDF (as well as an index.html linking to each page's html). However, we want a single html file that contains each individual 'page' html's content in a div, and need to do some further HTML style, attribute and structure modifications to massage the xpdftool output to what we want for GS. In order to parse and manipulate the HTML 'DOM' to do this, we're using the Mojo::DOM package that Dr Bainbridge found and which he's compiled up. Mojo::DOM is therefore also committed in this revision. Some further changes and some display fixes are required, but need to check with the others about that.

File size: 5.9 KB
Line 
1package Mojo::Asset::File;
2use Mojo::Base 'Mojo::Asset';
3
4use Carp 'croak';
5use Fcntl 'SEEK_SET';
6use File::Spec::Functions ();
7use Mojo::File 'tempfile';
8
# Plain attributes without a default: "cleanup" is the flag DESTROY checks
# before deleting the file, "path" is the location of the file on disk
has [qw(cleanup path)];

# Filehandle for the asset, built on first access
has handle => sub {
  my $self = shift;
  my $path = $self->path;

  # A file that is already there is opened for reading only
  if (defined $path && -e $path) {
    return Mojo::File->new($path)->open('<');
  }

  # From here on the file gets created by us, so unless the caller decided
  # otherwise it is marked for cleanup
  if (!defined $self->cleanup) { $self->cleanup(1) }

  # A path was requested but nothing exists there yet
  if (defined $path) {
    return Mojo::File->new($path)->open('+>>');
  }

  # No path at all, fall back to a temporary file and remember where it is
  my $tmp = tempfile(
    DIR      => $self->tmpdir,
    TEMPLATE => 'mojo.tmp.XXXXXXXXXXXXXXXX',
    UNLINK   => 0
  );
  $self->path($tmp->to_string);
  return $tmp->open('+>>');
};

# Directory for temporary files, MOJO_TMPDIR wins over the system default
has tmpdir => sub {
  return $ENV{MOJO_TMPDIR} || File::Spec::Functions::tmpdir();
};
29
# Destructor: closes the filehandle (if one was ever opened) and deletes the
# file at "path" when "cleanup" is enabled. Does nothing if "cleanup" is false
# or no "path" is known.
sub DESTROY {
  my $self = shift;

  # A destructor can run at any point, so keep it from clobbering the error
  # state of whatever code happened to trigger it
  local ($., $@, $!, $^E, $?);

  return unless $self->cleanup && defined(my $path = $self->path);

  # Look at the stored handle directly instead of calling the "handle"
  # accessor. The accessor builds the handle on demand, which would open (or
  # even create) the file just to close it again, and could die inside the
  # destructor.
  if (my $handle = $self->{handle}) { close $handle }

  # Only the process that created the file is allowed to remove it
  unlink $path if -w $path && ($self->{pid} // $$) == $$;
}
39
# Append a chunk of data to the file and return the invocant. Croaks unless
# the whole chunk could be written with a single syswrite.
sub add_chunk {
  my ($self, $chunk) = @_;

  # A failed write yields undef, which can never match the chunk length
  my $written = $self->handle->syswrite($chunk);
  croak "Can't write to asset: $!" unless ($written // -1) == length $chunk;

  return $self;
}
46
# Search the asset for a string with a sliding window, starting at
# "start_range". Returns the position of the first match, or -1 if the
# string was not found.
sub contains {
  my ($self, $str) = @_;

  my $fh = $self->handle;
  $fh->sysseek($self->start_range, SEEK_SET);

  # Read size: at least as long as the search string, but capped at the
  # distance between "start_range" and the end of the searched data
  my $limit  = $self->end_range // $self->size;
  my $needle = length $str;
  my $chunk  = $needle > 131072 ? $needle : 131072;
  my $span   = $limit - $self->start_range;
  $chunk = $span if $chunk > $span;

  # Prime the window with as many bytes as the search string is long
  my $consumed = 0;
  my $primed   = $fh->sysread(my $haystack, $needle);
  while ($consumed < $limit) {

    # Pull in the next piece, never asking for more than is left
    my $left = $limit - ($primed + $consumed);
    my $want = $left < $chunk ? $left : $chunk;
    my $got  = $fh->sysread(my $piece, $want);
    $haystack .= $piece;

    # A hit is reported relative to where the search started
    my $hit = index $haystack, $str;
    return $consumed + $hit if $hit >= 0;

    # Give up when nothing more could be read or the end has been reached
    return -1 if $got == 0;
    $consumed += $got;
    return -1 if $consumed == $limit;

    # Drop as many bytes from the front as were just appended
    substr $haystack, 0, $got, '';
  }

  return -1;
}
80
# Read a chunk of data starting at the given offset (relative to
# "start_range"). At most $max bytes are returned, 131072 by default, and
# nothing past "end_range" if one is set.
sub get_chunk {
  my ($self, $offset, $max) = @_;
  my $limit = defined $max ? $max : 131072;

  # Translate the offset into an absolute file position
  my $pos = $offset + $self->start_range;
  my $fh  = $self->handle;
  $fh->sysseek($pos, SEEK_SET);

  # "end_range" is inclusive, so one more byte than the difference is left
  my $end = $self->end_range;
  if (defined $end) {
    my $remaining = $end + 1 - $pos;
    return '' if $remaining <= 0;
    $limit = $remaining if $remaining < $limit;
  }

  $fh->sysread(my $data, $limit);
  return $data;
}
98
# Always true, this asset is backed by a file
sub is_file { return 1 }
100
# Move the asset data to a new location, point "path" at it and switch off
# "cleanup". Returns the invocant.
sub move_to {
  my ($self, $to) = @_;

  # The handle has to be closed and forgotten first, Windows requires that
  {
    my $handle = $self->handle;
    close $handle;
  }
  delete $self->{handle};

  # Relocate the file, the new location must survive this object
  my $source = Mojo::File->new($self->path);
  $source->move_to($to);

  return $self->path($to)->cleanup(0);
}
112
# Modification time of the file behind the handle
sub mtime {
  my $self   = shift;
  my $handle = $self->handle;
  return (stat $handle)[9];
}
114
# Construct the object through the parent class and remember which process
# created it, DESTROY compares this id before removing the file
sub new {
  my $class = shift;
  my $self  = $class->SUPER::new(@_);
  $self->{pid} = $$;
  return $self;
}
120
# Size of the file behind the handle in bytes
sub size {
  my $self = shift;
  return -s $self->handle;
}
122
# Read the whole file from the very beginning and return its content.
# Croaks if reading fails.
sub slurp {
  my $self = shift;

  my $fh = $self->handle;
  $fh->sysseek(0, SEEK_SET);

  my $data = '';
  while (1) {
    my $count = $fh->sysread(my $piece, 131072, 0);

    # An undefined result is a read error, zero means end of file
    croak "Can't read from asset: $!" unless defined $count;
    last unless $count;

    $data .= $piece;
  }

  return $data;
}
130
# Already a file asset, so simply hand back the invocant
sub to_file {
  my ($self) = @_;
  return $self;
}
132
1331;
134
135=encoding utf8
136
137=head1 NAME
138
139Mojo::Asset::File - File storage for HTTP content
140
141=head1 SYNOPSIS
142
143 use Mojo::Asset::File;
144
145 # Temporary file
146 my $file = Mojo::Asset::File->new;
147 $file->add_chunk('foo bar baz');
148 say 'File contains "bar"' if $file->contains('bar') >= 0;
149 say $file->slurp;
150
151 # Existing file
152 my $file = Mojo::Asset::File->new(path => '/home/sri/foo.txt');
153 $file->move_to('/yada.txt');
154 say $file->slurp;
155
156=head1 DESCRIPTION
157
158L<Mojo::Asset::File> is a file storage backend for HTTP content.
159
160=head1 EVENTS
161
162L<Mojo::Asset::File> inherits all events from L<Mojo::Asset>.
163
164=head1 ATTRIBUTES
165
166L<Mojo::Asset::File> inherits all attributes from L<Mojo::Asset> and implements
167the following new ones.
168
169=head2 cleanup
170
171 my $bool = $file->cleanup;
172 $file = $file->cleanup($bool);
173
174Delete L</"path"> automatically once the file is not used anymore.
175
176=head2 handle
177
178 my $handle = $file->handle;
179 $file = $file->handle(IO::File->new);
180
181Filehandle, created on demand for L</"path">, which can be generated
182automatically and safely based on L</"tmpdir">.
183
184=head2 path
185
186 my $path = $file->path;
187 $file = $file->path('/home/sri/foo.txt');
188
189File path used to create L</"handle">.
190
191=head2 tmpdir
192
193 my $tmpdir = $file->tmpdir;
194 $file = $file->tmpdir('/tmp');
195
196Temporary directory used to generate L</"path">, defaults to the value of the
197C<MOJO_TMPDIR> environment variable or auto-detection.
198
199=head1 METHODS
200
201L<Mojo::Asset::File> inherits all methods from L<Mojo::Asset> and implements
202the following new ones.
203
204=head2 add_chunk
205
206 $file = $file->add_chunk('foo bar baz');
207
208Add chunk of data.
209
210=head2 contains
211
212 my $position = $file->contains('bar');
213
Check if asset contains a specific string, returning the position of the first match or C<-1> if the string was not found.
215
216=head2 get_chunk
217
218 my $bytes = $file->get_chunk($offset);
219 my $bytes = $file->get_chunk($offset, $max);
220
221Get chunk of data starting from a specific position, defaults to a maximum
222chunk size of C<131072> bytes (128KiB).
223
224=head2 is_file
225
226 my $bool = $file->is_file;
227
228True, this is a L<Mojo::Asset::File> object.
229
230=head2 move_to
231
232 $file = $file->move_to('/home/sri/bar.txt');
233
234Move asset data into a specific file and disable L</"cleanup">.
235
236=head2 mtime
237
238 my $mtime = $file->mtime;
239
240Modification time of asset.
241
242=head2 new
243
244 my $file = Mojo::Asset::File->new;
245 my $file = Mojo::Asset::File->new(path => '/home/sri/test.txt');
246 my $file = Mojo::Asset::File->new({path => '/home/sri/test.txt'});
247
248Construct a new L<Mojo::Asset::File> object.
249
250=head2 size
251
252 my $size = $file->size;
253
254Size of asset data in bytes.
255
256=head2 slurp
257
258 my $bytes = $file->slurp;
259
260Read all asset data at once.
261
262=head2 to_file
263
264 $file = $file->to_file;
265
266Does nothing but return the invocant, since we already have a
267L<Mojo::Asset::File> object.
268
269=head1 SEE ALSO
270
271L<Mojolicious>, L<Mojolicious::Guides>, L<https://mojolicious.org>.
272
273=cut
Note: See TracBrowser for help on using the repository browser.