source: main/trunk/greenstone2/perllib/cpan/Mojo/Path.pm@ 32205

Last change on this file since 32205 was 32205, checked in by ak19, 6 years ago

First set of commits to do with implementing the new 'paged_html' output option of PDFPlugin that uses using xpdftools' new pdftohtml. So far tested only on Linux (64 bit), but things work there so I'm optimistically committing the changes since they work. 2. Committing the pre-built Linux binaries of XPDFtools for both 32 and 64 bit built by the XPDF group. 2. To use the correct bitness variant of xpdftools, setup.bash now exports the BITNESS env var, consulted by gsConvert.pl. 3. All the perl code changes to do with using xpdf tools' pdftohtml to generate paged_html and feed it in the desired form into GS(3): gsConvert.pl, PDFPlugin.pm and its parent ConvertBinaryPFile.pm have been modified to make it all work. xpdftools' pdftohtml generates a folder containing an html file and a screenshot for each page in a PDF (as well as an index.html linking to each page's html). However, we want a single html file that contains each individual 'page' html's content in a div, and need to do some further HTML style, attribute and structure modifications to massage the xpdftool output to what we want for GS. In order to parse and manipulate the HTML 'DOM' to do this, we're using the Mojo::DOM package that Dr Bainbridge found and which he's compiled up. Mojo::DOM is therefore also committed in this revision. Some further changes and some display fixes are required, but need to check with the others about that.

File size: 7.8 KB
Line 
1package Mojo::Path;
2use Mojo::Base -base;
3use overload
4 '@{}' => sub { shift->parts },
5 bool => sub {1},
6 '""' => sub { shift->to_string },
7 fallback => 1;
8
9use Mojo::Util qw(decode encode url_escape url_unescape);
10
11has charset => 'UTF-8';
12
13sub canonicalize {
14 my $self = shift;
15
16 my $parts = $self->parts;
17 for (my $i = 0; $i <= $#$parts;) {
18 if (!length $parts->[$i] || $parts->[$i] eq '.' || $parts->[$i] eq '...') {
19 splice @$parts, $i, 1;
20 }
21 elsif ($i < 1 || $parts->[$i] ne '..' || $parts->[$i - 1] eq '..') { $i++ }
22 else { splice @$parts, --$i, 2 }
23 }
24
25 return @$parts ? $self : $self->trailing_slash(undef);
26}
27
28sub clone {
29 my $self = shift;
30
31 my $clone = $self->new;
32 if (exists $self->{charset}) { $clone->{charset} = $self->{charset} }
33 if (my $parts = $self->{parts}) {
34 $clone->{$_} = $self->{$_} for qw(leading_slash trailing_slash);
35 $clone->{parts} = [@$parts];
36 }
37 else { $clone->{path} = $self->{path} }
38
39 return $clone;
40}
41
42sub contains { $_[1] eq '/' || $_[0]->to_route =~ m!^\Q$_[1]\E(?:/|$)! }
43
44sub leading_slash { shift->_parse(leading_slash => @_) }
45
46sub merge {
47 my ($self, $path) = @_;
48
49 # Replace
50 return $self->parse($path) if $path =~ m!^/!;
51
52 # Merge
53 pop @{$self->parts} unless $self->trailing_slash;
54 $path = $self->new($path);
55 push @{$self->parts}, @{$path->parts};
56 return $self->trailing_slash($path->trailing_slash);
57}
58
59sub new { @_ > 1 ? shift->SUPER::new->parse(@_) : shift->SUPER::new }
60
61sub parse {
62 my $self = shift;
63 $self->{path} = shift;
64 delete @$self{qw(leading_slash parts trailing_slash)};
65 return $self;
66}
67
68sub parts { shift->_parse(parts => @_) }
69
70sub to_abs_string {
71 my $path = shift->to_string;
72 return $path =~ m!^/! ? $path : "/$path";
73}
74
75sub to_dir {
76 my $clone = shift->clone;
77 pop @{$clone->parts} unless $clone->trailing_slash;
78 return $clone->trailing_slash(!!@{$clone->parts});
79}
80
81sub to_route {
82 my $clone = shift->clone;
83 return '/' . join '/', @{$clone->parts}, $clone->trailing_slash ? '' : ();
84}
85
86sub to_string {
87 my $self = shift;
88
89 # Path
90 my $charset = $self->charset;
91 if (defined(my $path = $self->{path})) {
92 $path = encode $charset, $path if $charset;
93 return url_escape $path, '^A-Za-z0-9\-._~!$&\'()*+,;=%:@/';
94 }
95
96 # Build path
97 my @parts = @{$self->parts};
98 @parts = map { encode $charset, $_ } @parts if $charset;
99 my $path = join '/',
100 map { url_escape $_, '^A-Za-z0-9\-._~!$&\'()*+,;=:@' } @parts;
101 $path = "/$path" if $self->leading_slash;
102 $path = "$path/" if $self->trailing_slash;
103 return $path;
104}
105
106sub trailing_slash { shift->_parse(trailing_slash => @_) }
107
108sub _parse {
109 my ($self, $name) = (shift, shift);
110
111 unless ($self->{parts}) {
112 my $path = url_unescape delete($self->{path}) // '';
113 my $charset = $self->charset;
114 $path = decode($charset, $path) // $path if $charset;
115 $self->{leading_slash} = $path =~ s!^/!!;
116 $self->{trailing_slash} = $path =~ s!/$!!;
117 $self->{parts} = [split '/', $path, -1];
118 }
119
120 return $self->{$name} unless @_;
121 $self->{$name} = shift;
122 return $self;
123}
124
1251;
126
127=encoding utf8
128
129=head1 NAME
130
131Mojo::Path - Path
132
133=head1 SYNOPSIS
134
135 use Mojo::Path;
136
137 # Parse
138 my $path = Mojo::Path->new('/foo%2Fbar%3B/baz.html');
139 say $path->[0];
140
141 # Build
142 my $path = Mojo::Path->new('/i/♥');
143 push @$path, 'mojolicious';
144 say "$path";
145
146=head1 DESCRIPTION
147
148L<Mojo::Path> is a container for paths used by L<Mojo::URL>, based on
149L<RFC 3986|http://tools.ietf.org/html/rfc3986>.
150
151=head1 ATTRIBUTES
152
153L<Mojo::Path> implements the following attributes.
154
155=head2 charset
156
157 my $charset = $path->charset;
158 $path = $path->charset('UTF-8');
159
160Charset used for encoding and decoding, defaults to C<UTF-8>.
161
162 # Disable encoding and decoding
163 $path->charset(undef);
164
165=head1 METHODS
166
167L<Mojo::Path> inherits all methods from L<Mojo::Base> and implements the
168following new ones.
169
170=head2 canonicalize
171
172 $path = $path->canonicalize;
173
174Canonicalize path by resolving C<.> and C<..>, in addition C<...> will be
175treated as C<.> to protect from path traversal attacks.
176
177 # "/foo/baz"
178 Mojo::Path->new('/foo/./bar/../baz')->canonicalize;
179
180 # "/../baz"
181 Mojo::Path->new('/foo/../bar/../../baz')->canonicalize;
182
183 # "/foo/bar"
184 Mojo::Path->new('/foo/.../bar')->canonicalize;
185
186=head2 clone
187
188 my $clone = $path->clone;
189
190Return a new L<Mojo::Path> object cloned from this path.
191
192=head2 contains
193
194 my $bool = $path->contains('/i/♥/mojolicious');
195
196Check if path contains given prefix.
197
198 # True
199 Mojo::Path->new('/foo/bar')->contains('/');
200 Mojo::Path->new('/foo/bar')->contains('/foo');
201 Mojo::Path->new('/foo/bar')->contains('/foo/bar');
202
203 # False
204 Mojo::Path->new('/foo/bar')->contains('/f');
205 Mojo::Path->new('/foo/bar')->contains('/bar');
206 Mojo::Path->new('/foo/bar')->contains('/whatever');
207
208=head2 leading_slash
209
210 my $bool = $path->leading_slash;
211 $path = $path->leading_slash($bool);
212
213Path has a leading slash. Note that this method will normalize the path and
214that C<%2F> will be treated as C</> for security reasons.
215
216 # "/foo/bar"
217 Mojo::Path->new('foo/bar')->leading_slash(1);
218
219 # "foo/bar"
220 Mojo::Path->new('/foo/bar')->leading_slash(0);
221
222=head2 merge
223
224 $path = $path->merge('/foo/bar');
225 $path = $path->merge('foo/bar');
226 $path = $path->merge(Mojo::Path->new);
227
228Merge paths. Note that this method will normalize both paths if necessary and
229that C<%2F> will be treated as C</> for security reasons.
230
231 # "/baz/yada"
232 Mojo::Path->new('/foo/bar')->merge('/baz/yada');
233
234 # "/foo/baz/yada"
235 Mojo::Path->new('/foo/bar')->merge('baz/yada');
236
237 # "/foo/bar/baz/yada"
238 Mojo::Path->new('/foo/bar/')->merge('baz/yada');
239
240=head2 new
241
242 my $path = Mojo::Path->new;
243 my $path = Mojo::Path->new('/foo%2Fbar%3B/baz.html');
244
245Construct a new L<Mojo::Path> object and L</"parse"> path if necessary.
246
247=head2 parse
248
249 $path = $path->parse('/foo%2Fbar%3B/baz.html');
250
251Parse path.
252
253=head2 to_abs_string
254
255 my $str = $path->to_abs_string;
256
257Turn path into an absolute string.
258
259 # "/i/%E2%99%A5/mojolicious"
260 Mojo::Path->new('/i/%E2%99%A5/mojolicious')->to_abs_string;
261 Mojo::Path->new('i/%E2%99%A5/mojolicious')->to_abs_string;
262
263=head2 parts
264
265 my $parts = $path->parts;
266 $path = $path->parts([qw(foo bar baz)]);
267
268The path parts. Note that this method will normalize the path and that C<%2F>
269will be treated as C</> for security reasons.
270
271 # Part with slash
272 push @{$path->parts}, 'foo/bar';
273
274=head2 to_dir
275
276 my $dir = $route->to_dir;
277
278Clone path and remove everything after the right-most slash.
279
280 # "/i/%E2%99%A5/"
281 Mojo::Path->new('/i/%E2%99%A5/mojolicious')->to_dir->to_abs_string;
282
283 # "i/%E2%99%A5/"
284 Mojo::Path->new('i/%E2%99%A5/mojolicious')->to_dir->to_abs_string;
285
286=head2 to_route
287
288 my $route = $path->to_route;
289
290Turn path into a route.
291
292 # "/i/♥/mojolicious"
293 Mojo::Path->new('/i/%E2%99%A5/mojolicious')->to_route;
294 Mojo::Path->new('i/%E2%99%A5/mojolicious')->to_route;
295
296=head2 to_string
297
298 my $str = $path->to_string;
299
300Turn path into a string.
301
302 # "/i/%E2%99%A5/mojolicious"
303 Mojo::Path->new('/i/%E2%99%A5/mojolicious')->to_string;
304
305 # "i/%E2%99%A5/mojolicious"
306 Mojo::Path->new('i/%E2%99%A5/mojolicious')->to_string;
307
308=head2 trailing_slash
309
310 my $bool = $path->trailing_slash;
311 $path = $path->trailing_slash($bool);
312
313Path has a trailing slash. Note that this method will normalize the path and
314that C<%2F> will be treated as C</> for security reasons.
315
316 # "/foo/bar/"
317 Mojo::Path->new('/foo/bar')->trailing_slash(1);
318
319 # "/foo/bar"
320 Mojo::Path->new('/foo/bar/')->trailing_slash(0);
321
322=head1 OPERATORS
323
324L<Mojo::Path> overloads the following operators.
325
326=head2 array
327
328 my @parts = @$path;
329
330Alias for L</"parts">. Note that this will normalize the path and that C<%2F>
331will be treated as C</> for security reasons.
332
333 say $path->[0];
334 say for @$path;
335
336=head2 bool
337
338 my $bool = !!$path;
339
340Always true.
341
342=head2 stringify
343
344 my $str = "$path";
345
346Alias for L</"to_string">.
347
348=head1 SEE ALSO
349
350L<Mojolicious>, L<Mojolicious::Guides>, L<https://mojolicious.org>.
351
352=cut
Note: See TracBrowser for help on using the repository browser.