source: main/trunk/greenstone2/perllib/cpan/Mojolicious/Command/get.pm@ 32205

Last change on this file since 32205 was 32205, checked in by ak19, 6 years ago

First set of commits to do with implementing the new 'paged_html' output option of PDFPlugin that uses xpdftools' new pdftohtml. So far tested only on Linux (64 bit), but things work there so I'm optimistically committing the changes since they work. 2. Committing the pre-built Linux binaries of XPDFtools for both 32 and 64 bit built by the XPDF group. 2. To use the correct bitness variant of xpdftools, setup.bash now exports the BITNESS env var, consulted by gsConvert.pl. 3. All the perl code changes to do with using xpdf tools' pdftohtml to generate paged_html and feed it in the desired form into GS(3): gsConvert.pl, PDFPlugin.pm and its parent ConvertBinaryPFile.pm have been modified to make it all work. xpdftools' pdftohtml generates a folder containing an html file and a screenshot for each page in a PDF (as well as an index.html linking to each page's html). However, we want a single html file that contains each individual 'page' html's content in a div, and need to do some further HTML style, attribute and structure modifications to massage the xpdftool output to what we want for GS. In order to parse and manipulate the HTML 'DOM' to do this, we're using the Mojo::DOM package that Dr Bainbridge found and which he's compiled up. Mojo::DOM is therefore also committed in this revision. Some further changes and some display fixes are required, but need to check with the others about that.

File size: 7.8 KB
Line 
1package Mojolicious::Command::get;
2use Mojo::Base 'Mojolicious::Command';
3
4use Mojo::DOM;
5use Mojo::IOLoop;
6use Mojo::JSON qw(to_json j);
7use Mojo::JSON::Pointer;
8use Mojo::URL;
9use Mojo::UserAgent;
10use Mojo::Util qw(decode encode getopt);
11use Scalar::Util 'weaken';
12
# Short description of this command, used for the command list
has(description => 'Perform HTTP request');

# Usage information for this command, used for the help screen; the default is
# whatever the inherited extract_usage method returns
has(
  usage => sub {
    my $self = shift;
    return $self->extract_usage;
  }
);
15
# Run the command: parse the options in @args, perform the HTTP request for
# the URL that follows them and print the response body to STDOUT. When a
# selector follows the URL, the body is buffered instead and only the selected
# part is printed (JSON Pointer for an empty selector or one starting with "/",
# CSS selector otherwise, optionally followed by further commands). Dies with
# the usage message if no URL was given.
sub run {
  my ($self, @args) = @_;

  # Data from STDIN (only read if STDIN is not a terminal and select() reports
  # it as readable without waiting)
  vec(my $r = '', fileno(STDIN), 1) = 1;
  my $in = !-t STDIN && select($r, undef, undef, 0) ? join '', <STDIN> : undef;

  my $ua = Mojo::UserAgent->new(ioloop => Mojo::IOLoop->singleton);
  my %form;
  getopt \@args,
    'C|charset=s' => \my $charset,
    'c|content=s' => \$in,

    # The name is matched non-greedily so that it ends at the first "=", which
    # keeps values that contain "=" themselves (e.g. "token=abc==") intact;
    # _form picks the pieces up from the capture variables
    'f|form=s' => sub { _form(\%form) if $_[1] =~ /^(.+?)=(\@?)(.+)$/ },
    'H|header=s'             => \my @headers,
    'i|inactivity-timeout=i' => sub { $ua->inactivity_timeout($_[1]) },
    'k|insecure'             => sub { $ua->insecure(1) },
    'M|method=s'             => \(my $method = 'GET'),
    'o|connect-timeout=i'    => sub { $ua->connect_timeout($_[1]) },
    'r|redirect'             => \my $redirect,
    'S|response-size=i'      => sub { $ua->max_response_size($_[1]) },
    'u|user=s'               => \my $user,
    'v|verbose'              => \my $verbose;

  @args = map { decode 'UTF-8', $_ } @args;
  die $self->usage unless my $url = shift @args;
  my $selector = shift @args;

  # Parse header pairs (whitespace around the name is stripped, and a name
  # given more than once keeps all of its values as an array reference instead
  # of only the last one)
  my %headers;
  for my $header (@headers) {
    next unless $header =~ /^\s*([^:]+?)\s*:\s*(.*+)$/;
    my ($name, $value) = ($1, $2);
    if    (!exists $headers{$name}) { $headers{$name} = $value }
    elsif (ref $headers{$name})     { push @{$headers{$name}}, $value }
    else                            { $headers{$name} = [$headers{$name}, $value] }
  }

  # Detect proxy for absolute URLs, for relative ones (starting with "/") the
  # user agent's server is given this command's application
  $url !~ m!^/! ? $ua->proxy->detect : $ua->server->app($self->app);
  $url = Mojo::URL->new($url)->userinfo($user) if $user;
  $ua->max_redirects(10) if $redirect;

  my $buffer = '';
  $ua->on(
    start => sub {
      my ($ua, $tx) = @_;

      # Verbose (the callbacks below are stored on the transaction's own
      # response content and close over $tx, so only a weak reference is kept)
      weaken $tx;
      $tx->res->content->on(
        body => sub { warn _header($tx->req), _header($tx->res) })
        if $verbose;

      # Stream content (ignore redirects), buffering it when a selector has to
      # be applied to the complete body later
      $tx->res->content->unsubscribe('read')->on(
        read => sub {
          return if $redirect && $tx->res->is_redirect;
          defined $selector ? ($buffer .= pop) : print pop;
        }
      );
    }
  );

  # Switch to verbose for HEAD requests
  $verbose = 1 if $method eq 'HEAD';
  STDOUT->autoflush(1);

  # Form values take precedence over raw content
  my @content = %form ? (form => \%form) : defined $in ? ($in) : ();
  my $tx  = $ua->start($ua->build_tx($method, $url, \%headers, @content));
  my $res = $tx->result;

  # JSON Pointer
  return unless defined $selector;
  return _json($buffer, $selector) if !length $selector || $selector =~ m!^/!;

  # Selector
  $charset //= $res->content->charset || $res->default_charset;
  _select($buffer, $selector, $charset, @args);
}
87
# Append one form field to the hash reference passed as first argument. The
# field itself is taken from the capture variables of the pattern the caller
# has just matched: $1 is the field name, $3 the value, and a true $2 (the
# optional "@") turns the value into a file upload of the form {file => $3}.
sub _form {
  my $form  = $_[0];
  my $value = $2 ? {file => $3} : $3;
  push @{$form->{$1}}, $value;
}
89
# Return the pieces of a message's header block as a list: the result of its
# build_start_line method, its headers rendered with to_string, and a blank
# line separator
sub _header {
  my $message = shift;
  return $message->build_start_line, $message->headers->to_string, "\n\n";
}
91
# Decode the JSON document in the first argument, look up the JSON Pointer
# given as second argument and print the result. Arrays and hashes are printed
# as JSON, everything else as is. Nothing is printed if the document could not
# be decoded, is a bare null, or the pointer does not resolve to a defined
# value.
sub _json {
  my ($json, $pointer) = @_;

  # Test for definedness rather than truth, so that valid documents that
  # happen to be false in Perl (such as "0" or "false") are not discarded
  return unless defined(my $data = j($json));
  return
    unless defined($data = Mojo::JSON::Pointer->new($data)->get($pointer));
  _say(ref $data eq 'HASH' || ref $data eq 'ARRAY' ? to_json($data) : $data);
}
97
# Print every argument that has a non-zero length on a line of its own,
# encoded as UTF-8; empty and undefined values are skipped
sub _say {
  for my $chunk (@_) {
    next unless length $chunk;
    say encode('UTF-8', $chunk);
  }
}
99
# Apply a CSS selector to the buffered response body and print the outcome.
# The remaining arguments are commands processed in order: a number narrows
# the results down to the element at that position, "text", "all" and "attr"
# print the text, all text or one attribute of every result and stop. Without
# a printing command the matching elements themselves are printed. Dies on an
# unknown command.
sub _select {
  my ($buffer, $selector, $charset, @args) = @_;

  # Decode the body if a charset is known, falling back to the raw bytes when
  # decoding fails
  if ($charset) {
    my $decoded = decode($charset, $buffer);
    $buffer = $decoded if defined $decoded;
  }

  # Keep a strong reference to the root
  my $dom     = Mojo::DOM->new($buffer);
  my $results = $dom->find($selector);

  # Commands that simply map every result through one method
  my %method_for = (text => 'text', all => 'all_text');

  while (defined(my $command = shift @args)) {

    # Number
    if ($command =~ /^\d+$/) {
      $results = $results->slice($command);
      next if $results;
    }

    # Text and all text
    if ($command eq 'text' || $command eq 'all') {
      return _say($results->map($method_for{$command})->each);
    }

    # Attribute (the name is the next argument, which is left in place)
    if ($command eq 'attr') {
      my $name = $args[0] // '';
      return _say($results->map(attr => $name)->each);
    }

    # Unknown
    die qq{Unknown command "$command".\n};
  }

  _say($results->each);
}
129
1301;
131
132=encoding utf8
133
134=head1 NAME
135
136Mojolicious::Command::get - Get command
137
138=head1 SYNOPSIS
139
140 Usage: APPLICATION get [OPTIONS] URL [SELECTOR|JSON-POINTER] [COMMANDS]
141
142 ./myapp.pl get /
143 ./myapp.pl get -H 'Accept: text/html' /hello.html 'head > title' text
144 ./myapp.pl get //sri:secr3t@/secrets.json /1/content
145 mojo get mojolicious.org
146 mojo get -v -r -o 25 -i 50 google.com
147 mojo get -v -H 'Host: mojolicious.org' -H 'Accept: */*' mojolicious.org
148 mojo get -u 'sri:s3cret' https://mojolicious.org
149 mojo get mojolicious.org > example.html
150 mojo get -M PUT mojolicious.org < example.html
151 mojo get -f 'q=Mojolicious' -f 'size=5' https://metacpan.org/search
152 mojo get -M POST -f 'upload=@example.html' mojolicious.org
153 mojo get mojolicious.org 'head > title' text
154 mojo get mojolicious.org .footer all
155 mojo get mojolicious.org a attr href
156 mojo get mojolicious.org '*' attr id
157 mojo get mojolicious.org 'h1, h2, h3' 3 text
158 mojo get https://fastapi.metacpan.org/v1/author/SRI /name
159 mojo get -H 'Host: example.com' http+unix://%2Ftmp%2Fmyapp.sock/index.html
160
161 Options:
162 -C, --charset <charset> Charset of HTML/XML content, defaults
163 to auto-detection
164 -c, --content <content> Content to send with request
165 -f, --form <name=value> One or more form values and file
166 uploads
167 -H, --header <name:value> One or more additional HTTP headers
168 -h, --help Show this summary of available options
169 --home <path> Path to home directory of your
170 application, defaults to the value of
171 MOJO_HOME or auto-detection
172 -i, --inactivity-timeout <seconds> Inactivity timeout, defaults to the
173 value of MOJO_INACTIVITY_TIMEOUT or 20
174 -k, --insecure Do not require a valid TLS certificate
175 to access HTTPS sites
176 -M, --method <method> HTTP method to use, defaults to "GET"
177 -m, --mode <name> Operating mode for your application,
178 defaults to the value of
179 MOJO_MODE/PLACK_ENV or "development"
180 -o, --connect-timeout <seconds> Connect timeout, defaults to the value
181 of MOJO_CONNECT_TIMEOUT or 10
182 -r, --redirect Follow up to 10 redirects
183 -S, --response-size <size> Maximum response size in bytes,
184 defaults to 2147483648 (2GiB)
185 -u, --user <userinfo> Alternate mechanism for specifying
186 colon-separated username and password
187 -v, --verbose Print request and response headers to
188 STDERR
189
190=head1 DESCRIPTION
191
192L<Mojolicious::Command::get> is a command line interface for
193L<Mojo::UserAgent>.
194
195This is a core command, which means it is always enabled, and its code is a good
196example for learning to build new commands; you're welcome to fork it.
197
198See L<Mojolicious::Commands/"COMMANDS"> for a list of commands that are
199available by default.
200
201=head1 ATTRIBUTES
202
203L<Mojolicious::Command::get> performs requests to remote hosts or local
204applications.
205
206=head2 description
207
208 my $description = $get->description;
209 $get = $get->description('Foo');
210
211Short description of this command, used for the command list.
212
213=head2 usage
214
215 my $usage = $get->usage;
216 $get = $get->usage('Foo');
217
218Usage information for this command, used for the help screen.
219
220=head1 METHODS
221
222L<Mojolicious::Command::get> inherits all methods from L<Mojolicious::Command>
223and implements the following new ones.
224
225=head2 run
226
227 $get->run(@ARGV);
228
229Run this command.
230
231=head1 SEE ALSO
232
233L<Mojolicious>, L<Mojolicious::Guides>, L<https://mojolicious.org>.
234
235=cut
Note: See TracBrowser for help on using the repository browser.