source: gsdl/tags/gsdl-2_71-distribution/gsdl/packages/w3mir/w3mir-1.0.8/w3http.pm@ 14121

Last change on this file since 14121 was 719, checked in by davidb, 25 years ago

added w3mir package

  • Property svn:keywords set to Author Date Id Revision
File size: 24.5 KB
Line 
1# -*- perl -*-
2# w3http.pm --- send http requests, janl's 12" mix for w3mir
3#
4$VERSION=1.0.22;
5#
6# This implements http/1.0 requests. We'll have problems with http/0.9
7# This is in no way specific to w3mir.
8#
9# IMPORTANT: The caller should initialize the C locale for some of the
10# things here to work correctly (specifically the strftime function).
11#
12# This is a rewrite of http.pl by Oscar Nierstrasz; I copied the code he he
13# copied from the camel book. Some functions written by Gorm Haug Eriksen
14# ([email protected]) has been used as is.
15#
16# Contributors:
17# Nicolai Langfeldt, [email protected]
18# Gorm Haug Eriksen, [email protected]
19# Chris Szurgot, [email protected]
20# Bernhard Weisshuh, [email protected]
21# Copying and modification is governed by the "Artistic License" enclosed in
22# the w3mir distribution
23#
24# gorm :
25# &w3http::get_last_modified return the last modified stamp on a file in
26# the right format for use with http
27#
28# janl:
29# &http::query: Send a http query. A completely general function to send a
30# http query. Will extract header values, http response code and, optionaly,
31# convert text files to local linefeed format.
32#
33# Variables to examine after a query
34# $w3http::document: The document returned by the query, if any.
35# $w3http::doclen: The length of the document
36# $w3http::result: The numerical http result code. It may take these values:
37# - Normal HTTP reply codes
38# - 98: OS error, permanent, errormessage in $!
39# - 99: Other permanent error: see $restext.
40# - 100: Transient error: Timeout/connection broken
41# $w3http::restext: The english(?) HTTP result or w3http generated message
42# $w3http::header: The http header returned.
43# $w3http::plaintexthtml: 1 if this doc is non-content-encoded text/html
44# (as opposed to content-encoding: compressed content-type: text/html
45# which needs decompression before we can inspect the html)
46# The tests are somewhat longwinded so I do it just once here.
47# %w3http::headval: Associative array of header values
48# $w3http::headval{'CONTENT-TYPE'}: Derived content type, stripped of charset
49# qualifiers and other distractions.
50# $w3http::xfbytes: Transfered bytes, cumulative. Document part only.
51# $w3http::headbytes: Bytes of headers received, cumulative.
52#
53# Variables that change http's behaviour/requests:
54# $w3http::agent: User agent, default is basename of $0
55# $w3http::from: Request is from, default is user@host
56# $w3http::version: The http version to use, only 1.0 is known to me.
57# $w3http::timeout: How long to wait for new data to arrive, default is 600sec
58# $w3http::buflen: Network read buffer size, default is 4096. It might give a
59# speedup to tune this for specific servers' so it matches their send
60# size. This size can be detected if we want to, I think.
61# $w3http::debug: 1 debuging output, 2, more, 3 queries and replies
62# $w3http::verbose: 0: say nothing, 1: print progress info
63# $w3http::convert: Convert text/* documents to local newline convention?
64# The default is to do it.
65# $w3http::proxyserver: The name of the proxyserver to use.
66# $w3http::proxyport: The port of the proxyserver to use. 0 if no proxyserver.
67# $w3http::proxyuser: If this is set proxy authentication will be used.
68# $w3http::proxypasswd: The password for proxy authentication
69#
70# Things gotten from main:
71# - $main::win32: 1 if win32 restrictions apply to this system
72# - $main::nulldevice: Bit sink file/device on this system.
73#
74# History (european date format dd/mm/yy):
75# janl ??/??/95 -- Rewrite finished
76# szurgot ??/??/95 -- Win32 compatability
77# janl 16/05/96 -- Added SAVEBIN option, based on idea by szurgot
78# szurgot 03/05/96 -- Corrected typo in check for content-length against
79# retreive document length. Added test for zero-length
80# documents (Not retreived because not-modified)
81# szurgot 19/05/96 -- Win32 adaptions, fixes.
82# janl 19/05/96 -- Chris won an argument, and janl simplified http
83# retrival loop (-> version 1.0.4)
84# janl 09/09/96 -- Incorporated a patch submited by Michael Kriby -> 1.0.5
85# janl 16/09/96 -- Support for authorization. -> 1.0.6
86# janl 27/09/96 -- Support for Accept header, lack pointed out by
87# [email protected]: ... HTTP/1.1 (§14.1) says
88# ``If no Accept header field is present, then it is
89# assumed that the client accepts all media types,
90# earlier versions of the protocol suggest that only
91# text/plain and text/html will be offered by default.''
92# This contradicts my memory of a http/1.0 draft.
93# Also added $ACCEPT option.
94# janl 20/10/96 -- Now uses HTTP::Date to produce HTTP timestamps -> 1.0.7
95# janl 27/10/96 -- Didn't use to check if gethostbyname worked -> 1.0.8
96# janl 02/12/96 -- Forgot a unlink when renaming temporary files.
97# janl 21/02/97 -- Multipele $ACCEPT options work. -> 1.0.9
98# janl 19/03/97 -- Now issues Host: header -> 1.0.10
99# janl 10/04/97 -- Changed from wwwurl to URI::URL, and various related
100# changes. -> 1.0.11
101# janl 09/05/97 -- Microsoft ISS servers are _so_ broken -> 1.0.12
102# (don't close the write end of the HTTP socket after
103# sending a query to them)
104# janl 12/05/97 -- New version of perl caught some typos, fixed
105# longstanding bug in the newline conversion bit.
106# -> 1.0.13
107# janl 06/06/97 -- Demand Loading of MIME::BASE64 -> 1.0.14
108# janl 01/12/97 -- FAT filesystems drops LSB of modtime. Patch from
109# Greg Lindhorst ([email protected])
110# -- whoami does not exist on win32, hardwire a default
111# value (unknown) (also Greg L.) -> 1.0.15
112# janl 01/22/98 -- Proxy authentication as outlined by Christian Geuer
113# janl 02/20/98 -- Complex 'content-type' headers handled. -> 1.0.17
114# janl 04/20/98 -- Only newline convert text/html, everything else is
115# handled as binary. -> 1.0.18
116# janl 12/05/98 -- Store tmpfile in its final destination directory
117# avoiding asking movefile move it across filesystems.
118# -> 1.0.19
119# janl 01/08/98 -- Timeout fix from Michael Gusev, also flag short doc
120# as error.
121# janl 24/09/98 -- Better error handling -> 1.0.20
122# bkw 17/12/98 -- Fixed problem with tempfile-generation when
123# running in forget-mode (-f)
124# janl 05/01/99 -- Referer: dropped if argument not true -> 1.0.21
125# janl 13/04/99 -- Added workaround for broken win32 perl resolving.
126
127package w3http;
128
129require 5.002;
130use Socket;
131use HTTP::Date;
132use Sys::Hostname;
133use URI::URL;
134
135# Suplementary libwww-perl:
136sub URI::URL::_generic::basename {
137 my $self = shift;
138 my @p = $self->path_components;
139 my $old = $p[-1];
140 if (@_) {
141 splice(@p, -1, 1, shift);
142 $self->path_components(@p)
143 }
144 $old;
145}
146
147
148END {
149 # Remove tmp file and such in here. That means that main:: gotta catch
150 # interrupt signals and exit on them, so ENDs are executed.
151}
152
153use strict;
154# Global variables, we want to share them:
155use vars qw($GET $HEAD $GETURL $HEADURL $IFMOD $IFMODF $AUTHORIZ $REFERER);
156use vars qw($SAVEBIN $ACCEPT $NOUSER $FREEHEAD $agent $version $timeout);
157use vars qw($debug $convert $proxyserver $proxyport $xfbytes $headbytes);
158use vars qw($verbose $result $restext $header $document);
159use vars qw($plaintexthtml %headval $progress $doclen $proxyuser);
160use vars qw($proxypasswd);
161
162my $hasAlarm; # Win32 does not have any alarm
163my $chime; # Has the alarm gone off yet?
164my %address; # My own DNS cache
165my $savALRM; # Saved ALRM handler
166my $savPIPE; # Saved PIPE handler
167
168# The main:: program should detect if we're running on win32 or not,
169# somehow
170if ($main::win32) {
171 warn "win32\n";
172 # Compensate for lacks of win32 perl.
173 $hasAlarm=0;
174 # Seems to be unavailable in win32/perl5.001. It has to be in 5.003!
175# eval "sub sockaddr_in {
176# ($port, $thataddr) = @_;
177# $sockaddr = 'S n a4 x8';
178# return pack($sockaddr, &AF_INET, $port, $thataddr);
179# }";
180} else {
181 $hasAlarm=1;
182}
183
184
185# Find out some things
186my $thishost = hostname();
187my $proto = getprotobyname("tcp");
188
189(my $name, undef) = gethostbyname($thishost);
190chomp(my $user = $ENV{'LOGNAME'} || $ENV{'USER'} || `whoami` || 'unknown');
191my $from = "$user\@$name";
192
193my $nl = "\r\n";
194# Default values, change by assignment in using-program.
195$agent = $0; $agent =~ s~.*/~~; # Basename
196$version= "1.0";
197$timeout= 600; # Timeout while waiting for data/connection
198my $buflen = 4096; # recv buffer length
199$debug = 0; # Debuging output?
200$convert = 1; # Convert newlines of text docs to local format
201$proxyserver=''; # Proxy server.
202$proxyport=0; # Proxy server port. 0 if no proxy.
203$proxyuser=''; # Username for proxy authentication
204$proxypasswd=''; # Password for proxy authentication
205$xfbytes=0; # 0 bytes transfered, cumulative
206$headbytes=0; # 0 bytes of headers, cumulative
207$doclen=0; # 0 bytes in doc, pr. document
208my $tmpfile="w3mir$$.tmp"; # Temporary filename
209$verbose=0; # Verbosenes, 0: silent, 1: progress info
210
211# Query opcodes
212$GET = 1; # GET query. Arg: host,port,path
213$HEAD = 2; # HEAD query. Arg: host,port,path
214$GETURL = 3; # GET query. Arg: url
215$HEADURL = 4; # HEAD query. Arg: url
216# Here we lack PUT, which is not implemented
217# Modify query thus:
218$IFMOD = 101; # If-modified after: Arg: HTTP-date-str
219$IFMODF = 102; # If-modified after file: Arg: local-file-name
220$AUTHORIZ= 103; # Basic authorization. Arg: 'user:password'
221$REFERER = 104; # Referer: Arg: Referer
222$SAVEBIN = 105; # Write binary files to disk. Arg: File name
223 # If this opcode is used then main must provide
224 # a &main::movefile(oldname,newname) procedure
225 # that handles moving the tmp file to the
226 # final name/location.
227$ACCEPT = 106; # Accept header value: Arg: value
228$NOUSER = 107; # Don't insert user header. Arg: none
229$FREEHEAD= 999; # Freeform header, one line. Arg: header
230
231sub query {
232 # Build and send a HTTP query. And also receive response - janl 95/09/18
233 #
234 # Return codes: 0 if it didn't work. 1 if it did work.
235 # HTTP style result code in w3http::$result and message in w3http::$restext
236
237 # We do next to no argument type checking btw.
238
239 my($host,$port,$request,$query,$method,$inp,$linp,$saveto,$save,$arg);
240 my($start,$wantbytes,$thataddr,$err,$headb,$tmpf,$ldoc,$nouser,$q,$accept);
241 my($origreq,$req_o,$plaintext);
242
243 # Something ought to be said
244 $result=99;
245 $restext='w3http: internal error';
246 $nouser=0;
247
248 if ($version ne '1.0') {
249 warn "Unknown HTTP version $version, no request sent\n";
250 return 0;
251 }
252
253 $accept=$saveto=$query='';
254
255 # Find out what to ask for
256
257 while (defined($arg=shift)) {
258 if ($arg == $GET) {
259 $host=shift;
260 $port=shift;
261 $request=shift;
262 $req_o=url 'http://'.$host.':'.$port.$request;
263 if ($proxyport) {
264 $query.='GET http://'.$req_o->as_string;
265 } else {
266 $query.='GET '.$req_o->epath;
267 }
268 $query.=' HTTP/'.$version.$nl;
269 } elsif ($arg == $HEAD) {
270 $host=shift;
271 $port=shift;
272 $request=shift;
273 $req_o=url 'http://'.$host.':'.$port.$request;
274 if ($proxyport) {
275 $query.='HEAD '.$req_o->as_string;
276 } else {
277 $query.='HEAD '.$req_o->epath;
278 }
279 $query.=' HTTP/'.$version.$nl;
280 } elsif ($arg == $GETURL) {
281 $req_o=shift;
282 $req_o=url $req_o unless ref $req_o;
283 ($method,undef,undef,$host,$port,$request,undef,$q) = $req_o->crack;
284 if ($proxyport) {
285 $query.='GET '.$req_o->as_string;
286 } else {
287 $q=$req_o->equery;
288 $query.='GET '.$request.($q?"?$q":'');
289 }
290 $query.=' HTTP/'.$version.$nl;
291 } elsif ($arg == $HEADURL) {
292 $req_o=shift;
293 $req_o=url $req_o unless ref $req_o;
294 if ($proxyport) {
295 $query.='HEAD '.$req_o->as_string;
296 } else {
297 $q=$req_o->equery;
298 $query.='HEAD '.$req_o->epath.($q?"?$q":'');
299 }
300 $query.=' HTTP/'.$version.$nl;
301 } elsif ($arg == $IFMOD) {
302 $query.='If-Modified-Since: '.(shift).$nl;
303 } elsif ($arg == $IFMODF) {
304 $query.='If-Modified-Since: '.&last_modified(shift).$nl;
305 } elsif ($arg == $AUTHORIZ) {
306 # Demand-load MIME::Base64
307 if (!defined(&MIME::Base64::encode)) {
308 eval "use MIME::Base64;";
309 die "w3http: Could not load MIME::Base64 module necessary for authentication\n"
310 unless defined(&MIME::Base64::encode);
311 }
312 $query.='Authorization: Basic '.MIME::Base64::encode(shift,'').$nl;
313 } elsif ($arg == $REFERER) {
314 my($referer)=shift;
315 $query.='Referer: '.$referer.$nl if $referer;
316 } elsif ($arg == $SAVEBIN) {
317 $saveto=shift;
318 } elsif ($arg == $ACCEPT) {
319 $accept.='Accept: '.(shift).$nl;
320 } elsif ($arg == $NOUSER) {
321 $nouser=1;
322 } elsif ($arg == $FREEHEAD) {
323 $query.=(shift).$nl;
324 } else {
325 warn "Unknown http query opcode: $arg\n";
326 }
327 # Insert the last parts of the query:
328 }
329
330 $query.='Host: '.$req_o->netloc.$nl;
331 $query.='From: '.$from.$nl unless $nouser;
332
333 $accept='Accept: */*'.$nl unless $accept;
334
335 if ($proxyport) {
336 # Use proxy instead of originserver
337 $host=$proxyserver;
338 $port=$proxyport;
339
340 # Add authentication stuff to query
341 if ($proxyuser) {
342 # Demand-load MIME::Base64
343 if (!defined(&MIME::Base64::encode)) {
344 eval "use MIME::Base64;";
345 die "w3http: Could not load MIME::Base64 module necessary for authentication\n"
346 unless defined(&MIME::Base64::encode);
347 }
348
349 $query.='Proxy-Authorization: Basic '.
350 MIME::Base64::encode($proxyuser.':'.$proxypasswd);
351
352 print STDERR "\nProxyuser: [$proxyuser]\nProxypasswd: [$proxypasswd]\n"
353 if $debug>=2;
354 }
355 }
356
357 $query.='User-Agent: '.$agent.$nl.$accept.$nl;
358
359 # If we're using proxy then set up things...
360 print STDERR "\nQUERY:\n",$query,"---\n" if $debug>=2;
361
362 # win32 fix: this should be added in case of troubles with
363 # gethostbyname. possible reason: nameserver down?
364 if ($host =~ /^\d+(\.\d+){3}$/) {
365 # in case gethostbyname will not work ... ;-)
366 $address{$host} = pack 'C4', (split /\./, $host);
367 }
368
369 # Find out who to ask, check if we know already
370 if (exists($address{$host})) {
371 # We know
372 $thataddr=$address{$host};
373 } else {
374 # Cache miss, get and remember.
375 (my $fqdn, undef, undef, undef, $thataddr) = gethostbyname($host);
376 # Hostname lookup failure? Cache even misses.
377 if (defined($fqdn)) {
378 print STDERR "Lookup of $host:\nFQDN: $fqdn\n"
379 if $debug;
380 $address{$host}=$thataddr;
381 $address{$fqdn}=$thataddr if $fqdn ne $host;
382 } else {
383 $thataddr=$address{$host}=undef;
384 }
385 }
386
387 # Check if lookup failure, return
388 if (!defined($thataddr)) {
389 $restext='Host lookup failure';
390 return;
391 }
392
393 $port=80 unless defined($port) && $port;
394
395 # When connected we might receive SIGPIPE. I'm not sure if the
396 # default behaviour of dying is beneficial in that case. If we get
397 # alarm a timeout has expired.
398 $savPIPE = $SIG{'PIPE'};
399 $savALRM = $SIG{'ALRM'};
400
401 $chime=0; # There has been no alarm yet
402 $SIG{'ALRM'} = \&timeout;
403 $SIG{'PIPE'} = \&ignore;
404
405 # Close the socket, just in case, and ignore error returns
406 close(FS);
407
408 socket(FS, AF_INET, SOCK_STREAM, $proto) or return &oserror;
409 warn "Got my socks on\n" if $debug;
410
411 my $paddr = sockaddr_in($port, $thataddr);
412 connect(FS, $paddr) or return &oserror;
413 warn "Connected\n" if $debug;
414
415 # Arrange timeout
416 alarm($timeout) if $hasAlarm;
417
418 # We have, in fact, received SIGPIPE on this line:
419 send(FS,$query,0) or return &oserror;
420
421 if ($chime) {
422 $result=100;
423 $restext='timeout sending query';
424 return &resetsign;
425 }
426
427 $header='';
428 $document='';
429 $inp=' 'x$buflen;
430 $doclen=$chime=$plaintext=$plaintexthtml=$save=0;
431
432 # Breaks some M$ ISS servers:
433 # shutdown(FS,1); # Half-close socket, sending now not allowed
434
435 print STDERR ", receiving header" if $verbose>0;
436
437 # Retrive HTTP response HEADER. Why do I use recv and not <FS>?
438 # Because then the timeout can work correctly!
439 while (1) {
440 # Set up alarm to ensure recv returns within a reasonable timeframe
441 alarm($timeout) if $hasAlarm;
442 $err = recv(FS,$inp,$buflen,0);
443 # recv returned, cancel alarm.
444 alarm(0) if $hasAlarm;
445
446 # If there has been a timeout, then we quit now. The recv man page
447 # does not seem to allow recv to return the bytes received up to
448 # the timeout.
449 if ($chime) {
450 $result=100;
451 $restext='timeout fetching document';
452 $!=0;
453 if ($save) {
454 unlink($tmpf) ||
455 warn "Could not unlink $tmpf: $!\n";
456 }
457 return &resetsign;
458 }
459
460 # recv returnes the undefined value on error
461 if (!defined($err)) {
462 warn "Error in recv: $!\n";
463 last;
464 }
465
466 $linp=length($inp);
467
468 # If the returned input was 0 in length then we've gotten to the
469 # end of the response.
470 last unless $linp;
471
472 # Accounting
473 $xfbytes += $linp;
474 $doclen += $linp;
475
476 # Accumulate input
477 $header.=$inp;
478
479 # eof(SOCKET) has strange semantics it seems
480 # last if eof(FS);
481
482 # Check if header is complete
483 last if ($header =~ m/(\r?\n\r?\n)/);
484 }
485
486 if (length($header)==0) {
487 $restext='the HTTP reply header is empty!';
488 return &resetsign;
489 }
490
491 if ($header =~ m/(\r?\n\r?\n)/) {
492 $header=$`;
493 $document=$';
494 } else {
495 $header=$document;
496 }
497
498 # Adjust accounting
499 $headb = length($header)+length($1);
500 $headbytes += $headb;
501 $xfbytes -= $headb;
502 $doclen -= $headb;
503
504 # Pick headers to pieces
505 ($result,$restext,%headval)=&analyze_header($header);
506
507 print STDERR "REPLY:\n",$header,"\n---\n" if $debug>=2;
508
509 # Check if the document is a non-encoded text document. The contents
510 # could be (x-)?compress or (x-)gzip coded (compressed in other
511 # words).
512
513 $plaintext=defined($headval{'CONTENT-TYPE'}) &&
514 (substr($headval{'CONTENT-TYPE'},0,5) eq 'text/' || 0) &&
515 !defined($headval{'content-encoding'});
516 $plaintexthtml=$plaintext &&
517 ($headval{'CONTENT-TYPE'} eq 'text/html');
518
519 if ($result==200) {
520
521 # Save this to a file, or not? Never save html files.
522 if ($saveto && !$plaintexthtml) {
523 # We're going to save this document directly into a file. This
524 # stresses the VM less when getting the large binares so often
525 # found at cool sites.
526 $save=1;
527
528 # Find a temporary filename
529 $tmpf=url "file:$saveto";
530 $tmpf->basename($tmpfile);
531 $tmpf=$tmpf->unix_path;
532
533 # Find suitable final filename, one with no URL escapes
534 $saveto=(url "file:$saveto")->unix_path;
535
536 # If output to stdout then send it directly there rather than
537 # using disk unnecesarily.
538 $tmpf='-' if ($saveto eq '-');
539
540 # If output is nulldevice (running -f), use it also for tmpfile,
541 # since it would otherwise try to create it in /dev under unix.
542 $tmpf=$main::nulldevice if ($saveto eq $main::nulldevice);
543
544 warn "USING TMPFILE: $tmpf\n" if $debug;
545
546 open(SAVE,">$tmpf") ||
547 die "Could not open tmp file: $tmpf: $!\n";
548 binmode SAVE; # It's a binary file...
549 }
550
551 if ($verbose>0) {
552 print STDERR ", document";
553 print STDERR "->disk" if $save;
554 }
555
556 # Now retrive document itself. Se comments in header loop
557 $start=time;
558 $wantbytes = defined($headval{'content-length'})?
559 $headval{'content-length'}:0;
560
561 $ldoc=length($document);
562
563 while (1) {
564 alarm($timeout) if $hasAlarm;
565 recv(FS,$inp,$buflen,0);
566 alarm(0) if $hasAlarm;
567
568 if ($chime) {
569 $result=100;
570 $restext='timeout fetching document';
571 $!=0;
572 if ($save) {
573 unlink($tmpf) ||
574 warn "Could not unlink $tmpf: $!\n";
575 }
576 return &resetsign;
577 }
578
579 $linp=length($inp);
580
581 last unless $linp || $ldoc;
582 $ldoc = 0;
583
584 $xfbytes += $linp;
585 $doclen += $linp;
586
587 if ($verbose>0 && time-$start>5) {
588 # Write progress info ...
589 if ($wantbytes) {
590 $progress = sprintf " %3d%%", $doclen/$wantbytes*100;
591 } else {
592 $progress = sprintf " %d", $doclen;
593 }
594 print STDERR $progress, "\ch"x(length($progress));
595 # ...every 5 seconds
596 $start=time;
597 }
598
599 $document.=$inp;
600
601 if ($save) {
602 $err = print SAVE $document;
603 die "Error writing $tmpf: $!\n" unless $err;
604 $document='';
605 }
606
607 # The eof test seems to work very oddly for sockets.
608 # last if eof(FS);
609 }
610
611 close(FS); # Close socket completely
612
613 print STDERR "DOCUMENT:\n----\n",$document,"\n----\n" if $debug>=255;
614
615 if ($wantbytes &&
616 $wantbytes != $doclen) {
617 $result=100;
618 $restext='transfer error; too many bytes in document';
619 $restext='document was incomplete' if ($wantbytes > $doclen) ;
620 print STDERR "SHORT DOCUMENT" if $debug>=16;
621 if ($save) {
622 unlink($tmpf) || warn "Could not unlink $tmpf: $!\n";
623 }
624 return &resetsign;
625 }
626
627 # warn "XFB: $xfbytes, DL: $doclen\n";
628 if ($save) {
629 close(SAVE);
630 &main::movefile($tmpf,$saveto);
631 }
632
633 # If this is a non-encoded text file and we're supposed to convert
634 # foreign newlines then we do it. It would be faster to do this
635 # with each chunk of input in the input loop, but this gives us
636 # two problems:
637 # - A \r\n newline could be split into two chunks. Thus escaping
638 # newline conversion.
639 # - It messes up the received bytes accounting rather badly.
640 #
641 # This used to be a test for $plaintext, the problem is that too
642 # many documents were typed as text/plain and so we corrupted
643 # binary files. This is bad. So now we're more paranoid about it:
644 # Only HTML gets converted.
645 if ($convert && $plaintexthtml) {
646 # Change non unix newlines to unix newlines. bare \r is known
647 # from macintosh (they hadta be different didn't they?), \r\n is
648 # known as 'network format' and from numerous systems, among
649 # them ms-dos.
650 $document =~ s~\r~\n~g unless $document =~ s~\r\n~\n~g;
651 warn "Newlines converted(?)\n" if $debug;
652 }
653
654 } # if $result == 200
655
656 &resetsign;
657 return 1;
658}
659
660
661sub analyze_header {
662 my($header)=@_;
663 my($result,$restext,%headval,$hdln,$key,$value);
664
665 # Summary of the http spec on headers (with my comments):
666 # - Each header line ends in CRLF (or just LF, or maybe even just CR,
667 # anyways, it's easier if all is LF).
668 $header =~ s/\r/\n/mg unless $header =~ s/\r\n/\n/mg;
669 # - If a line starts with space then it's a continuation of the previous
670 # line (these I fold into one line).
671 $header =~ s/\n\s/ /mg;
672 # - The header field names are case insensitive (so I convert them to
673 # lowercase)
674 # - A field may appear twice, that is equivalent to listing the values
675 # in a comma separated list (so I fold them into a comma separated list)n
676 # - The field name and the field value are separated by ': '
677 ($result,$restext) = $header =~ m~^HTTP/\d\.\d (\d\d\d) (.*)~;
678 # Shave off http result code from the header
679 $header =~ s~^.*\n~~;
680
681 warn "Header:\n$header\n---\n" if $debug>=3;
682
683 warn "Result: $result, Text: $restext\n" if $debug>=2;
684
685 %headval=();
686
687 foreach $hdln (split(/\r?\n/m,$header)) {
688 ($key,$value)=split(': ',$hdln,2);
689 $key="\L$key";
690 # Strip leading&trailing space off the reply, some servers use
691 # copious space after.
692 $value =~ s/^\s+|\s+$//g;
693 print STDERR "K: '$key', V: '$value'\n" if $debug>=2;
694 if (defined($headval{$key})) {
695 $headval{$key}.=", ".$value;
696 } else {
697 $headval{$key}=$value;
698 }
699 }
700
701 # See if there are any type parameters in the content-type header
702 # and if so remove them.
703 if (defined($headval{'content-type'})) {
704 my $val=$headval{'content-type'};
705 ($val,undef)=split(';',$val,2) if ($val =~ /;/);
706 $headval{'CONTENT-TYPE'}=$val;
707 }
708
709 return ($result,$restext,%headval);
710}
711
712
713sub last_modified {
714 # will return the last modified time for a local file as a HTTP
715 # timestamp.
716
717 my(@tmp) = stat($_[0]); # file doesn't exist ok to fetch
718
719 # FAT file systems strip the LSB of the file time. Add it back in
720 # here before asking the server about a modified file. The only way
721 # this can fail is if the newer server file was saved one second
722 # after the first version (very unlikely). This isn't needed for
723 # NTFS file systems, but there is no good portable Perl way to
724 # determine the file system type.
725 $tmp[9] = $tmp[9] | 1 if ( $main::win32 );
726
727 # now we got the last modified in a 32 bit integer. time to convert
728 # it and return
729 return time2str($tmp[9]);
730}
731
732
733sub timeout {
734 # Set timeout flag. The using procedure has to set other result codes.
735 $chime=1; # When this is 1 then the alarm has gone off
736 print STDERR "TIMEOUT!!!!\n" if $debug>=16;
737}
738
739
740sub ignore {
741 warn "I got SIGPIPE, ignoring it...\n";
742}
743
744
745sub resetsign {
746 return 0 if !defined($savALRM);
747 $SIG{'ALRM'}=$savALRM;
748 undef $savALRM;
749# $SIG{'PIPE'}=$savPIPE;
750 return 0;
751}
752
753
754sub oserror {
755
756 resetsign;
757
758 $result=98;
759 $restext='w3http: OS error';
760 return 0;
761
762}
763
764
7651;
Note: See TracBrowser for help on using the repository browser.