1 | # -*-perl-*-
|
---|
2 |
|
---|
3 | use Config;
|
---|
4 |
|
---|
5 | &read_makefile;
|
---|
6 | $fullperl = resolve_make_var('FULLPERL') || $Config{'perlpath'};
|
---|
7 | $islib = resolve_make_var('INSTALLSITELIB');
|
---|
8 |
|
---|
9 | $name = $0;
|
---|
10 | $name =~ s~^.*/~~;
|
---|
11 | $name =~ s~.PL$~~;
|
---|
12 |
|
---|
13 | open(OUT,"> $name") ||
|
---|
14 | die "Could open $name for writing: $!\n";
|
---|
15 |
|
---|
16 | print "writing $name\n";
|
---|
17 |
|
---|
18 | while (<DATA>) {
|
---|
19 | if (m~^\#!/.*/perl.*$~o) {
|
---|
20 | # This substitutes the path perl was installed at on this system
|
---|
21 | # _and_ removed any (-w) options.
|
---|
22 | print OUT "#!",$fullperl,$1,"\n";
|
---|
23 | next;
|
---|
24 | }
|
---|
25 | if (/^use lib/o) {
|
---|
26 | # This substitutes the actuall library install path
|
---|
27 | print OUT "use lib '$islib';\n";
|
---|
28 | next;
|
---|
29 | }
|
---|
30 | print OUT;
|
---|
31 | }
|
---|
32 |
|
---|
33 | close(OUT);
|
---|
34 |
|
---|
35 | # Make it executable too, and writeable
|
---|
36 | chmod 0755, $name;
|
---|
37 |
|
---|
38 | #### The library
|
---|
39 |
|
---|
40 | sub resolve_make_var ($) {
|
---|
41 |
|
---|
42 | my($var) = shift @_;
|
---|
43 | my($val) = $make{$var};
|
---|
44 |
|
---|
45 | # print "Resolving: ",$var,"=",$val,"\n";
|
---|
46 |
|
---|
47 | while ($val =~ s~\$\((\S+)\)~$make{$1}~g) {}
|
---|
48 | # print "Resolved: $var: $make{$var} -> $val\n";
|
---|
49 | $val;
|
---|
50 | }
|
---|
51 |
|
---|
52 |
|
---|
53 | sub read_makefile {
|
---|
54 |
|
---|
55 | open(MAKEFILE, 'Makefile') ||
|
---|
56 | die "Could not open Makefile for reading: $!\n";
|
---|
57 |
|
---|
58 | while (<MAKEFILE>) {
|
---|
59 | chomp;
|
---|
60 | next unless m/^([A-Z]+)\s*=\s*(\S+)$/;
|
---|
61 | $make{$1}=$2;
|
---|
62 | # print "Makevar: $1 = $2\n";
|
---|
63 | }
|
---|
64 |
|
---|
65 | close(MAKEFILE)
|
---|
66 | }
|
---|
67 |
|
---|
68 | __END__
|
---|
69 | #!/local/bin/perl5 -w
|
---|
70 | # Perl 5.002 or later. w3mir is mostly tested with perl 5.004
|
---|
71 | #
|
---|
72 | # You might want to change or comment out this:
|
---|
73 | use lib '/hom/janl/lib/perl';
|
---|
74 | #
|
---|
75 | # Can perform the following fixes:
|
---|
76 | # - Rewrite redirected to URLs
|
---|
77 | # - change .../ into .../index.html (or .../Welcome.html)
|
---|
78 | # - change external links to point to some helpfull .html file ---- NOT IMPLEMENTED
|
---|
79 | # - change links to documents not retrived to point to some helpfull
|
---|
80 | # .html file --- NOT IMPLEMENTED
|
---|
81 | # - After adding a 'Also' directive we can edit the urls pointing to within
|
---|
82 | # the new space of the retrival scope, making the pointers consistent.
|
---|
83 |
|
---|
84 | # Method:
|
---|
85 | # 1 Gather list of all URLs to be rewritten:
|
---|
86 | # - Redirects: Just read the .redirs file
|
---|
87 | # - .../ into .../index.html: All can be found in the .referers file
|
---|
88 | # Remember to rewrite missing / redirects too...
|
---|
89 | # - external links: in the .referers file too
|
---|
90 | # - non-retrived documents: What files should be here (according to
|
---|
91 | # .referers file) but are not?
|
---|
92 | # 2 Gather list of all documents needing editing
|
---|
93 | # 3 Edit them
|
---|
94 | #
|
---|
95 | # 09/05/98 janl - Only rewrite .../ to .../$indexname if the attribute
|
---|
96 | # does not refer to a directory. -> 0.6.2
|
---|
97 | # 12/05/98 janl - Use ->local_path to determine filename -> 0.6.3
|
---|
98 | # 22/05/98 janl - .../#foo was not being rewritten to
|
---|
99 | # .../$indexname#foo -> 0.6.4
|
---|
100 | #
|
---|
101 | require 5.002;
|
---|
102 |
|
---|
103 | use vars qw($win32);
|
---|
104 |
|
---|
105 | # To figure out what kind of system this is
|
---|
106 | BEGIN {
|
---|
107 | use Config;
|
---|
108 | $win32 = ( $Config{'osname'} eq 'MSWin32' );
|
---|
109 | }
|
---|
110 |
|
---|
111 | use Carp;
|
---|
112 | use htmlop;
|
---|
113 | use URI::URL;
|
---|
114 | use URI::Escape;
|
---|
115 |
|
---|
116 | use strict;
|
---|
117 |
|
---|
118 | my $VERSION;
|
---|
119 | $VERSION='0.6.5';
|
---|
120 |
|
---|
121 | my $debug=0; # Not debugging
|
---|
122 | my $verbose=0;
|
---|
123 |
|
---|
124 | my $indexname='index.html';
|
---|
125 | my $chdirto=''; # Place to chdir to after reading
|
---|
126 | # config file
|
---|
127 |
|
---|
128 | my $infoloss=0; # 1 if any URL translations (which
|
---|
129 | # cause information loss) are in
|
---|
130 | # effect. If this is true we use the
|
---|
131 | # SAVEURL operation. What to get, and
|
---|
132 | # not. Text of user supplied
|
---|
133 | # fetch/ignore rules
|
---|
134 |
|
---|
135 | my $doindex=1; # append $indexname to /$ ?
|
---|
136 |
|
---|
137 | my $editthis=''; # Edit references matching this expression.
|
---|
138 |
|
---|
139 | my $files=0; # How many files have I edited?
|
---|
140 | my $rc='';
|
---|
141 |
|
---|
142 | my $rule_text="# User defined fetch/ignore rules\n";
|
---|
143 | # Code ref to the rule procedure
|
---|
144 | my $rule_code;
|
---|
145 |
|
---|
146 | # Code to prefix and postfix the generated code. Prefix should make
|
---|
147 | # $_ contain the url to match. Postfix should return 1, the default
|
---|
148 | # is to get the url/file.
|
---|
149 | my $rule_prefix='$rule_code = sub { local($_) = shift;'."\n";
|
---|
150 | my $rule_postfix=' return 1; } ';
|
---|
151 |
|
---|
152 | # Scope tests generated by URL/Also directives in cfg. The scope code
|
---|
153 | # is just like the rule code, but used for program generated
|
---|
154 | # fetch/ignore rules related to multiscope retrival.
|
---|
155 | my $scope_fetch="# Automatic fetch rules for multiscope retrival\n";
|
---|
156 | my $scope_ignore="# Automatic ignore rules for multiscope retrival\n";
|
---|
157 | my $scope_code;
|
---|
158 |
|
---|
159 | my $scope_prefix='$scope_code = sub { local($_) = shift;'."\n";
|
---|
160 | my $scope_postfix=' return 0; } ';
|
---|
161 |
|
---|
162 | # Function to apply to urls, se rule comments.
|
---|
163 | my $user_apply_code; # User specified apply code
|
---|
164 | my $apply_code; # w3mirs apply code
|
---|
165 | my $apply_prefix='$apply_code = sub { local($_) = @_;'."\n";
|
---|
166 | my $apply_lc=' $_ = lc $_; ';
|
---|
167 | my $apply_postfix=' return $_; } ';
|
---|
168 | my @user_apply; # List of users apply rules.
|
---|
169 | my @internal_apply; # List of w3mirs apply rules.
|
---|
170 |
|
---|
171 |
|
---|
172 | my $iinline=''; # inline RE code to make RE caseinsensitive
|
---|
173 | my $ipost=''; # RE postfix to make it caseinsensitive
|
---|
174 | my $lc=0; # Convert urls/filenames to lowercase?
|
---|
175 | my $abs=0; # Absolutify URLs?
|
---|
176 | my $fixrc=''; # Name of w3mfix config file
|
---|
177 | my $fixup=0; # Do things needed to run fixup
|
---|
178 | my $r=0; # Recurse? no recursion = absolutify links
|
---|
179 | my %rum_referers=(); # Array of referers, key: rum_url
|
---|
180 | my %rum_redirected=(); # Array of redirected url: key: original url
|
---|
181 | my %lf_edited=(); # Edited this file yet?
|
---|
182 | my $list; # List url on STDOUT?
|
---|
183 |
|
---|
184 | my %stat=(); # stat($lf_url): 'd' for dir, 'f' for others
|
---|
185 |
|
---|
186 | # ######################### Libwww-perl addons:
|
---|
187 |
|
---|
188 | sub URI::URL::_generic::unix_path {
|
---|
189 | my $self = (shift)->clone;
|
---|
190 |
|
---|
191 | $self->frag(undef);
|
---|
192 | $self->scheme("file");
|
---|
193 | $self->unix_path;
|
---|
194 | }
|
---|
195 |
|
---|
196 |
|
---|
197 | sub URI::URL::_generic::basename {
|
---|
198 | my $self = shift;
|
---|
199 | my @p = $self->path_components;
|
---|
200 | my $old = $p[-1];
|
---|
201 | if (@_) {
|
---|
202 | splice(@p, -1, 1, shift);
|
---|
203 | $self->path_components(@p)
|
---|
204 | }
|
---|
205 | $old;
|
---|
206 | }
|
---|
207 |
|
---|
208 |
|
---|
209 | sub URI::URL::_generic::path_components {
|
---|
210 | my $self = shift;
|
---|
211 | my $old = $self->{'path'};
|
---|
212 | $old = '' unless defined $old;
|
---|
213 | $old = "/$old" if $old !~ m|^/| && defined $self->{'netloc'};
|
---|
214 | if (@_) {
|
---|
215 | $self->_elem('path',
|
---|
216 | join("/", map { uri_escape($_,$URI::URL::reserved)
|
---|
217 | } @_));
|
---|
218 | }
|
---|
219 | map { uri_unescape($_) } split("/", $old, -1);
|
---|
220 | }
|
---|
221 |
|
---|
222 |
|
---|
223 | # ######################### Configuration/argument parsing
|
---|
224 |
|
---|
225 | sub parse_args {
|
---|
226 | my $f;
|
---|
227 | my $i;
|
---|
228 |
|
---|
229 | $i=0;
|
---|
230 |
|
---|
231 | while ($f=shift) {
|
---|
232 | $i++;
|
---|
233 | # This is a demonstration against Getopts::Long.
|
---|
234 | if ($f =~ s/^-+//) {
|
---|
235 | $verbose=-1,next if $f eq 'q'; # Quiet
|
---|
236 | $verbose=1,next if $f eq 'c'; # Chatty
|
---|
237 | die "w3mfix version $VERSION\n" if $f eq 'v'; # Version
|
---|
238 | die "rtfm\n" if ($f eq 'help' || $f eq 'h' || $f eq '?');
|
---|
239 |
|
---|
240 | if ($f eq 'editref') {
|
---|
241 | die "Sorry, can only have one -editref pr. run\n"
|
---|
242 | if $editthis;
|
---|
243 | $editthis=quotemeta(shift);
|
---|
244 | next;
|
---|
245 | }
|
---|
246 |
|
---|
247 | if ($f eq 'd') { # Debugging level
|
---|
248 | $f=shift;
|
---|
249 | unless (($debug = $f) > 0) {
|
---|
250 | die "w3mfix: debug level must be a number greater than zero.\n";
|
---|
251 | }
|
---|
252 | next;
|
---|
253 | }
|
---|
254 |
|
---|
255 | # Those were all the options...
|
---|
256 | warn "w3mfix: Unknown option: -$f. Use -h for usage info.\n";
|
---|
257 | exit(1);
|
---|
258 | } else {
|
---|
259 | # If we get this far then ... it's a configuration file name:
|
---|
260 | $rc = $f;
|
---|
261 | die "w3mfix: Got a non-option argument that wasn't the name of a\n".
|
---|
262 | "(configuration) file either\n"
|
---|
263 | unless -f $f;
|
---|
264 | }
|
---|
265 | }
|
---|
266 | }
|
---|
267 |
|
---|
268 |
|
---|
269 | sub parse_cfg_file {
|
---|
270 | # Read the configuration file. Aborts on errors.
|
---|
271 | # Ignores w3mir options w3mfix does not need itself.
|
---|
272 |
|
---|
273 | my ( $file ) = @_ ;
|
---|
274 | my ($key, $value, $authserver,$authrealm,$authuser,$authpasswd);
|
---|
275 | my $i;
|
---|
276 |
|
---|
277 | die "w3mfix: config file $file is not a file.\n" unless -f $file;
|
---|
278 | open(CFGF, $file) || die "Could not open config file $file: $!\n";
|
---|
279 |
|
---|
280 | # print STDERR "Reading $file\n";
|
---|
281 |
|
---|
282 | $i=0;
|
---|
283 |
|
---|
284 | while (<CFGF>) {
|
---|
285 | # Trim off various junk
|
---|
286 | chomp;
|
---|
287 | s/^#.*//;
|
---|
288 | s/^\s+|\s$//g;
|
---|
289 | # Anything left?
|
---|
290 | next if $_ eq '';
|
---|
291 | # Examine remains
|
---|
292 | $i++;
|
---|
293 | ($key, $value) = split(/\s*:\s*/,$_,2);
|
---|
294 | $key = lc $key;
|
---|
295 |
|
---|
296 | # These are no-ops in w3mfix
|
---|
297 | next if ( $key eq 'initial-referer' );
|
---|
298 | next if ( $key eq 'header' );
|
---|
299 | next if ( $key eq 'pause' );
|
---|
300 | next if ( $key eq 'retry-pause' );
|
---|
301 | next if ( $key eq 'retries' );
|
---|
302 | next if ( $key eq 'robot-rules' );
|
---|
303 | next if ( $key eq 'remove-nomirror' );
|
---|
304 | next if ( $key eq 'file-disposition' );
|
---|
305 | next if ( $key eq 'http-proxy' );
|
---|
306 | next if ( $key eq 'proxy-options' );
|
---|
307 | next if ( $key eq 'auth-domain' );
|
---|
308 | next if ( $key eq 'auth-user' );
|
---|
309 | next if ( $key eq 'auth-passwd' );
|
---|
310 | next if ( $key eq 'disable-headers' );
|
---|
311 | next if ( $key eq 'agent' );
|
---|
312 |
|
---|
313 | $debug=numeric($value),next if ( $key eq 'debug' );
|
---|
314 | umask(numeric($value)),next if ( $key eq 'umask' );
|
---|
315 | $indexname=$value,next if ($key eq 'index-name');
|
---|
316 | $verbose=nway($value,'quiet','brief','chatty')-1,next
|
---|
317 | if ( $key eq 'verbosity' );
|
---|
318 |
|
---|
319 | if ( $key eq 'cd' ) {
|
---|
320 | $chdirto=$value;
|
---|
321 | next;
|
---|
322 | }
|
---|
323 |
|
---|
324 | if ($key eq 'url') {
|
---|
325 | my ($rum_url_o,$lf_dir,$rum_reurl,$rum_rebase);
|
---|
326 |
|
---|
327 | # A two argument URL: line?
|
---|
328 | if ($value =~ m/^(.+)\s+(.+)/i) {
|
---|
329 | # Two arguments.
|
---|
330 | $rum_url_o=url $1;
|
---|
331 | # The last is a directory, it must end in /
|
---|
332 | $lf_dir=$2;
|
---|
333 | $lf_dir.='/' unless $lf_dir =~ m~/$~;
|
---|
334 |
|
---|
335 | # The first is a URL, make it more canonical, find the base.
|
---|
336 | # The namespace confusion in this section is correct.(??)
|
---|
337 | $rum_rebase = quotemeta( ($rum_url_o->as_string =~ m~^(.*/)~)[0] );
|
---|
338 |
|
---|
339 | # print "URL: ",$rum_url_o->as_string,"\n";
|
---|
340 | # print "Base: $rum_rebase\n";
|
---|
341 |
|
---|
342 | # Translate from rum space to lf space:
|
---|
343 | push(@internal_apply,"s/^".$rum_rebase."/".quotemeta($lf_dir)."/");
|
---|
344 |
|
---|
345 | # That translation could lead to information loss.
|
---|
346 | $infoloss=1;
|
---|
347 |
|
---|
348 | # Fetch rules tests the rum_url_o->as_string. Fetch whatever
|
---|
349 | # matches the base.
|
---|
350 | $scope_fetch.="return 1 if m/^".$rum_rebase."/".$ipost.";\n";
|
---|
351 |
|
---|
352 | # Ignore whatever did not match the base.
|
---|
353 | $scope_ignore.="return 0 if m/^".
|
---|
354 | quotemeta("http://".$rum_url_o->netloc."/")."/".$ipost.";\n";
|
---|
355 |
|
---|
356 | } else {
|
---|
357 | # $rum_url_o=root_quene($value);
|
---|
358 |
|
---|
359 | $rum_url_o=url $value;
|
---|
360 |
|
---|
361 | $rum_rebase = quotemeta( ($rum_url_o->as_string =~ m~^(.*/)~)[0] );
|
---|
362 |
|
---|
363 | # Translate from rum space to lf space:
|
---|
364 | push(@internal_apply,"s/^".$rum_rebase."//");
|
---|
365 |
|
---|
366 | $scope_fetch.="return 1 if m/^".$rum_rebase."/".$ipost.";\n";
|
---|
367 | $scope_ignore.="return 0 if m/^".
|
---|
368 | quotemeta("http://".$rum_url_o->netloc."/")."/".$ipost.";\n";
|
---|
369 | }
|
---|
370 | next;
|
---|
371 | }
|
---|
372 |
|
---|
373 | if ($key eq 'also' || $key eq 'also-quene') {
|
---|
374 | if ($value =~ m/^(.+)\s+(.+)/i) {
|
---|
375 | my ($rum_url_o,$lf_dir,$rum_reurl,$rum_rebase);
|
---|
376 | # Two arguments.
|
---|
377 | $rum_url_o=url $1;
|
---|
378 | $rum_url_o->host(lc $rum_url_o->host);
|
---|
379 | # The last is a directory, it must end in /
|
---|
380 | $lf_dir=$2;
|
---|
381 | $lf_dir.='/' unless $lf_dir =~ m~/$~;
|
---|
382 |
|
---|
383 | # The first is a URL, find the base
|
---|
384 | $rum_rebase = quotemeta( ($rum_url_o->as_string =~ m~^(.*/)~)[0] );
|
---|
385 |
|
---|
386 | # Ok, now we can transform and select stuff the right way
|
---|
387 | push(@internal_apply,"s/^".$rum_rebase."/".quotemeta($lf_dir)."/");
|
---|
388 | $infoloss=1;
|
---|
389 |
|
---|
390 | # Fetch rules tests the rum_url_o->as_string. Fetch whatever
|
---|
391 | # matches the base.
|
---|
392 | $scope_fetch.="return 1 if m/^".$rum_rebase."/".$ipost.";\n";
|
---|
393 |
|
---|
394 | # Ignore whatever did not match the base. This cures problem
|
---|
395 | # with '..' from base in in rum space pointing within the the
|
---|
396 | # scope in ra space. We introduced a extra level (or more) of
|
---|
397 | # directories with the apply above. Must do same with 'Also:'
|
---|
398 | # directives.
|
---|
399 | $scope_ignore.="return 0 if m/^".
|
---|
400 | quotemeta("http://".$rum_url_o->netloc."/")."/".$ipost.";\n";
|
---|
401 | } else {
|
---|
402 | die "Also: requires 2 arguments\n";
|
---|
403 | }
|
---|
404 | next;
|
---|
405 | }
|
---|
406 |
|
---|
407 | if ($key eq 'quene') {
|
---|
408 | # root_quene($value);
|
---|
409 | next;
|
---|
410 | }
|
---|
411 |
|
---|
412 | if ($key eq 'ignore-re' || $key eq 'fetch-re') {
|
---|
413 | # Check that it's a re, better that I am strict than for perl to
|
---|
414 | # make compilation errors.
|
---|
415 | unless ($value =~ /^m(.).*\1[gimosx]*$/) {
|
---|
416 | print STDERR "w3mfix: $value is not a recognized regular expression\n";
|
---|
417 | exit 1;
|
---|
418 | }
|
---|
419 | }
|
---|
420 |
|
---|
421 | if ($key eq 'fetch' || $key eq 'fetch-re') {
|
---|
422 | my $expr=$value;
|
---|
423 | $expr = wild_re($expr).$ipost if ($key eq 'fetch');
|
---|
424 | $rule_text.=' return 1 if '.$expr.";\n";
|
---|
425 | next;
|
---|
426 | }
|
---|
427 |
|
---|
428 | if ($key eq 'ignore' || $key eq 'ignore-re') {
|
---|
429 | my $expr=$value;
|
---|
430 | $expr = wild_re($expr).$ipost if ($key eq 'ignore');
|
---|
431 | $rule_text.=' return 0 if '.$expr.";\n";
|
---|
432 | next;
|
---|
433 | }
|
---|
434 |
|
---|
435 |
|
---|
436 | if ($key eq 'apply') {
|
---|
437 | unless ($value =~ /^s(.).*\1.*\1[gimosxe]*$/) {
|
---|
438 | print STDERR
|
---|
439 | "w3mfix: '$value' is not a recognized regular expression\n";
|
---|
440 | exit 1;
|
---|
441 | }
|
---|
442 | push(@user_apply,$value) ;
|
---|
443 | $infoloss=1;
|
---|
444 | next;
|
---|
445 | }
|
---|
446 |
|
---|
447 | if ($key eq 'options') {
|
---|
448 |
|
---|
449 | my($val,$nval);
|
---|
450 | foreach $val (split(/\s*,\s*/,lc $value)) {
|
---|
451 | if ($i==1) {
|
---|
452 | $nval=nway($val,'recurse','no-date-check','only-nonexistent',
|
---|
453 | 'list-urls','lowercase','remove','batch','read-urls',
|
---|
454 | 'abs');
|
---|
455 | $r=1,next if $nval==0;
|
---|
456 | next if $nval==1;
|
---|
457 | next if $nval==2;
|
---|
458 | $list=1,next if $nval==3;
|
---|
459 | if ($nval==4) {
|
---|
460 | $lc=1;
|
---|
461 | $iinline=($lc?"(?i)":"");
|
---|
462 | $ipost=($lc?"i":"");
|
---|
463 | next ;
|
---|
464 | }
|
---|
465 | next if $nval==5;
|
---|
466 | next if $nval==6;
|
---|
467 | next if $nval==7;
|
---|
468 | $abs=1,next if $nval==8;
|
---|
469 | } else {
|
---|
470 | die "w3mfix: options must be the first directive in the config file.\n";
|
---|
471 | }
|
---|
472 | }
|
---|
473 | }
|
---|
474 |
|
---|
475 | if ($key eq 'fixup') {
|
---|
476 |
|
---|
477 | # chomp($fixrc=`pwd` || '.');
|
---|
478 | # $fixrc.="/$file";
|
---|
479 | # warn "Fixrc: $fixrc\n";
|
---|
480 | # $fixup=1;
|
---|
481 |
|
---|
482 | my($val,$nval);
|
---|
483 | foreach $val (split(/\s*,\s*/,lc $value)) {
|
---|
484 | $nval=nway($val,'on','run','noindex');
|
---|
485 | next if $nval==0;
|
---|
486 | next if $nval==1;
|
---|
487 | $doindex=0 if $nval==2;
|
---|
488 | # Ignore everyting else
|
---|
489 | }
|
---|
490 | }
|
---|
491 |
|
---|
492 | }
|
---|
493 | close(CFGF);
|
---|
494 | }
|
---|
495 |
|
---|
496 | sub wild_re {
|
---|
497 | # Here we translate unix wildcard subset to to perlre
|
---|
498 | local($_) = shift;
|
---|
499 | s#\*#\.\*#;
|
---|
500 | s#\?#\.#;
|
---|
501 | s#([\/\(\)\\\|\{\}\+)\$\^])#\\$1#g;
|
---|
502 | return $_ = '/'.$_.'/';
|
---|
503 | }
|
---|
504 |
|
---|
505 |
|
---|
506 | sub numeric {
|
---|
507 | # Check if argument is numeric?
|
---|
508 | my ( $number ) = @_ ;
|
---|
509 | return oct($number) if ($number =~ /\d+/ || $number =~ /\d+.\d+/);
|
---|
510 | die "Expected a number, got \"$number\"\n";
|
---|
511 | }
|
---|
512 |
|
---|
513 |
|
---|
514 | sub boolean {
|
---|
515 | my ( $boolean ) = @_ ;
|
---|
516 |
|
---|
517 | $boolean = lc $boolean;
|
---|
518 |
|
---|
519 | return 0 if ($boolean eq 'false' || $boolean eq 'off' || $boolean eq '0');
|
---|
520 | return 1 if ($boolean eq 'true' || $boolean eq 'on' || $boolean eq '1');
|
---|
521 | die "Expected a boolean, got \"$boolean\"\n";
|
---|
522 | }
|
---|
523 |
|
---|
524 |
|
---|
525 | sub nway {
|
---|
526 | my ( $value ) = shift;
|
---|
527 | my ( @values ) = @_;
|
---|
528 | my ( $val ) = 0;
|
---|
529 |
|
---|
530 | $value = lc $value;
|
---|
531 | while (@_) {
|
---|
532 | return $val if $value eq shift;
|
---|
533 | $val++;
|
---|
534 | }
|
---|
535 | die "Expected one of ".join(", ",@values).", got \"$value\"\n";
|
---|
536 | }
|
---|
537 |
|
---|
538 |
|
---|
539 | sub stat {
|
---|
540 |
|
---|
541 | my $file = shift;
|
---|
542 |
|
---|
543 | if (exists($stat{$file})) {
|
---|
544 | print STDERR "++Cache hit: $file\n" if $debug;
|
---|
545 | } else {
|
---|
546 | stat($file);
|
---|
547 | if (-e _) {
|
---|
548 | $stat{$file}=(-d _)?'d':'f';
|
---|
549 | } else {
|
---|
550 | $stat{$file}='n';
|
---|
551 | }
|
---|
552 | print STDERR "--Cache miss: $file\n" if $debug;
|
---|
553 | }
|
---|
554 | return $stat{$file};
|
---|
555 | }
|
---|
556 |
|
---|
557 | # ######################## Read 'state' files ##############################
|
---|
558 |
|
---|
559 | sub read_state {
|
---|
560 |
|
---|
561 | my $reffile='.referers';
|
---|
562 | my $refered;
|
---|
563 | my @referers;
|
---|
564 |
|
---|
565 | $reffile="referers" if $win32;
|
---|
566 |
|
---|
567 | print STDERR "reading $reffile\n" if $verbose>0;
|
---|
568 | open(REFERERS,"< $reffile") ||
|
---|
569 | die "Could not open $reffile for reading: $!\n";
|
---|
570 |
|
---|
571 | while (<REFERERS>) {
|
---|
572 | chomp;
|
---|
573 | ($refered,undef,@referers) = split(/\s+/);
|
---|
574 | $rum_referers{$refered}= [ @referers ];
|
---|
575 | # print STDERR $refered," <- ",join(' and ',@referers),"\n";
|
---|
576 | }
|
---|
577 |
|
---|
578 | close(REFERERS);
|
---|
579 |
|
---|
580 |
|
---|
581 | # Read redirection report
|
---|
582 |
|
---|
583 | my $redirfile='.redirs';
|
---|
584 | my $wrong;
|
---|
585 | my $right;
|
---|
586 | my $tmp;
|
---|
587 |
|
---|
588 | $redirfile="redirs" if $win32;
|
---|
589 |
|
---|
590 | print STDERR "reading $redirfile\n" if $verbose>0;
|
---|
591 | open(REDIRS,"< $redirfile") ||
|
---|
592 | die "Could not open $redirfile for reading: $!\n";
|
---|
593 |
|
---|
594 | while (<REDIRS>) {
|
---|
595 | chomp;
|
---|
596 | ($wrong,undef,$right) = split(/\s+/);
|
---|
597 |
|
---|
598 | $rum_redirected{$wrong}=$right;
|
---|
599 | }
|
---|
600 |
|
---|
601 | close(REDIRS);
|
---|
602 | }
|
---|
603 |
|
---|
604 | # ######################### Process every single tag ########################
|
---|
605 |
|
---|
606 | sub process_tag {
|
---|
607 | # Process a tag in html file
|
---|
608 | my $lf_referer = shift;
|
---|
609 | my $base_url = shift;
|
---|
610 | my $tag_name = shift;
|
---|
611 | my $url_attrs = shift;
|
---|
612 |
|
---|
613 | # Retrun quickly if no URL attributes
|
---|
614 | return unless defined($url_attrs);
|
---|
615 |
|
---|
616 | my $attrs = shift;
|
---|
617 |
|
---|
618 | # Information loss through apply or processing in this procedure?
|
---|
619 | my $il = $infoloss;
|
---|
620 | my $redirs;
|
---|
621 | my $stat;
|
---|
622 |
|
---|
623 | my $rum_url; # The absolute URL
|
---|
624 | my $lf_url; # The local filesystem url
|
---|
625 | my $lf_url_o; # ... and it's object
|
---|
626 | my $key;
|
---|
627 | my $orig_rum_url;
|
---|
628 | # my $debug = 1;
|
---|
629 |
|
---|
630 | # print STDERR "\nProcess Tag: $tag_name, URL attributes: ", join(', ',@{$url_attrs}),"\nOrigin:",$base_url,"\n"; # if $debug>2;
|
---|
631 |
|
---|
632 | substr($lf_referer,0,0)='./' unless substr($lf_referer,0,1) eq '/';
|
---|
633 |
|
---|
634 | foreach $key (@{$url_attrs}) {
|
---|
635 | if (defined($$attrs{$key})) {
|
---|
636 | $orig_rum_url=$rum_url=$$attrs{$key};
|
---|
637 |
|
---|
638 | print STDERR "\n$key = $rum_url\n" if $debug;
|
---|
639 |
|
---|
640 | # Apply redirects:
|
---|
641 | $redirs=0;
|
---|
642 | if (exists($rum_redirected{$rum_url})) {
|
---|
643 | $il=1;
|
---|
644 | while (exists($rum_redirected{$rum_url})) {
|
---|
645 | die "Too many redirects in a row\n" if $redirs++>32;
|
---|
646 | print STDERR "$rum_url -> ".$rum_redirected{$rum_url}."\n"
|
---|
647 | if $debug;
|
---|
648 | $rum_url=$rum_redirected{$rum_url};
|
---|
649 | }
|
---|
650 | }
|
---|
651 |
|
---|
652 | # Apply program/user apply rules
|
---|
653 | $lf_url=apply($rum_url);
|
---|
654 |
|
---|
655 | if (defined($lf_url)) {
|
---|
656 | # Apply directory/file check here
|
---|
657 | $stat=&stat($lf_url);
|
---|
658 |
|
---|
659 | if ($stat eq 'f' && $lf_url =~ m~/$~) {
|
---|
660 | # It's a file, remove trailing /
|
---|
661 | print STDERR "****** File / fixup of $lf_url\n" if $debug;
|
---|
662 | $il=1;
|
---|
663 | substr($lf_url,length($lf_url)) = '';
|
---|
664 | } elsif ($stat eq 'd' && !($lf_url =~ m~/$~) ) {
|
---|
665 | # It's a directory, add a trailing /
|
---|
666 | print STDERR "****** Directory / fixup of $lf_url\n" if $debug;
|
---|
667 | $il=1;
|
---|
668 | $lf_url .= '/';
|
---|
669 | }
|
---|
670 |
|
---|
671 | substr($lf_url,0,0)='./' unless substr($lf_url,0,1) eq '/';
|
---|
672 |
|
---|
673 | $lf_url_o=url $lf_url;
|
---|
674 | my $tmp=$lf_url_o->clone;
|
---|
675 | $tmp->basename($indexname);
|
---|
676 |
|
---|
677 | if ( $doindex && $lf_url_o->basename eq "" &&
|
---|
678 | &stat($tmp->unix_path) eq 'f' &&
|
---|
679 | !$htmlop::isdir{$key}) {
|
---|
680 | $lf_url_o=$tmp;
|
---|
681 | $il=1;
|
---|
682 | print STDERR "indexname adjusted to ",$lf_url_o->as_string,"\n"
|
---|
683 | if $debug;
|
---|
684 | }
|
---|
685 |
|
---|
686 | # Save new value in the hash, make it a file url to get 'rel' working
|
---|
687 | $lf_url_o->scheme('file');
|
---|
688 | $$attrs{$key} = ($lf_url_o->rel("file:".$lf_referer))->as_string;
|
---|
689 |
|
---|
690 | printf STDERR "Saved ".$$attrs{$key}."\n" if $debug;
|
---|
691 |
|
---|
692 | # If there is potential information loss save the old value too
|
---|
693 | $$attrs{"W3MIR".$key}=$orig_rum_url if $il;
|
---|
694 |
|
---|
695 | } elsif ($redirs>0) {
|
---|
696 | $$attrs{$key}=$rum_url;
|
---|
697 | print STDERR "Saved ".$$attrs{$key}."\n" if $debug;
|
---|
698 | $$attrs{"W3MIR".$key}=$orig_rum_url;
|
---|
699 | }
|
---|
700 | }
|
---|
701 | }
|
---|
702 | }
|
---|
703 |
|
---|
704 |
|
---|
705 | # ###################### Edit the URLs in one file... ########################
|
---|
706 |
|
---|
707 | sub edit_html_file {
|
---|
708 | # Check if it's a html file. I know this tag is in all html
|
---|
709 | # files, because w3mir put it there.
|
---|
710 |
|
---|
711 | my($lf_url)=shift;
|
---|
712 | my($rum_url)=shift;
|
---|
713 |
|
---|
714 | # Figure out the filename for our local filesystem.
|
---|
715 | $lf_url.=$indexname if $lf_url =~ m~/$~ || $lf_url eq '';
|
---|
716 |
|
---|
717 | # Stuff in need of unquoting?
|
---|
718 | $lf_url = (url "file:$lf_url")->local_path if $lf_url =~ /\%\d\d/;
|
---|
719 |
|
---|
720 | if (exists($lf_edited{$lf_url})) {
|
---|
721 | print STDERR "Already edited $lf_url\n" if $debug;
|
---|
722 | return ;
|
---|
723 | }
|
---|
724 |
|
---|
725 | $lf_edited{$lf_url}=1;
|
---|
726 |
|
---|
727 | my $page;
|
---|
728 | my $newpage;
|
---|
729 | my $read;
|
---|
730 | my $atime;
|
---|
731 | my $mtime;
|
---|
732 |
|
---|
733 | # dev uno mode nlink uid gid rdev size atime mtime
|
---|
734 | (undef,undef,undef,undef,undef,undef,undef,undef,$atime,$mtime)
|
---|
735 | = stat($lf_url);
|
---|
736 |
|
---|
737 | if (!open(TMPF,"< $lf_url\n")) {
|
---|
738 | warn "Cannot read $lf_url: $!\n" if $verbose>=0;
|
---|
739 | return;
|
---|
740 | }
|
---|
741 |
|
---|
742 | $read=sysread(TMPF,$page,10240,0);
|
---|
743 | close(TMPF);
|
---|
744 |
|
---|
745 | if (! $page =~ /<HTML/i) {
|
---|
746 | print STDERR "$lf_url is not html\n" if $verbose>0;
|
---|
747 | return ;
|
---|
748 | }
|
---|
749 |
|
---|
750 | $files++;
|
---|
751 |
|
---|
752 | print STDERR "w3mfix: $lf_url" if $verbose>=0;
|
---|
753 |
|
---|
754 | print STDERR "$lf_url is a html file\n" if $debug;
|
---|
755 |
|
---|
756 | print STDERR " reading" if $verbose>0;
|
---|
757 |
|
---|
758 | open(TMPF,$lf_url) ||
|
---|
759 | die "Could not open $lf_url for reading: $!\n";
|
---|
760 | # read the whole file.
|
---|
761 | {
|
---|
762 | local($/)=undef;
|
---|
763 | $page = <TMPF>;
|
---|
764 | }
|
---|
765 | close(TMPF);
|
---|
766 |
|
---|
767 | print STDERR " ",length($page)," bytes" if $verbose>0;
|
---|
768 |
|
---|
769 | # It's a html document
|
---|
770 |
|
---|
771 | print STDERR ", editing" if $verbose>0;
|
---|
772 |
|
---|
773 | ($newpage,undef) =
|
---|
774 | &htmlop::process($page, # $htmlop::NODOC,
|
---|
775 | $htmlop::ABS,$rum_url,
|
---|
776 | $htmlop::USESAVED,'W3MIR',
|
---|
777 | $htmlop::TAGCALLBACK,\&process_tag,$lf_url);
|
---|
778 |
|
---|
779 | open(TMPF,">$lf_url") ||
|
---|
780 | die "\nCould not open $lf_url for writing: $!\n";
|
---|
781 |
|
---|
782 | print STDERR ", saving" if $verbose>0;
|
---|
783 |
|
---|
784 | if (length($newpage)) {
|
---|
785 | # This is ODD: close does not seem to flush the buffers. So we
|
---|
786 | # force the issue.
|
---|
787 | local($|)=1;
|
---|
788 | print TMPF $newpage ||
|
---|
789 | die "\nCould not write to $lf_url (disk full?): $!\n";
|
---|
790 | }
|
---|
791 |
|
---|
792 | close(TMPF) ||
|
---|
793 | die "\nCould not close $lf_url after writing: $!\n";
|
---|
794 |
|
---|
795 | # Set times back to what they were.
|
---|
796 | utime $atime,$mtime,$lf_url;
|
---|
797 |
|
---|
798 | print STDERR ".\n" if $verbose>=0;
|
---|
799 | }
|
---|
800 |
|
---|
801 | # ############################### Scope test
|
---|
802 |
|
---|
803 | sub want_this {
|
---|
804 | # Find out if we want the url passed. Just pass it on to the
|
---|
805 | # generated functions.
|
---|
806 | my($rum_url)=shift;
|
---|
807 |
|
---|
808 | # Does scope rule want this?
|
---|
809 | return &$scope_code($rum_url) &&
|
---|
810 | # Does user rule want this too?
|
---|
811 | &$rule_code($rum_url)
|
---|
812 |
|
---|
813 | }
|
---|
814 |
|
---|
815 | # ############################### Apply the apply rules
|
---|
816 |
|
---|
817 | sub user_apply {
|
---|
818 | # Apply the user apply rules
|
---|
819 | return &$user_apply_code(shift);
|
---|
820 | }
|
---|
821 |
|
---|
822 |
|
---|
823 | sub internal_apply {
|
---|
824 | # Apply the w3mir generated apply rules
|
---|
825 | return &$apply_code(shift);
|
---|
826 | }
|
---|
827 |
|
---|
828 |
|
---|
829 | sub apply {
|
---|
830 | # Apply the user apply rules. Then if URL is wanted return result of
|
---|
831 | # w3mir apply rules. Return the undefined value otherwise.
|
---|
832 |
|
---|
833 | my $url = user_apply(shift);
|
---|
834 |
|
---|
835 | return undef unless want_this($url);
|
---|
836 |
|
---|
837 | internal_apply($url);
|
---|
838 | }
|
---|
839 |
|
---|
840 | # ############################### Decide what URLs to edit
|
---|
841 |
|
---|
842 | sub edit_as_needed {
|
---|
843 | my $rum_redirected;
|
---|
844 | my $rum_url;
|
---|
845 | my $o_rum_url;
|
---|
846 | my $rum_referer;
|
---|
847 | my $lf_url;
|
---|
848 | my $foo;
|
---|
849 | my $redirs;
|
---|
850 |
|
---|
851 | if ($editthis) {
|
---|
852 |
|
---|
853 | # Find the URLs that match $edithis
|
---|
854 | foreach $o_rum_url (keys %rum_referers) {
|
---|
855 | # Work on them if they (now) fall within the scope of retrival
|
---|
856 | $redirs=0;
|
---|
857 | $rum_url=$o_rum_url;
|
---|
858 | if (exists($rum_redirected{$o_rum_url})) {
|
---|
859 | while (exists($rum_redirected{$rum_url})) {
|
---|
860 | die "Too many redirects in a row\n" if $redirs++>32;
|
---|
861 | print STDERR "$rum_url -> ".$rum_redirected{$rum_url}."\n"
|
---|
862 | if $debug;
|
---|
863 | $rum_url=$rum_redirected{$rum_url};
|
---|
864 | }
|
---|
865 | }
|
---|
866 | next unless $rum_url =~ /$editthis/io;
|
---|
867 | next unless want_this($rum_url);
|
---|
868 | # Find and edit the documents containing references to $o_rum_url
|
---|
869 | foreach $rum_referer (@{$rum_referers{$o_rum_url}}) {
|
---|
870 | next if $rum_referer eq '(commandline)';
|
---|
871 | $lf_url=apply($rum_referer);
|
---|
872 | next unless defined($lf_url);
|
---|
873 | edit_html_file($lf_url,$rum_referer);
|
---|
874 | }
|
---|
875 | }
|
---|
876 |
|
---|
877 | # Don't do anything else when invoked thus
|
---|
878 | return;
|
---|
879 | }
|
---|
880 |
|
---|
881 | if ($doindex) {
|
---|
882 | # Edit everything that refers anything with trailing /
|
---|
883 | foreach $rum_url (grep(/\/$/,keys %rum_referers)) {
|
---|
884 | foreach $rum_referer (@{$rum_referers{$rum_url}}) {
|
---|
885 | next if $rum_referer eq '(commandline)';
|
---|
886 | $lf_url=apply($rum_referer);
|
---|
887 | next unless defined($lf_url);
|
---|
888 | edit_html_file($lf_url,$rum_referer);
|
---|
889 | }
|
---|
890 | }
|
---|
891 | }
|
---|
892 |
|
---|
893 | # Edit only redirected stuff
|
---|
894 | foreach $rum_redirected (keys %rum_redirected) {
|
---|
895 | # print "Redirected $rum_redirected\n";
|
---|
896 | foreach $rum_url (@{$rum_referers{$rum_redirected}}) {
|
---|
897 | # print "- Found in $rum_url\n";
|
---|
898 | $lf_url=apply($rum_url);
|
---|
899 | next unless defined($lf_url);
|
---|
900 | edit_html_file($lf_url,$rum_url);
|
---|
901 | }
|
---|
902 | }
|
---|
903 | }
|
---|
904 |
|
---|
905 | # ############################### 'main'
|
---|
906 |
|
---|
907 | &parse_args(@ARGV);
|
---|
908 |
|
---|
909 | if (!$rc) {
|
---|
910 | $rc='.w3mirc';
|
---|
911 | $rc='w3mir.ini' if $win32;
|
---|
912 | }
|
---|
913 |
|
---|
914 | print STDERR "w3mfix: rc file: $rc\n" if $verbose>0;
|
---|
915 |
|
---|
916 | &parse_cfg_file($rc);
|
---|
917 |
|
---|
918 | # $chdirto is ignored, w3mir already did it
|
---|
919 |
|
---|
920 | # Compile second order code
|
---|
921 |
|
---|
922 | # - The rum scope tests
|
---|
923 | my $full_rules=$scope_prefix.$scope_fetch.$scope_ignore.$scope_postfix;
|
---|
924 | eval $full_rules;
|
---|
925 |
|
---|
926 | # warn "Scope rules:\n-------------\n$full_rules\n---------------\n";
|
---|
927 |
|
---|
928 | die "Program generated rules did not compile. The code is:\n----\n".
|
---|
929 | $full_rules."\n----\n"
|
---|
930 | if !defined($scope_code);
|
---|
931 |
|
---|
932 | $full_rules=$rule_prefix.$rule_text.$rule_postfix;
|
---|
933 | eval $full_rules;
|
---|
934 |
|
---|
935 | # warn "User rules:\n-------------\n$full_rules\n---------------\n";
|
---|
936 |
|
---|
937 | # - The user specified rum tests
|
---|
938 | die "Ignore/Fetch rules did not compile. The code is:\n----\n".
|
---|
939 | $full_rules."\n----\n"
|
---|
940 | if !defined($rule_code);
|
---|
941 |
|
---|
942 | # - The user specified apply rules
|
---|
943 |
|
---|
944 | my $full_apply=$apply_prefix.($lc?$apply_lc:'').
|
---|
945 | join($ipost.";\n",@user_apply).(($#user_apply>=0)?$ipost:"").";\n".
|
---|
946 | $apply_postfix;
|
---|
947 | eval $full_apply;
|
---|
948 |
|
---|
949 | die "User apply rules did not compile. The code is:
|
---|
950 | ----
|
---|
951 | ".$full_apply."
|
---|
952 | ----\n" if !defined($apply_code);
|
---|
953 |
|
---|
954 | $user_apply_code=$apply_code;
|
---|
955 |
|
---|
956 | # - The w3mir generated apply rules
|
---|
957 |
|
---|
958 | $full_apply=$apply_prefix.($lc?$apply_lc:'').
|
---|
959 | join($ipost.";\n",@internal_apply).(($#internal_apply>=0)?$ipost:"").";\n".
|
---|
960 | $apply_postfix;
|
---|
961 | eval $full_apply;
|
---|
962 |
|
---|
963 | die "Internal apply rules did not compile. The code is:
|
---|
964 | ----
|
---|
965 | ".$full_apply."
|
---|
966 | ----\n" if !defined($apply_code);
|
---|
967 |
|
---|
968 | &read_state;
|
---|
969 |
|
---|
970 | &edit_as_needed;
|
---|
971 |
|
---|
972 | exit 0;
|
---|
973 | __END__
|
---|
974 | # -*- perl -*- There must be a blank line here:
|
---|
975 |
|
---|
976 | =head1 NAME
|
---|
977 |
|
---|
978 | w3mfix - fixup program for w3mir
|
---|
979 |
|
---|
980 | =head1 SYNOPSIS
|
---|
981 |
|
---|
982 | B<w3mfix> [B<options>] [B<configuration-file>]
|
---|
983 |
|
---|
984 | =head1 DESCRIPTION
|
---|
985 |
|
---|
986 | B<w3mfix> is the companion program to L<w3mir>. It can be used for
|
---|
987 | several URL editing operations usefull in different situations.
|
---|
988 |
|
---|
989 | When starting B<w3mfix> will read it's configuration file. It's name is
|
---|
990 | either .w3mirc (w3mir.ini on win32) or specified on the commandline.
|
---|
991 |
|
---|
992 | B<w3mfix> is controlled by the 'Fixup' directive of the configuration
|
---|
993 | file (described in the L<w3mir> documentation). B<w3mfix> is also
|
---|
994 | affected by 'Index-name' and the one special commandline option it
|
---|
995 | knows, as well as the directives/options controlling verbosity and
|
---|
996 | debugging information.
|
---|
997 |
|
---|
998 | =head1 DESCRIPTION
|
---|
999 |
|
---|
1000 | B<w3mfix> can rewrite URLs in these ways:
|
---|
1001 |
|
---|
1002 | =over 4
|
---|
1003 |
|
---|
1004 | =item * Rewrite URLs that resutled in redirects to point to the place
|
---|
1005 | redirected to. This is needed in all cases and will always be fixed
|
---|
1006 | by B<w3mfix>.
|
---|
1007 |
|
---|
1008 | =item * Change URLs ending in .../ into .../index.html (or
|
---|
1009 | .../Welcome.html). This is, probably, not needed when the mirror is
|
---|
1010 | meant do be used with a web-server. It is usefull for browsing
|
---|
1011 | directly from disk or CDROM, but in this case it's, most often,
|
---|
1012 | required. To disable this specify the I<noindex> option with the
|
---|
1013 | 'Fixup' directive.
|
---|
1014 |
|
---|
1015 | The default is to transform URLs ending in .../ into .../index.html.
|
---|
1016 |
|
---|
1017 | To controll the name of the index file use the I<Index-name> directive
|
---|
1018 | as documented in L<w3mir>
|
---|
1019 |
|
---|
1020 | =item * Change URL links to documents outside the mirror to point to
|
---|
1021 | some local document. Could be usefull if the mirror is destined for a
|
---|
1022 | CDROM to be used on a unconnected machine.
|
---|
1023 |
|
---|
1024 | THIS IS NOT YET IMPLEMENTED
|
---|
1025 |
|
---|
1026 | =item * Change URL links to documents that L<w3mir> was
|
---|
1027 | unable/forbidden to retrive to point to some local document. Pointing
|
---|
1028 | these to a nice informative document is probably better than random
|
---|
1029 | error messages from the browser.
|
---|
1030 |
|
---|
1031 | THIS IS NOT YET IMPLEMENTED
|
---|
1032 |
|
---|
1033 | =item * And, least, but far from last, B<w3mfix> can be used to
|
---|
1034 | prepare an established mirror for enlargement.
|
---|
1035 |
|
---|
1036 | This feature is used thus: Add the new site or subsite to be mirrored
|
---|
1037 | on the configuration file (by adding B<Also:> and B<Also-quene:>
|
---|
1038 | directives). Then run B<w3mfix> with the B<-editref> option. When
|
---|
1039 | the B<-editref> option is specified B<w3mfix> will not perform any
|
---|
1040 | other editing tasks.
|
---|
1041 |
|
---|
1042 | E.g.; To add I<http://www.yahoo.com/Science/Artificial_Life/> to your
|
---|
1043 | mirror add something like
|
---|
1044 |
|
---|
1045 | Also: http://www.yahoo.com/Science/Artificial_Life/ yahoo
|
---|
1046 |
|
---|
1047 | to the configuration file, then run w3mfix:
|
---|
1048 |
|
---|
1049 | w3mfix -editref www.yahoo.com/Science/Artificial_Life
|
---|
1050 |
|
---|
1051 | This will cause all references to
|
---|
1052 | I<www.yahoo.com/Science/Artificial_Life> (and under) to be edited so they
|
---|
1053 | point to within the mirror. After B<w3mfix> has finished you can run
|
---|
1054 | L<w3mir> in the normal manner.
|
---|
1055 |
|
---|
1056 | =back
|
---|
1057 |
|
---|
1058 | =head1 BUGS
|
---|
1059 |
|
---|
1060 | Naah.
|
---|
1061 |
|
---|
1062 | =head1 SEE ALSO
|
---|
1063 |
|
---|
1064 | L<w3mir>
|
---|
1065 |
|
---|
1066 | =head1 AUTHORS
|
---|
1067 |
|
---|
1068 | B<w3mir>s authors can be reached at I<[email protected]>.
|
---|
1069 | B<w3mir>s home page is at http://www.math.uio.no/~janl/w3mir/
|
---|
1070 |
|
---|