Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: gsdl/tags/gsdl-2_71-distribution/gsdl/packages/w3mir/w3mir-1.0.8/htmlop.pm@ 14121

Last change on this file since 14121 was 719, checked in by davidb, 25 years ago
added w3mir package
Property svn:keywords set to `Author Date Id Revision`
File size: 19.8 KB

Line
1	# --perl--
2	# htmlop.pl: Do operations on html documents.
3	$VERSION=0.2.4;
4	#
5	# Original source from Bjørn Borud, without it I would not have atempted
6	# this. In this incarnation it bears no resemblance to Bjørns code.
7	# - Nicolai Langfeldt 18/11/95.
8	#
9	# htmlop.pl does operations on html files, possebly many at a time.
10	# Operations:
11	# - Absolitify urls
12	# - Relativify urls
13	# - Gather list of urls
14	# - Callback with url
15	# - Canonify document w.r.t. SGML.
16	#
17	# Authors:
18	# - Nicolai Langfeldt ([email protected])
19	# - Chris Szurgot ([email protected])
20	#
21	# Changes:
22	# janl 18/11/95 - Initial version
23	# szurgot 09/02/96 - Code in htmlop'process to remove <BASE> Tag if we are
24	# returning the form. An unchanged base destroys local
25	# fragments.
26	# janl 22/02/96 - Added URLSUB functions. <BASE> will only be removed from
27	# the returned doc, never from the origninal doc.
28	# janl 16/05/96 - Removed I* options and added single NODOC option/modifier
29	# to replace them.
30	# janl 09/09/96 - Added URLPROC and NREL opcodes, for a better URL
31	# processing model. -> 0.1.7
32	# janl 24/09/96 - Various cosmetics, no longer inserting a !SGML tag,
33	# only !DOCTYPE and HTML tags. -> 0.1.8
34	# janl 11/10/96 - URLs in HTML 3.2 tags are now found.
35	# janl 6/11/96 - Added netscape SCRIPT tag.
36	# janl 20/11/96 - 'BORDER' is different from 'BORDER=0' -> 0.1.10
37	# janl 06/11/96 - Added userdata argument to URLPROC.
38	# Added SAVEURLS and USESAVED opcodes. -> 0.1.11
39	# janl 11/04/97 - Changed to URI::URL and got into strict harness
40	# janl 13/04/97 - Fixed comment processing, extended it to processing
41	# directives. Bug reported by Chris Johnson. -> 0.1.12
42	# janl 08/04/97 - Made ISMAP into ISMAP=, bad! -> 0.1.13
43	# janl 23/05/97 - Treating <script> and <style> as verbatim -> 0.1.14
44	# janl 06/06/97 - Canonify: <!SGML tag confused it and !DOCTYPE was mangled
45	# -> 0.1.15
46	# janl 30/07/97 - Now supporting <BASE> tag.
47	# - Added TAGCALLBACK opcode -> 0.1.16
48	# janl 16/10/97 - Empty string must be quoted, reported by Bart Barenburg
49	# -> 0.1.17
50	# janl 01/12/97 - More HTML URL tags/attributes recognized (Greg Lindhorst)
51	# janl 13/12/97 - Can't delete the whole <BASE> tag. Netscape has
52	# extended it for frame use. So only delete the HREF
53	# attr -> 0.1.18
54	# janl 01/01/98 - Realized applet/object support can't work and changed
55	# so it's able to work, if conditions are right.
56	# janl 04/01/98 - Added html 4.0 tags and attributes.
57	# janl 08/02/98 - Hacked for speed. Went from 43s to 9s on a 170K
58	# document -> 0.2. Thanks to Rune Frøysa who taunted me.
59	# janl 09/04/98 - More tolerant about what constitutes a newline -> 0.2.1
60	# janl 09/05/98 - Export %isdir -> 0.2.2
61	# janl 13/04/99 - Remove leading /../ sequences in path component in
62	# ABS code. -> 0.2.3
63	# janl 28/05/99 - The code was buggy, now it's not. -> 0.2.4
64
65	package htmlop;
66
67	use URI::URL;
68
69	use strict;
70	# Global variables
71	use vars qw($ABS $REL $LIST $CANON $URLSUB $NODOC $URLPROC $NREL);
72	use vars qw($SAVEURL $USESAVED $TAGCALLBACK $debug %isdir);
73
74	# These are for the smartrel routines
75	my $url_origin;
76	my $doc_top;
77	my $doc_top_re;
78	my $choped_url_or;
79
80	# HTML operation codes for process_html. The first argument is the
81	# html document to do operations on. It will not be changed. If
82	# NODOC is not passed a new document edited as specified will be
83	# returned. No more than one document will be returned by one
84	# invocation of htmlop.
85
86	$ABS = 1; # Absolutify urls. Arg: Origin. The
87	# ABS function absolutifies URLS,
88	# assuming they are relative to the
89	# argument.
90	# If a <BASE> tag is found the origin
91	# given by it will be used instead of
92	# the arguemnt given.
93
94	$REL = 2; # Relativize all urls. Arg: Origin.
95	# The REL function simply removes the
96	# argument string from any urls
97	# matching it. For this reason the
98	# origin string is interprated as a
99	# RE, which may have unexpected
100	# results, unless RE specials are
101	# escaped, which they should be.
102
103	$LIST = 3; # List all urls, Ret: urls
104
105	$CANON = 4; # See that missing opening (and
106	# ending) SGML tags are injected.
107
108	$URLSUB = 5; # Do regular expression substitution
109	# on urls. Arg: RE, substitute.
110
111	$NODOC = 6; # Do not return a rebuilt document.
112	# Saves memory, and time
113
114	# NOTE: I SUSPECT URLPROC IS BROKEN IF A BASE TAG APPEARS IN THE TEXT.
115	# THE URL PROCESSOR NEEDS TO BE PASSED htmlop::process' IDEA OF WHAT THE
116	# BASE URL IS.
117
118	$URLPROC = 7; # Apply function on urls (process
119	# urls). Arg: Pointer to function to
120	# apply, userdata. The function will
121	# be passed the url, modified by any
122	# previous operations and the
123	# userdata. The function must return
124	# the new url.
125
126	$NREL = 8; # New relativisation function, works
127	# much better than the old one. Arg:
128	# Origin, Top.
129
130	$SAVEURL = 9; # Save urls in tag with modified name.
131	# Arg: attribute prefix.
132	# Example: <a href=foo> becomes
133	# <a href=foo w3mir-href=foo> if no other
134	# processing of the url is done.
135
136	$USESAVED = 10; # Use saved urls. Arg: attribute prefix
137
138	$TAGCALLBACK = 11; # Procedure to call for each Tag.
139	# Args: procedure, userdata (one item)
140
141	# Args to procedure: userdata, Base
142	# URL, tag name, reference to array of
143	# URL attributes, reference to hash of
144	# all attributes. The base url is
145	# derived from the one used in ABS or
146	# the BASE tag.
147
148	$debug=0; # Debugging level in this package
149
150	# process_html returns a array. The first component of the array is
151	# the new html document. The rest of the array is the urls. If a
152	# document is not to be returned a empty string is returned. If a url
153	# list is not to be returned a empty array is returned.
154
155	# HERE BE DRAGONS:
156
157	# Where to find URLs in various tags. The second compoent is a array
158	# reference.
159
160	my(%urls) = (
161	HEAD => [ 'PROFILE' ],
162	BLOCKQUOTE => [ 'CITE' ],
163	Q => [ 'CITE' ],
164	INS => [ 'CITE' ],
165	DEL => [ 'CITE' ],
166	A => [ 'HREF' ] ,
167	IMG => [ 'SRC' ,'LOWSRC' ,'USEMAP', 'LONGDESC' ] ,
168	EMBED => [ 'SRC' ],
169	FRAME => [ 'SRC', 'LONGDESC' ],
170	IFRAME => [ 'SRC', 'LONGDESC' ],
171	BODY => [ 'BACKGROUND' ],
172	AREA => [ 'HREF' ],
173	LINK => [ 'HREF' ],
174
175	# The APPLET and OBJECT tags do not fit into my model for URL
176	# manipulation. Just looking at CODEBASE might work, if the
177	# URL it names is a browseable directory...
178	APPLET => [ 'CODEBASE' ], # If the codebase dir is browseable
179	OBJECT => [ 'CODEBASE' ], # Ditto. Can't handle DATA attribute now
180
181	INPUT => [ 'SRC', 'USEMAP' ],
182	MAP => [ 'HREF' ],
183	SCRIPT => [ 'SRC', 'FOR' ],# 'FOR's semantics is not defined, the
184	# attribute is just reserved for possible
185	# future use...
186	BGSOUND => [ 'SRC' ],
187	FORM => [ 'ACTION' ], # Is this asking for trouble?
188	# Maybe it should just be absolutized...
189	# On the other hand: It's CGI...
190	);
191
192	my(%relative) = (
193	# Identify URL attributes containing urls that are relative to
194	# the named URL attribute. When processing these they should
195	# be absolitized and then relativized relative to the BASE attribute.
196	# This is just window dressing for now; it is not used for anything.
197
198	# ARCHIVE is really a URI _list_.
199	CODEBASE => [ 'CLASSID', 'DATA', 'CODE', 'ARCHIVE' ],
200	);
201
202	%isdir = (
203	# These tags refer to directories:
204	CODEBASE => 1
205	);
206
207	# Tags that enclose bits we want to leave absolutely alone because they
208	# are not very like HTML, or some such.
209
210	# The material between the start and end tags is copied with no
211	# processing at all. The end tag is left to be processed.
212	# The endtag match is case insensitive.
213	my(%verbatim) = (
214	SCRIPT => quotemeta('</SCRIPT>'), # Embeded scripts
215	STYLE => quotemeta('</STYLE>'), # Embeded stylesheet
216	);
217
218	# These are the functions that pick the HTML to pieces. It will not
219	# work esp. good on a random SGML document since the HTML application
220	# of SGML has simpler quoting than it might.
221
222	sub gettoken {
223	# Get one token from the argument, removing it from the argument.
224	# BUG: There should be whitespace at the end of the examined string.
225	my($c,$token,$i);
226
227	# Skip whitespace and newlines
228	return '' unless defined(@_) && defined($_[0]);
229	$_[0] =~ s/^[\r\n\s]*//;
230
231	return '' if ($_[0] eq '');
232
233	$c = substr($_[0],0,1);
234	substr($_[0],0,1)='';
235
236	if ($c eq '"' \|\| $c eq "\'") { # Quoted material
237	$i=index($_[0],$c);
238	# End-quote missing, just gobble the rest of the doc
239	$i=length($_[0]) if $i == -1;
240	# Extract and remove token
241	$token=substr($_[0],0,$i);
242	substr($_[0],0,$i+1)='';
243	} elsif ($c eq '=') {
244	$token='=';
245	} else { # Non-quoted material, ends in whitespace or =
246	$_[0] =~ m/[=\s\n\r]/;
247	$_[0] = $&.$';
248	$token=$c.$`;
249	}
250	# print "Token: '$token'\t\tRest: '",$_[0],"'\n";
251	return $token;
252	}
253
254
255	sub tagtoken {
256	# Pick the tag to pieces (also knonw as tokens). Return an
257	# associative array of attributes. The attribute-names are changed
258	# to uppercase. The attribute-values are left as is.
259	my($tok,$lasttok,%tokens);
260	# Append a space, gettoken needs it - silly? Yes!
261	# Change it to test on boundrary things rather than ...?
262	$_[0].=' ';
263
264	$lasttok='';
265	while (1) {
266	last if (($tok=uc &gettoken($_[0])) eq '');
267
268	if ($tok eq '=') {
269	# print STDERR " -bad html-" if ($lasttok eq '');
270	$tokens{$lasttok}.=&gettoken($_[0]);
271	print STDERR "STORED: $lasttok = ",$tokens{$lasttok},"\n" if $debug;
272	} else {
273	$tokens{$tok}=undef;
274	$lasttok=$tok;
275	}
276	}
277	return %tokens;
278	}
279
280
281	sub gettag {
282	# Pick out the following things from the remaining html doc:
283	# Everything leading up to the first tag. The first tag, and its
284	# contents. Modify @_ directly to reduce number of copies of
285	# possebly huge documents kept in memory at once. Return the body,
286	# the tag name, and the attributes (associative array)
287	my(%attr,$tagn,$tagc,$body,$tag,$doc);
288
289	$doc=\$_[0];
290
291	my($start,$end,$length);
292
293	$start=index($$doc,'<');
294
295	if ($start<$[) {
296	# EOF
297	$body=$$doc;
298	$$doc='';
299	return ($body,'',());
300	}
301
302	$end=index($$doc,'>',$start+1);
303
304	if ($end<$[) {
305	# This sucks, found no end of the tag...
306	$body=$$doc;
307	$$doc='';
308	return ($body,'',());
309	}
310
311	$length=$end-$start-1;
312
313	$body=substr($$doc,0,$start);
314	$tag=substr($$doc,$start+1,$length);
315
316	# This shortens the string in each itteration, some kind of mechanism
317	# to do it once in a while would speed things up further. HOWEVER, when
318	# I tried to code this all I got was a _nasty_ memory leak.
319	substr($$doc,0,$end+1)='';
320
321	# print STDERR "------\n";
322
323	# print STDERR "BODY: /$body/\n";
324	print STDERR "COMPLETE TAG: /$tag/\n" if $debug;
325	# print STDERR "REST: /",substr($$doc,0,20),"/\n";
326
327	# print STDERR "------\n";
328
329	# Examine tag contents
330	if ($tag =~ /^([!?]--)/ \|\| $tag =~ /^(!\w+)/) {
331	# Comment or processing dicective, handle specially
332	$tagn=$&;
333	$tagc=$';
334	return ($body,$tagn,("$tagc",undef));
335	}
336
337	# Everything else
338	($tagn,$tagc) = split(/[\s\n\r]+/,$tag,2);
339	$tagn="\U$tagn";
340
341	return ($body,$tagn,()) if !defined($tagc);
342
343	return ($body,$tagn,&tagtoken($tagc));
344	}
345
346
347	# This is meant for general consumption:
348
349	sub process {
350	# Process a html file. Into one end you put a html file. Out
351	# of the other end you get something dependent on the operations
352	# you specified.
353	# I cannot gobble my arguments. I need to examine them several times.
354
355	my($arg,$doc,$i,$retdoc,$canon,$newdoc,@urllist,$Q,$cont);
356	my($origin,$baseurl);
357
358	$origin=$baseurl='';
359
360	$retdoc=$canon=0;
361
362	# Get the document from the argument list
363	$doc=shift(@_);
364
365	$retdoc=1;
366	$i=0;
367	# Argument checking
368	while (defined($arg=$_[$i++])) {
369	if (! ($arg =~ /^\d+$/)) {
370	print STDERR "ERROR IN HTMLOP::process:\n";
371	print STDERR "Args: ",join(',',@_),"\n";
372	print STDERR "ARG: $arg is not a opcode.\n";
373	exit(1);
374	}
375	if ($arg == $ABS) {
376	$baseurl=$origin=$_[$i++];
377	} elsif ($arg == $REL \|\| $arg == $SAVEURL \|\| $arg == $USESAVED) {
378	$i++; # Skip one arg
379	} elsif ($arg == $URLSUB \|\| $arg == $NREL \|\| $arg == $URLPROC \|\|
380	$arg == $TAGCALLBACK ) {
381	$i += 2; # Skip two args
382	} elsif ($arg == $LIST) {
383	# do nothing
384	} elsif ($arg == $CANON) {
385	$canon=1;
386	} elsif ($arg == $NODOC) {
387	$retdoc=0;
388	} else {
389	die "htmlop: Incorrect invocation of html_process\n";
390	}
391	}
392
393	my($endhtml)=0; # Have we seen </html>?
394	my($SGML)=1; # 1: !DOCTYPE not seen, 2: !DOCTYPE seen, -1: it's OK
395
396	# These are used to store the tag components
397	my($textpart);
398	my($tagname);
399	my(%attrval);
400
401	my($wholetag); # The whole tag, put together again
402	my($moretag); # TMP storage of tag attributes
403	my($attr); # Looping thru attributes, misnomer.
404	my($RE);
405	my($subst);
406	my($prefix);
407	my($fun); # Function to apply to url
408	my($verbatim)='';
409	my($url_o,$path); # URL object Used in ABS to remove leading ..
410
411	# Welcome to the machine
412
413	$newdoc='';
414	while ($doc ne '') {
415	($textpart,$tagname,%attrval)=&gettag($doc);
416
417	# Process the tag
418	{ # Need a way to get out of this, last is my friend
419	# 'Canonize'
420	if ($canon) {
421	$endhtml=1 if ($tagname eq '/HTML');
422	if ($SGML!=-1 && $canon && $tagname ne '!--' && $tagname ne '!SGML') {
423	$wholetag=$tagname;
424	$moretag=join('',keys %attrval);
425	$wholetag.=' '.$moretag if ($moretag ne '');
426	if ($SGML==1) {
427	$SGML=2;
428	if ($tagname eq '!DOCTYPE') {
429	$tagname=$wholetag;
430	%attrval=();
431	last;
432	}
433
434	# We have no idea what DTD this doc follows so we put in
435	# something kinda non-comitting
436	$newdoc.='<!DOCTYPE HTML PUBLIC "html.dtd">'."\n";
437	}
438	if ($SGML==2) {
439	$SGML=-1;
440	if ($tagname eq 'HTML') {
441	$tagname=$wholetag;
442	%attrval=();
443	last;
444	}
445	$newdoc.='<HTML>'."\n";
446	}
447	} # sgml!=-1
448	} #canon
449
450	# Tack on text part before I bail out, if wanted
451	$newdoc.=$textpart if $retdoc;
452
453	if ($tagname eq 'BASE') {
454	if (exists($attrval{'HREF'})) {
455	if ($origin) {
456	$baseurl=(url($attrval{'HREF'})->abs($origin,1))->as_string;
457	} else {
458	$baseurl=$attrval{'HREF'};
459	}
460	# Get rid of the HREF attribute. Netscape 4.0 puts (other) stuff
461	# into BASE that is not found in the HTML 4.0 spec.
462	delete $attrval{'HREF'};
463	print STDERR "\nBase tag: $baseurl\n" if $debug;
464	}
465	}
466
467	# URL processing
468	$i=0;
469	while (defined($arg=$_[$i])) {
470	if (! ($arg =~ /^\d+$/)) {
471	print STDERR "Args: ",join(',',@_),"\n";
472	print STDERR "ARG: $arg ($i) is not a opcode.\n";
473	exit(1);
474	}
475	$i++;
476	if ($arg == $ABS) {
477	$origin=$baseurl \|\| $_[$i++];
478	$i++;
479	# Want it to be a URL object
480	$origin=url $origin unless ref $origin;
481	print STDERR 'ABS: ',$origin->as_string,"\n" if $debug;
482	next unless defined($urls{$tagname});
483	foreach $attr (@{$urls{$tagname}}) {
484	if (defined($attrval{$attr})) {
485	# Ugly: Remove leading /../ sequences in path component
486	$url_o=url($attrval{$attr})->abs($origin,1);
487	if (defined($path=$url_o->path)) {
488	# mailto: URLs does not have path components
489	$path =~ s~/\.\.(?=/)~~g;
490	$url_o->path($path);
491	}
492	$attrval{$attr}=$url_o->as_string;
493	}
494	}
495	} elsif ($arg == $REL) {
496	$origin=$_[$i++];
497	# Want it to be a string
498	$origin=$origin->as_string if ref $origin;
499	print STDERR 'REL: ',$origin,"\n" if $debug;
500	next unless defined($urls{$tagname});
501	foreach $attr (@{$urls{$tagname}}) {
502	$attrval{$attr} =~ s/^$origin//
503	if defined($attrval{$attr});
504	}
505	} elsif ($arg == $URLSUB) {
506	$RE=$_[$i++];
507	$subst=$_[$i++];
508	warn "URLSUB: $RE -> $subst\n" if $debug;
509	next unless defined($urls{$tagname});
510	foreach $attr (@{$urls{$tagname}}) {
511	$attrval{$attr} =~ s/$RE/$subst/
512	if defined($attrval{$attr});
513	}
514	} elsif ($arg == $LIST) {
515	warn "LIST;\n" if $debug;
516	next unless exists($urls{$tagname});
517	foreach $attr (@{$urls{$tagname}}) {
518	if (exists($attrval{$attr})) {
519	$attrval{$attr}.='/'
520	if $isdir{$attr} && $attrval{$attr} =~ m~[^/]$~;
521	push(@urllist,$attrval{$attr});
522	}
523	}
524	} elsif ($arg == $SAVEURL) {
525	warn "SAVEURL;\n" if $debug;
526	$prefix=$_[$i++];
527	next unless exists($urls{$tagname});
528	foreach $attr (@{$urls{$tagname}}) {
529	$attrval{"$prefix$attr"}=$attrval{$attr}
530	if defined($attrval{$attr});
531	}
532	} elsif ($arg == $USESAVED) {
533	warn "USESAVED;\n" if $debug;
534	$prefix=$_[$i++];
535	next unless exists($urls{$tagname});
536	foreach $attr (@{$urls{$tagname}}) {
537	$attrval{$attr}=$attrval{"$prefix$attr"}
538	if (defined($attrval{"$prefix$attr"}));
539	# bug compatability, drop sometime after w3mir 1.0
540	$attrval{$attr}=$attrval{"$prefix-$attr"}
541	if (defined($attrval{"$prefix-$attr"}));
542	}
543	} elsif ($arg == $NODOC) {
544	warn "NODOC;\n" if $debug;
545	} elsif ($arg == $CANON) {
546	warn "CANON;\n" if $debug;
547	} elsif ($arg == $URLPROC \|\| $arg == $NREL) {
548	# Apply a function to all urls.
549	# NREL = Special case of $URLPROC, apply internal function.
550	if ($arg == $URLPROC) {
551	warn "URLPROC;\n" if $debug;
552	$fun=$_[$i++];
553	$arg=$_[$i++];
554	} else {
555	$fun=\&smartrel;
556	$arg=undef;
557	$url_origin=$_[$i++];
558	$doc_top=$_[$i++];
559	$doc_top_re=quotemeta $doc_top;
560	($choped_url_or=$url_origin) =~ s/^$doc_top_re//;
561	warn "NREL $url_origin $doc_top\n" if $debug;
562	}
563
564	next unless (defined($urls{$tagname}));
565
566	foreach $attr (@{$urls{$tagname}}) {
567	$attrval{$attr}=&$fun($attrval{$attr},$arg)
568	if defined($attrval{$attr});
569	}
570	} elsif ($arg == $TAGCALLBACK) {
571	$fun=$_[$i++];
572	$arg=$_[$i++];
573	warn "TAGCALLBACK($tagname);\n" if $debug;
574	&$fun($arg,$baseurl,$tagname,
575	(defined($urls{$tagname})?($urls{$tagname}):undef),
576	\%attrval);
577	} else {
578	print STDERR "Internal error. opcode: $arg, i: $i, args: ",
579	join(',',@_),"\n";
580	}
581	}
582	last;
583	}
584
585	# That ends URL processing
586
587	# Was this a verbatim leadin tag? If yes, atempt to fish out text
588	# between here and the end tag, (minimal match) and substitute it
589	# with nothing. The end tag is kept. And the fished out text is
590	# re-inserted in the result with no changes.
591
592	$verbatim=$1
593	if defined($verbatim{$tagname}) &&
594	($doc =~ s/^(.*?)($verbatim{$tagname})/$2/is);
595
596	# Tack on the tag, if wanted.
597	if ($retdoc) {
598	if ($tagname) {
599	$newdoc.='<'.$tagname;
600	foreach $attr (keys %attrval) {
601	$newdoc.=' '.$attr;
602	if (defined($cont=$attrval{$attr})) {
603	$Q='"';
604	$Q="'" if $cont =~ m/\"/;
605	$newdoc.='='.$Q.$cont.$Q;
606	}
607	}
608	$newdoc.='>'.$verbatim;
609	$verbatim='';
610	}
611	}
612	print STDERR "NEW: $newdoc\n" if $debug > 2;
613	}
614	$newdoc.="</HTML>\n" if ($canon && !$endhtml);
615	return ($newdoc,@urllist);
616	}
617
618
619	sub smartrel {
620	# 'smart' relativisation function, uses .. to refer to things up to
621	# $doc_top level. Outside that scope it stays absolute.
622	#
623	# The rel function itself is now in libwww-perl
624
625	my($url_o)=@_;
626
627	return if (!(defined($url_o) && $url_o));
628
629	$url_o=url $url_o unless ref $url_o;
630
631	# print STDERR "\nsmartrel: $url_o->as_string\n",
632	# " from: $url_origin\n",
633	# " within: $doc_top\n";
634
635	# Check if within scope of our doc
636	$url_o = $url_o->rel($url_origin)
637	if $url_o->as_string =~ m/^$doc_top_re/;
638
639	return $url_o->as_string;
640	}
641
642
643	sub smart_setup {
644	# Setup routine for smartrel.
645
646	($url_origin,$doc_top) = @_;
647
648	$url_origin=$url_origin->as_string if ref $url_origin;
649	$doc_top=$doc_top->as_string if ref $doc_top;
650
651	$doc_top_re=quotemeta $doc_top;
652
653	($choped_url_or=$url_origin) =~ s/^$doc_top_re//;
654	}
655
656	1;

Note: See TracBrowser for help on using the repository browser.

Download in other formats: