source: main/trunk/greenstone2/perllib/cpan/WWW/RobotRules/AnyDBM_File.pm@ 27181

Last change on this file since 27181 was 27181, checked in by davidb, 11 years ago

Latest libwww-perl (v6x) isn't as self-sufficeint as earlier (v5.x) in terms of supporting Perl modules. Dropping back to to this earlier version so activate.pl runs smoothly when system-installed Perl on Unix system does not have the LWP and related modules installed

File size: 3.5 KB
Line 
1package WWW::RobotRules::AnyDBM_File;
2
3require WWW::RobotRules;
4@ISA = qw(WWW::RobotRules);
5$VERSION = "5.835";
6
7use Carp ();
8use AnyDBM_File;
9use Fcntl;
10use strict;
11
12=head1 NAME
13
14WWW::RobotRules::AnyDBM_File - Persistent RobotRules
15
16=head1 SYNOPSIS
17
18 require WWW::RobotRules::AnyDBM_File;
19 require LWP::RobotUA;
20
21 # Create a robot useragent that uses a diskcaching RobotRules
22 my $rules = WWW::RobotRules::AnyDBM_File->new( 'my-robot/1.0', 'cachefile' );
23 my $ua = WWW::RobotUA->new( 'my-robot/1.0', '[email protected]', $rules );
24
25 # Then just use $ua as usual
26 $res = $ua->request($req);
27
28=head1 DESCRIPTION
29
30This is a subclass of I<WWW::RobotRules> that uses the AnyDBM_File
31package to implement persistent diskcaching of F<robots.txt> and host
32visit information.
33
34The constructor (the new() method) takes an extra argument specifying
35the name of the DBM file to use. If the DBM file already exists, then
36you can specify undef as agent name as the name can be obtained from
37the DBM database.
38
39=cut
40
41sub new
42{
43 my ($class, $ua, $file) = @_;
44 Carp::croak('WWW::RobotRules::AnyDBM_File filename required') unless $file;
45
46 my $self = bless { }, $class;
47 $self->{'filename'} = $file;
48 tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_CREAT|O_RDWR, 0640
49 or Carp::croak("Can't open $file: $!");
50
51 if ($ua) {
52 $self->agent($ua);
53 }
54 else {
55 # Try to obtain name from DBM file
56 $ua = $self->{'dbm'}{"|ua-name|"};
57 Carp::croak("No agent name specified") unless $ua;
58 }
59
60 $self;
61}
62
63sub agent {
64 my($self, $newname) = @_;
65 my $old = $self->{'dbm'}{"|ua-name|"};
66 if (defined $newname) {
67 $newname =~ s!/?\s*\d+.\d+\s*$!!; # loose version
68 unless ($old && $old eq $newname) {
69 # Old info is now stale.
70 my $file = $self->{'filename'};
71 untie %{$self->{'dbm'}};
72 tie %{$self->{'dbm'}}, 'AnyDBM_File', $file, O_TRUNC|O_RDWR, 0640;
73 %{$self->{'dbm'}} = ();
74 $self->{'dbm'}{"|ua-name|"} = $newname;
75 }
76 }
77 $old;
78}
79
80sub no_visits {
81 my ($self, $netloc) = @_;
82 my $t = $self->{'dbm'}{"$netloc|vis"};
83 return 0 unless $t;
84 (split(/;\s*/, $t))[0];
85}
86
87sub last_visit {
88 my ($self, $netloc) = @_;
89 my $t = $self->{'dbm'}{"$netloc|vis"};
90 return undef unless $t;
91 (split(/;\s*/, $t))[1];
92}
93
94sub fresh_until {
95 my ($self, $netloc, $fresh) = @_;
96 my $old = $self->{'dbm'}{"$netloc|exp"};
97 if ($old) {
98 $old =~ s/;.*//; # remove cleartext
99 }
100 if (defined $fresh) {
101 $fresh .= "; " . localtime($fresh);
102 $self->{'dbm'}{"$netloc|exp"} = $fresh;
103 }
104 $old;
105}
106
107sub visit {
108 my($self, $netloc, $time) = @_;
109 $time ||= time;
110
111 my $count = 0;
112 my $old = $self->{'dbm'}{"$netloc|vis"};
113 if ($old) {
114 my $last;
115 ($count,$last) = split(/;\s*/, $old);
116 $time = $last if $last > $time;
117 }
118 $count++;
119 $self->{'dbm'}{"$netloc|vis"} = "$count; $time; " . localtime($time);
120}
121
122sub push_rules {
123 my($self, $netloc, @rules) = @_;
124 my $cnt = 1;
125 $cnt++ while $self->{'dbm'}{"$netloc|r$cnt"};
126
127 foreach (@rules) {
128 $self->{'dbm'}{"$netloc|r$cnt"} = $_;
129 $cnt++;
130 }
131}
132
133sub clear_rules {
134 my($self, $netloc) = @_;
135 my $cnt = 1;
136 while ($self->{'dbm'}{"$netloc|r$cnt"}) {
137 delete $self->{'dbm'}{"$netloc|r$cnt"};
138 $cnt++;
139 }
140}
141
142sub rules {
143 my($self, $netloc) = @_;
144 my @rules = ();
145 my $cnt = 1;
146 while (1) {
147 my $rule = $self->{'dbm'}{"$netloc|r$cnt"};
148 last unless $rule;
149 push(@rules, $rule);
150 $cnt++;
151 }
152 @rules;
153}
154
155sub dump
156{
157}
158
1591;
160
161=head1 SEE ALSO
162
163L<WWW::RobotRules>, L<LWP::RobotUA>
164
165=head1 AUTHORS
166
167Hakan Ardo E<lt>[email protected]>, Gisle Aas E<lt>[email protected]>
168
169=cut
170
Note: See TracBrowser for help on using the repository browser.