Changeset 34131
- Timestamp: 2020-05-30T15:18:25+12:00 (4 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugins/NutchTextDumpPlugin.pm
r34130 r34131 329 329 my $fh; 330 330 if (open($fh,'<:encoding(UTF-8)', $urls_file)) { 331 while (defined (my $line = <$fh>)) { 331 while (defined (my $line = <$fh>)) { 332 332 $line = &util::trim($line); #$line =~ s/^\s+|\s+$//g; # trim whitespace 333 if($line =~ m@^https?://@) { # add only URLs 333 334 if($line =~ m@^https?://@) { # add only URLs 335 # remove any ",COUNTRYCODE" at end 336 # country code can be NZ but also UNKNOWN, so not 2 chars 337 $line =~ s/,[A-Z]+$//; 338 #print STDERR "LINE: |$line|\n"; 334 339 $self->{'keep_urls'}->{$line} = 1; # add the url to our perl hash 335 340 }
Note: See TracChangeset for help on using the changeset viewer.