Changeset 34125
- Timestamp: 2020-05-27T18:07:26+12:00 (4 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/perllib/plugins/NutchTextDumpPlugin.pm
r34124  r34125
612     612     #$title_meta = $self->to_utf8($encoding, $title_meta) if ($encoding);
613     613     } else { # if we have "null" as title metadata, set it to the record URL?
614             #my $srcURLs = $doc_obj->get_metadata($cursection, "ex.srcURL");
615             #print STDERR "@@@@ null title to be replaced with ".$srcURLs->[0]."\n";
616             #$title_meta = $srcURLs->[0] if (scalar @$srcURLs > 0);
617     614     my $srcURL = $doc_obj->get_metadata_element($cursection, "srcURL", 1); # TODO: why does ex.srcURL not work, nor srcURL without 3rd param
618     615     my ($basicURL) = $srcURL =~ m@^https?://(?:www\.)?(.*)$@; # use basicURL for title instead of srcURL, else many docs get classified under "Htt" bucket for https
Note: See TracChangeset for help on using the changeset viewer.