Ignore:
Timestamp:
2000-07-13T10:21:53+12:00 (24 years ago)
Author:
sjboddie
Message:

Merged changes made on trunk into the New_Config_Format branch.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • branches/New_Config_Format-branch/gsdl/perllib/plugins/TEXTPlug.pm

    r732 r1279  
    2424###########################################################################
    2525
    26 # creates simple single-level document from .txt or .text files
    27 # (case-insensitive match on filenames). Adds Title metadata
    28 # of first 100 characters found.
     26# creates simple single-level document. Adds Title metadata
     27# of first line of text (up to 100 characters long).
    2928
    3029package TEXTPlug;
    3130
    3231use BasPlug;
    33 use sorttools;
    3432
    3533sub BEGIN {
     
    3735}
    3836
     37use strict;
     38
    3939sub new {
    4040    my ($class) = @_;
    41     $self = new BasPlug ();
     41    my $self = new BasPlug ("TEXTPlug", @_);
    4242
    4343    return bless $self, $class;
    4444}
    4545
    46 sub is_recursive {
     46sub get_default_process_exp {
    4747    my $self = shift (@_);
    4848
    49     return 0; # this is not a recursive plugin
     49    return q^(?i)\.te?xt$^;
    5050}
    5151
    52 
    53 # return number of files processed, undef if can't process
    54 # Note that $base_dir might be "" and that $file might
    55 # include directories
    56 sub read {
     52# do plugin specific processing of doc_obj
     53sub process {
    5754    my $self = shift (@_);
    58     my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
    59 
    60     my $filename = &util::filename_cat($base_dir, $file);
    61 
    62     return undef unless ($filename =~ /\.(te?xt(\.gz)?)$/i && (-e $filename));
    63 
    64     my $gz = 0;
    65     if (defined $2) {
    66     $gz = $2;
    67     $gz = 1 if ($gz =~ /\.gz/i);
    68     }
    69 
    70     print STDERR "TEXTPlug: processing $filename\n" if $processor->{'verbosity'};
    71 
    72     # create a new document
    73     my $doc_obj = new doc ($file, "indexed_doc");
    74 
    75     if ($gz) {
    76     open (FILE, "zcat $filename |") || die "TEXTPlug::read - zcat can't open $filename\n";
    77     } else {
    78     open (FILE, $filename) || die "TEXTPlug::read - can't open $filename\n";
    79     }
     55    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
     56   
     57    print STDERR "TEXTPlug: processing $file\n"
     58    if $self->{'verbosity'} > 1;
     59   
    8060    my $cursection = $doc_obj->get_top_section();
    81 
    82     my $text = "";
    83     my $line = "";
    84     my $foundtitle = 0;
    85     # don't need to get title if it has been passed
    86     # in from another plugin
    87     if (defined $metadata->{'Title'}) {
    88     $foundtitle = 1;
    89     }
    90     while (defined ($line = <FILE>)) {
    91     # use first line as title (or first 100 characters if it's long)
    92     if (!$foundtitle && length($line) > 5) {
    93         my $title = "";
    94         if (length($line) > 100) {
    95         $title = substr ($line, 0, 100);
    96         } else {
    97         $title = $line;
    98         }
    99         $doc_obj->add_metadata ($cursection, "Title", $title);
    100         $foundtitle = 1;
     61   
     62    # get title metadata
     63    # (don't need to get title if it has been passed
     64    # in from another plugin)
     65    if (!defined $metadata->{'Title'}) {
     66    my ($title) = $$textref =~ /^([^\n]*)/;
     67    if (length($title) > 100) {
     68        $title = substr ($title, 0, 100);
    10169    }
    102     $text .= $line;
     70    $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
    10371    }
    10472   
    105     $doc_obj->add_text ($cursection, "<pre>\n$text\n</pre>");
     73    # insert preformat tags and add text to document object
     74    $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
    10675
    107 
    108     foreach $field (keys(%$metadata)) {
    109     # $metadata->{$field} may be an array reference
    110     if (ref ($metadata->{$field}) eq "ARRAY") {
    111         map {
    112         $doc_obj->add_metadata ($cursection, $field, $_);
    113         } @{$metadata->{$field}};
    114     } else {
    115         $doc_obj->add_metadata ($cursection, $field, $metadata->{$field});
    116     }
    117     }
    118 
    119     # add OID
    120     $doc_obj->set_OID ();
    121 
    122     # process the document
    123     $processor->process($doc_obj);
    124 
    125     return 1; # processed the file
     76    return 1;
    12677}
    12778
Note: See TracChangeset for help on using the changeset viewer.