Ignore:
Timestamp:
2000-06-27T17:10:07+12:00 (24 years ago)
Author:
sjboddie
Message:

Caught up most general plugins (that's the ones in gsdlhome/perllib/plugins)
with changes to BasPlug so that they can all now use the new general plugin
options. Those I didn't do were FoxPlug (as it's not actually used anywhere
and I don't know what it does) and WebPlug (as it's kind of a work in
progress and doesn't really work anyway). All plugins will still work
(including all the collection-specific ones that are lying around); some
of them just won't have access to the general options.
I also wrote a short Perl script (pluginfo.pl) that prints out all the
options available to a given plugin.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/TEXTPlug.pm

    r732 r1244  
    2424###########################################################################
    2525
    26 # creates simple single-level document from .txt or .text files
    27 # (case-insensitive match on filenames). Adds Title metadata
    28 # of first 100 characters found.
     26# creates simple single-level document. Adds Title metadata
     27# of first line of text (up to 100 characters long).
    2928
    3029package TEXTPlug;
    3130
    3231use BasPlug;
    33 use sorttools;
    3432
    3533sub BEGIN {
     
    3735}
    3836
     37use strict;
     38
    3939sub new {
    4040    my ($class) = @_;
    41     $self = new BasPlug ();
     41    my $self = new BasPlug ("TEXTPlug", @_);
    4242
    4343    return bless $self, $class;
    4444}
    4545
    46 sub is_recursive {
     46sub get_default_process_exp {
    4747    my $self = shift (@_);
    4848
    49     return 0; # this is not a recursive plugin
     49    return q^(?i)\.te?xt$^;
    5050}
    5151
    52 
    53 # return number of files processed, undef if can't process
    54 # Note that $base_dir might be "" and that $file might
    55 # include directories
    56 sub read {
     52# do plugin specific processing of doc_obj
     53sub process {
    5754    my $self = shift (@_);
    58     my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
    59 
    60     my $filename = &util::filename_cat($base_dir, $file);
    61 
    62     return undef unless ($filename =~ /\.(te?xt(\.gz)?)$/i && (-e $filename));
    63 
    64     my $gz = 0;
    65     if (defined $2) {
    66     $gz = $2;
    67     $gz = 1 if ($gz =~ /\.gz/i);
    68     }
    69 
    70     print STDERR "TEXTPlug: processing $filename\n" if $processor->{'verbosity'};
    71 
    72     # create a new document
    73     my $doc_obj = new doc ($file, "indexed_doc");
    74 
    75     if ($gz) {
    76     open (FILE, "zcat $filename |") || die "TEXTPlug::read - zcat can't open $filename\n";
    77     } else {
    78     open (FILE, $filename) || die "TEXTPlug::read - can't open $filename\n";
    79     }
     55    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
     56   
     57    print STDERR "TEXTPlug: processing $file\n"
     58    if $self->{'verbosity'} > 1;
     59   
    8060    my $cursection = $doc_obj->get_top_section();
    81 
    82     my $text = "";
    83     my $line = "";
    84     my $foundtitle = 0;
    85     # don't need to get title if it has been passed
    86     # in from another plugin
    87     if (defined $metadata->{'Title'}) {
    88     $foundtitle = 1;
    89     }
    90     while (defined ($line = <FILE>)) {
    91     # use first line as title (or first 100 characters if it's long)
    92     if (!$foundtitle && length($line) > 5) {
    93         my $title = "";
    94         if (length($line) > 100) {
    95         $title = substr ($line, 0, 100);
    96         } else {
    97         $title = $line;
    98         }
    99         $doc_obj->add_metadata ($cursection, "Title", $title);
    100         $foundtitle = 1;
     61   
     62    # get title metadata
     63    # (don't need to get title if it has been passed
     64    # in from another plugin)
     65    if (!defined $metadata->{'Title'}) {
     66    my ($title) = $$textref =~ /^([^\n]*)/;
     67    if (length($title) > 100) {
     68        $title = substr ($title, 0, 100);
    10169    }
    102     $text .= $line;
     70    $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
    10371    }
    10472   
    105     $doc_obj->add_text ($cursection, "<pre>\n$text\n</pre>");
     73    # insert preformat tags and add text to document object
     74    $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
    10675
    107 
    108     foreach $field (keys(%$metadata)) {
    109     # $metadata->{$field} may be an array reference
    110     if (ref ($metadata->{$field}) eq "ARRAY") {
    111         map {
    112         $doc_obj->add_metadata ($cursection, $field, $_);
    113         } @{$metadata->{$field}};
    114     } else {
    115         $doc_obj->add_metadata ($cursection, $field, $metadata->{$field});
    116     }
    117     }
    118 
    119     # add OID
    120     $doc_obj->set_OID ();
    121 
    122     # process the document
    123     $processor->process($doc_obj);
    124 
    125     return 1; # processed the file
     76    return 1;
    12677}
    12778
Note: See TracChangeset for help on using the changeset viewer.