Ignore:
Timestamp:
2000-06-27T17:10:07+12:00 (24 years ago)
Author:
sjboddie
Message:

Caught up most general plugins (that is, the ones in gsdlhome/perllib/plugins)
with changes to BasPlug so that they can all now use the new general plugin
options. Those I didn't do were FoxPlug (as it's not actually used anywhere
and I don't know what it does) and WebPlug (as it's kind of a work in
progress and doesn't really work anyway). All plugins will still work
(including all the collection-specific ones that are lying around); some
of them just won't have access to the general options.
I also wrote a short Perl script (pluginfo.pl) that prints out all the
options available to a given plugin.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/EMAILPlug.pm

    r1206 r1244  
    7070}
    7171
     72use strict;
    7273
    7374# Create a new EMAILPlug object with which to parse a file.
     
    7778sub new {
    7879    my ($class) = @_;
    79     $self = new BasPlug ();
     80    my $self = new BasPlug ("EMAILPlug", @_);
     81
    8082    return bless $self, $class;
    8183}
    8284
    83 
    84 # Is EMAILPlug recursive?  No.
    85 
    86 sub is_recursive {
    87     return 0;
    88 }
    89 
    90 
    91 # Read a file and store its contents in a new document object.
    92 # First, we check to see if it is an email message we're dealing
    93 # with, then we extract the text and metadata, then we store
    94 # all this information.
    95 #
    96 # Returns: number of files processed or undef if it can't process
    97 # a file.  This plugin only processes one file at a time.
    98 
    99 sub read {
     85sub get_default_process_exp {
    10086    my $self = shift (@_);
    101     my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
    102 
    103     #
    104     # Check that we're dealig with a valid mail file
    105     #
    106 
    107     # Make sure file exists
    108     my $filename = &util::filename_cat($base_dir, $file);
    109     return undef unless (-e $filename);
    110     return undef unless ($filename =~ /\d+(\.email)?$/);
    111 
    112     # Read the text and make sure it is an email message
    113     open (FILE, $filename) || die "EMAILPlug::read - can't open $filename\n";
    114     my @text = <FILE>;
    115     my $text = join("", @text);
    116     return undef unless (($text =~ /From:/) || ($text =~ /To:/));
    117 
    118     print STDERR "EMAILPlug: processing $filename\n" if $processor->{'verbosity'};
     87
     88    return q^\d+(\.email)?$^;
     89}
     90
     91# do plugin specific processing of doc_obj
     92sub process {
     93    my $self = shift (@_);
     94    my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
     95   
     96    # Check that we're dealing with a valid mail file
     97    return undef unless (($$textref =~ /From:/) || ($$textref =~ /To:/));
     98
     99    print STDERR "EMAILPlug: processing $file\n"
     100    if $self->{'verbosity'} > 1;
     101
     102    my $cursection = $doc_obj->get_top_section();
    119103
    120104    #
     
    123107
    124108    # Separate header from body of message
    125     my $Headers = $text;
     109    my $Headers = $$textref;
    126110    $Headers =~ s/\n\n.*//s;
    127     $text = substr $text, (length $Headers);
     111    $$textref = substr $$textref, (length $Headers);
    128112
    129113    # Extract basic metadata from header
     
    158142
    159143
    160     #
    161     # Create a new document object
    162     #
    163 
    164     my $doc_obj = new doc ($file, "indexed_doc");
    165     my $cursection = $doc_obj->get_top_section();
    166 
    167     # Add specilised metadata
     144    # Add extracted metadata to document object
    168145    foreach my $name (keys %raw) {
    169146    $value = $raw{$name};
     
    173150        $value = "No $name field";
    174151    }
    175     $doc_obj->add_metadata ($cursection, $name, $value);
     152    $doc_obj->add_utf8_metadata ($cursection, $name, $value);
    176153    }
    177154
     
    179156    $Headers = &text_into_html($Headers);
    180157    $Headers = "No headers" unless ($Headers =~ /\w/);
    181     $doc_obj->add_metadata ($cursection, "Headers", $Headers);
    182 
    183     # Add document text
    184     $text = &text_into_html($text);
    185     $text = "No message" unless ($text =~ /\w/);
    186     $doc_obj->add_text ($cursection, $text);
    187    
    188     # Add the OID - that is, the big HASH value used as a unique ID
    189     $doc_obj->set_OID ();
    190 
    191     # Process the document
    192     $processor->process($doc_obj);
    193 
    194     # Return the number of documents processed
    195     return 1;
    196 
     158    $doc_obj->add_utf8_metadata ($cursection, "Headers", $Headers);
     159
     160    # Add text to document object
     161    $$textref = &text_into_html($$textref);
     162    $$textref = "No message" unless ($$textref =~ /\w/);
     163    $doc_obj->add_utf8_text($cursection, $$textref);
     164
     165    return 1;
    197166}
    198167
     
    213182    my ($text) = @_;
    214183
    215     # Convert problem charaters into HTML symbols
     184    # Convert problem characters into HTML symbols
    216185    $text =~ s/&/&amp;/go;
    217186    $text =~ s/</&lt;/go;
     
    236205# Perl packages have to return true if they are run.
    2372061;
    238    
    239 
    240 
    241 
    242 
    243 
    244 
Note: See TracChangeset for help on using the changeset viewer.