Ignore:
Timestamp:
2001-03-01T13:44:42+13:00 (23 years ago)
Author:
jrm21
Message:

Minor changes to regexes, so that header fields have to be at the start of a line
(e.g. /From:/ -> /^From:/m). Also changed the file name pattern so that we process
files named with either a number (maildir) or .mbx (mbox). For now, .email
is also recognised...

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/perllib/plugins/EMAILPlug.pm

    r1895 r2096  
    3232#
    3333# Email plug reads email files.  These are named with a simple
    34 # number (i.e. as they appear in mh_mail folders) or with the
    35 # extension .email
     34# number (i.e. as they appear in maildir folders) or with the
     35# extension .mbx (for mbox mail file format)
    3636#
    3737# Document text:
     
    7474sub get_default_process_exp {
    7575    my $self = shift (@_);
    76 
    77     return q^\d+(\.email)?$^;
     76    # mbx/email for mailbox file format, \d+ for maildir (each message is
     77    # in a separate file, with a unique number for filename)
     78    return q@[\\/]\d+|\.(mbx|email)$@;
    7879}
    7980
    8081# This plugin splits the mbox mail files at lines starting with From<sp>
    8182sub get_default_split_exp {
    82     return q^From .*\n^;
     83    return q^\nFrom .*\n^;
    8384}
    8485
     
    9192
    9293    # Check that we're dealing with a valid mail file
    93     return undef unless (($$textref =~ /From:/) || ($$textref =~ /To:/));
     94    return undef unless (($$textref =~ /^From:/m) || ($$textref =~ /^To:/m));
    9495
    9596    # slightly more strict validity check, to prevent us from matching
    9697    # .so.x files ...
    9798    return undef unless (($$textref =~ /^From /) ||
    98              ($$textref =~ /^[-A-Za-z]{2,100}:/));
     99             ($$textref =~ /^[-A-Za-z]{2,100}:/m));
    99100
    100101    print $outhandle "EMAILPlug: processing $file\n"
Note: See TracChangeset for help on using the changeset viewer.