Changeset 1244 for trunk/gsdl/perllib/plugins/EMAILPlug.pm
- Timestamp:
- 2000-06-27T17:10:07+12:00 (24 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/perllib/plugins/EMAILPlug.pm
r1206 r1244 70 70 } 71 71 72 use strict; 72 73 73 74 # Create a new EMAILPlug object with which to parse a file. … … 77 78 sub new { 78 79 my ($class) = @_; 79 $self = new BasPlug (); 80 my $self = new BasPlug ("EMAILPlug", @_); 81 80 82 return bless $self, $class; 81 83 } 82 84 83 84 # Is EMAILPlug recursive? No. 85 86 sub is_recursive { 87 return 0; 88 } 89 90 91 # Read a file and store its contents in a new document object. 92 # First, we check to see if it is an email message we're dealing 93 # with, then we extract the text and metadata, then we store 94 # all this information. 95 # 96 # Returns: number of files processed or undef if it can't process 97 # a file. This plugin only processes one file at a time. 98 99 sub read { 85 sub get_default_process_exp { 100 86 my $self = shift (@_); 101 my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_; 102 103 # 104 # Check that we're dealig with a valid mail file 105 # 106 107 # Make sure file exists 108 my $filename = &util::filename_cat($base_dir, $file); 109 return undef unless (-e $filename); 110 return undef unless ($filename =~ /\d+(\.email)?$/); 111 112 # Read the text and make sure it is an email message 113 open (FILE, $filename) || die "EMAILPlug::read - can't open $filename\n"; 114 my @text = <FILE>; 115 my $text = join("", @text); 116 return undef unless (($text =~ /From:/) || ($text =~ /To:/)); 117 118 print STDERR "EMAILPlug: processing $filename\n" if $processor->{'verbosity'}; 87 88 return q^\d+(\.email)?$^; 89 } 90 91 # do plugin specific processing of doc_obj 92 sub process { 93 my $self = shift (@_); 94 my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_; 95 96 # Check that we're dealing with a valid mail file 97 return undef unless (($$textref =~ /From:/) || ($$textref =~ /To:/)); 98 99 print STDERR "EMAILPlug: processing $file\n" 100 if $self->{'verbosity'} > 1; 101 102 my $cursection = $doc_obj->get_top_section(); 119 103 120 104 # … … 123 107 124 108 # Separate header 
from body of message 125 my $Headers = $text; 109 my $Headers = $$textref; 126 110 $Headers =~ s/\n\n.*//s; 127 $text = substr $text, (length $Headers); 111 $$textref = substr $$textref, (length $Headers); 128 112 129 113 # Extract basic metadata from header … … 158 142 159 143 160 # 161 # Create a new document object 162 # 163 164 my $doc_obj = new doc ($file, "indexed_doc"); 165 my $cursection = $doc_obj->get_top_section(); 166 167 # Add specilised metadata 144 # Add extracted metadata to document object 168 145 foreach my $name (keys %raw) { 169 146 $value = $raw{$name}; … … 173 150 $value = "No $name field"; 174 151 } 175 $doc_obj->add_metadata ($cursection, $name, $value); 152 $doc_obj->add_utf8_metadata ($cursection, $name, $value); 176 153 } 177 154 … … 179 156 $Headers = &text_into_html($Headers); 180 157 $Headers = "No headers" unless ($Headers =~ /\w/); 181 $doc_obj->add_metadata ($cursection, "Headers", $Headers); 182 183 # Add document text 184 $text = &text_into_html($text); 185 $text = "No message" unless ($text =~ /\w/); 186 $doc_obj->add_text ($cursection, $text); 187 188 # Add the OID - that is, the big HASH value used as a unique ID 189 $doc_obj->set_OID (); 190 191 # Process the document 192 $processor->process($doc_obj); 193 194 # Return the number of documents processed 195 return 1; 196 158 $doc_obj->add_utf8_metadata ($cursection, "Headers", $Headers); 159 160 # Add text to document object 161 $$textref = &text_into_html($$textref); 162 $$textref = "No message" unless ($$textref =~ /\w/); 163 $doc_obj->add_utf8_text($cursection, $$textref); 164 165 return 1; 197 166 } 198 167 … … 213 182 my ($text) = @_; 214 183 215 # Convert problem charaters into HTML symbols 184 # Convert problem characters into HTML symbols 216 185 $text =~ s/&/&amp;/go; 217 186 $text =~ s/</&lt;/go; … … 236 205 # Perl packages have to return true if they are run. 237 206 1; 238 239 240 241 242 243 244
Note:
See TracChangeset
for help on using the changeset viewer.