Changeset 1244 for trunk/gsdl/perllib/plugins/TEXTPlug.pm
- Timestamp: 2000-06-27T17:10:07+12:00 (24 years ago)
- File: 1 edited
Legend:
- Unmodified
- Added
- Removed
trunk/gsdl/perllib/plugins/TEXTPlug.pm
(Left column: line number in r732; right column: line number in r1244. A row with only a left number was removed; a row with only a right number was added; a row with both is unmodified.)

 r732 r1244
   24    24  ###########################################################################
   25    25
   26        # creates simple single-level document from .txt or .text files
   27        # (case-insensitive match on filenames). Adds Title metadata
   28        # of first 100 characters found.
         26  # creates simple single-level document. Adds Title metadata
         27  # of first line of text (up to 100 characters long).
   29    28
   30    29  package TEXTPlug;
   31    30
   32    31  use BasPlug;
   33        use sorttools;
   34    32
   35    33  sub BEGIN {
    …     …
   37    35  }
   38    36
         37  use strict;
         38
   39    39  sub new {
   40    40  my ($class) = @_;
   41        $self = new BasPlug ();
         41  my $self = new BasPlug ("TEXTPlug", @_);
   42    42
   43    43  return bless $self, $class;
   44    44  }
   45    45
   46        sub is_recursive{
         46  sub get_default_process_exp {
   47    47  my $self = shift (@_);
   48    48
   49        return 0; # this is not a recursive plugin
         49  return q^(?i)\.te?xt$^;
   50    50  }
   51    51
   52
   53        # return number of files processed, undef if can't process
   54        # Note that $base_dir might be "" and that $file might
   55        # include directories
   56        sub read {
         52  # do plugin specific processing of doc_obj
         53  sub process {
   57    54  my $self = shift (@_);
   58        my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;
   59
   60        my $filename = &util::filename_cat($base_dir, $file);
   61
   62        return undef unless ($filename =~ /\.(te?xt(\.gz)?)$/i && (-e $filename));
   63
   64        my $gz = 0;
   65        if (defined $2) {
   66        $gz = $2;
   67        $gz = 1 if ($gz =~ /\.gz/i);
   68        }
   69
   70        print STDERR "TEXTPlug: processing $filename\n" if $processor->{'verbosity'};
   71
   72        # create a new document
   73        my $doc_obj = new doc ($file, "indexed_doc");
   74
   75        if ($gz) {
   76        open (FILE, "zcat $filename |") || die "TEXTPlug::read - zcat can't open $filename\n";
   77        } else {
   78        open (FILE, $filename) || die "TEXTPlug::read - can't open $filename\n";
   79        }
         55  my ($textref, $pluginfo, $base_dir, $file, $metadata, $doc_obj) = @_;
         56
         57  print STDERR "TEXTPlug: processing $file\n"
         58  if $self->{'verbosity'} > 1;
         59
   80    60  my $cursection = $doc_obj->get_top_section();
   81
   82        my $text = "";
   83        my $line = "";
   84        my $foundtitle = 0;
   85        # don't need to get title if it has been passed
   86        # in from another plugin
   87        if (defined $metadata->{'Title'}) {
   88        $foundtitle = 1;
   89        }
   90        while (defined ($line = <FILE>)) {
   91        # use first line as title (or first 100 characters if it's long)
   92        if (!$foundtitle && length($line) > 5) {
   93        my $title = "";
   94        if (length($line) > 100) {
   95        $title = substr ($line, 0, 100);
   96        } else {
   97        $title = $line;
   98        }
   99        $doc_obj->add_metadata ($cursection, "Title", $title);
  100        $foundtitle = 1;
         61
         62  # get title metadata
         63  # (don't need to get title if it has been passed
         64  # in from another plugin)
         65  if (!defined $metadata->{'Title'}) {
         66  my ($title) = $$textref =~ /^([^\n]*)/;
         67  if (length($title) > 100) {
         68  $title = substr ($title, 0, 100);
  101    69  }
  102        $text .= $line;
         70  $doc_obj->add_utf8_metadata ($cursection, "Title", $title);
  103    71  }
  104    72
  105        $doc_obj->add_text ($cursection, "<pre>\n$text\n</pre>");
         73  # insert preformat tags and add text to document object
         74  $doc_obj->add_utf8_text($cursection, "<pre>\n$$textref\n</pre>");
  106    75
  107
  108        foreach $field (keys(%$metadata)) {
  109        # $metadata->{$field} may be an array reference
  110        if (ref ($metadata->{$field}) eq "ARRAY") {
  111        map {
  112        $doc_obj->add_metadata ($cursection, $field, $_);
  113        } @{$metadata->{$field}};
  114        } else {
  115        $doc_obj->add_metadata ($cursection, $field, $metadata->{$field});
  116        }
  117        }
  118
  119        # add OID
  120        $doc_obj->set_OID ();
  121
  122        # process the document
  123        $processor->process($doc_obj);
  124
  125        return 1; # processed the file
         76  return 1;
  126    77  }
  127    78
Note: See TracChangeset for help on using the changeset viewer.