source: trunk/gsdl/perllib/plugins/TEXTPlug.pm@ 732

Last change on this file since 732 was 732, checked in by sjboddie, 25 years ago

prevent from overriding Title metadata that may have been passed
in from another plugin (e.g. IndexPlug)

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.5 KB
Line 
1###########################################################################
2#
3# TEXTPlug.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# creates simple single-level document from .txt or .text files, optionally
27# gzipped (.gz); filenames are matched case-insensitively. Adds Title metadata
28# from the first line over 5 characters (cut to 100) unless a Title was passed in.
29
30package TEXTPlug;
31
32use BasPlug;
33use sorttools;
34
# Declare BasPlug as the parent class. Done in a BEGIN block so the
# inheritance chain is set up at compile time.
BEGIN {
    our @ISA = qw(BasPlug);
}
38
# Constructor: creates a BasPlug object and re-blesses it into $class.
#
# Arguments:
#   $class - name of the class to bless the new object into
#
# Returns the newly blessed object.
sub new {
    my ($class) = @_;

    # $self must be lexical: assigning to a bare $self would overwrite a
    # package-level global each time a plugin is constructed.  The arrow
    # form is used instead of "new BasPlug ()" so the call can never be
    # parsed as a plain function call to a local sub named "new".
    my $self = BasPlug->new();

    return bless $self, $class;
}
45
# Tells the caller whether this plugin is recursive.
# TEXTPlug is not, so the answer is always 0.
sub is_recursive {
    my ($self) = @_;

    return 0;
}
51
52
# Read one plain-text file, wrap its contents in a single-section
# document and pass that document to $processor.
#
# Arguments (after $self):
#   $pluginfo  - plugin information; not used by this plugin
#   $base_dir  - directory prefix for $file; might be ""
#   $file      - file name; might include directories
#   $metadata  - hash ref of metadata passed in by the caller; each value
#                is either a scalar or an array ref of values
#   $processor - object whose process() method receives the document and
#                whose 'verbosity' entry enables the progress message
#
# Returns the number of files processed (1), or undef if this plugin
# can't process the file, i.e. it is not an existing .txt/.text file
# (optionally followed by .gz; the match is case-insensitive).
# Dies if the file (or the zcat pipe for .gz files) cannot be opened.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;

    my $filename = util::filename_cat($base_dir, $file);

    # "return undef" is kept (rather than a bare return) so that callers
    # always receive exactly one value, as they did before.
    return undef unless ($filename =~ /\.te?xt(\.gz)?$/i);
    # Remember the capture right away, before anything else can run a regex.
    my $gz = defined $1 ? 1 : 0;
    return undef unless (-e $filename);

    print STDERR "TEXTPlug: processing $filename\n" if $processor->{'verbosity'};

    # create a new document
    # (arrow call: this package defines its own new(), so the indirect
    # form "new doc (...)" could be parsed as new(doc(...)))
    my $doc_obj = doc->new($file, "indexed_doc");

    # Lexical handle plus explicit open modes: the file name is never
    # interpreted as a mode or handed to a shell, so names containing
    # spaces or shell metacharacters are read correctly.
    my $fh;
    if ($gz) {
        open ($fh, '-|', 'zcat', $filename)
            || die "TEXTPlug::read - zcat can't open $filename\n";
    } else {
        open ($fh, '<', $filename)
            || die "TEXTPlug::read - can't open $filename\n";
    }
    my $cursection = $doc_obj->get_top_section();

    my $text = "";
    # don't need to get title if it has been passed
    # in from another plugin
    my $foundtitle = defined $metadata->{'Title'} ? 1 : 0;

    while (defined (my $line = <$fh>)) {
        # use the first line longer than 5 characters as title
        # (or its first 100 characters if it's long)
        if (!$foundtitle && length($line) > 5) {
            # substr returns the whole line when it is shorter than 100
            $doc_obj->add_metadata ($cursection, "Title", substr ($line, 0, 100));
            $foundtitle = 1;
        }
        $text .= $line;
    }

    # Release the file descriptor (and reap zcat for .gz input).  A failed
    # close is only reported, so a partially read file is still indexed.
    close ($fh) || warn "TEXTPlug::read - error closing $filename\n";

    $doc_obj->add_text ($cursection, "<pre>\n$text\n</pre>");

    foreach my $field (keys(%$metadata)) {
        # $metadata->{$field} may be an array reference
        my $value = $metadata->{$field};
        my @values = (ref ($value) eq "ARRAY") ? @$value : ($value);
        foreach my $item (@values) {
            $doc_obj->add_metadata ($cursection, $field, $item);
        }
    }

    # add OID
    $doc_obj->set_OID ();

    # process the document
    $processor->process($doc_obj);

    return 1; # processed the file
}
127
1281;
129
130
131
132
133
134
135
136
137
138
139
Note: See TracBrowser for help on using the repository browser.