source: trunk/gsdl/perllib/plugins/TEXTPlug.pm@ 589

Last change on this file since 589 was 585, checked in by sjboddie, 25 years ago

new plugin

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 3.3 KB
Line 
1###########################################################################
2#
3# TEXTPlug.pm -- simple text plugin
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
# Creates a simple single-level document from .txt or .text files,
# optionally gzip-compressed (.txt.gz / .text.gz); filenames are matched
# case-insensitively. Adds Title metadata of first 100 characters found.
29
30package TEXTPlug;
31
32use BasPlug;
33use sorttools;
34
# Set up inheritance at compile time: this package derives from BasPlug.
BEGIN {
    @ISA = qw(BasPlug);
}
38
# Constructor.
#   $class - name of the class to bless the new object into.
# Builds a BasPlug object and re-blesses it into $class, returning the
# blessed reference.
sub new {
    my ($class) = @_;

    # Keep $self lexical: without "my" it is a package global that every
    # constructor call would overwrite.
    my $self = BasPlug->new();

    return bless $self, $class;
}
45
# Reports whether this plugin is recursive.
# Always returns 0: this is not a recursive plugin.
sub is_recursive {
    my ($self) = @_;

    return 0;
}
51
52
# Reads one text file, turns it into a document object and hands that
# object to $processor.
#
# Arguments (after $self):
#   $pluginfo  - not used by this plugin
#   $base_dir  - directory prefix; might be ""
#   $file      - file name, might include directories
#   $metadata  - hash ref of extra metadata; a value may be a scalar or an
#                array reference (one metadata entry is added per element)
#   $processor - object whose process() method receives the document; its
#                'verbosity' entry controls the progress message
#
# Returns the number of files processed (1), or undef if the file can't be
# processed here: the name does not end in .txt/.text (optionally followed
# by .gz, case-insensitive) or the file does not exist.
# Dies if the file (or the zcat pipe for .gz files) cannot be opened.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor) = @_;

    my $filename = &util::filename_cat($base_dir, $file);

    return undef unless ($filename =~ /\.(te?xt(\.gz)?)$/i);
    # Grab the capture immediately so no later match can clobber $2.
    my $gz = (defined $2) ? 1 : 0;
    return undef unless (-e $filename);

    print STDERR "TEXTPlug: processing $filename\n" if $processor->{'verbosity'};

    # create a new document
    # (explicit method-call syntax: "new doc (...)" is ambiguous now that
    # this package defines its own new())
    my $doc_obj = doc->new($file, "indexed_doc");

    # Lexical handle and explicit open modes: the filename is never passed
    # through the shell or interpreted as an open-mode string.
    my $fh;
    if ($gz) {
        open ($fh, '-|', 'zcat', $filename)
            or die "TEXTPlug::read - zcat can't open $filename\n";
    } else {
        open ($fh, '<', $filename)
            or die "TEXTPlug::read - can't open $filename\n";
    }
    my $cursection = $doc_obj->get_top_section();

    my $text = "";
    my $need_title = 1;
    while (defined (my $line = <$fh>)) {
        # use the first line longer than 5 characters as the title
        # (or its first 100 characters if it's long)
        if ($need_title && length($line) > 5) {
            $doc_obj->add_metadata ($cursection, "Title", substr ($line, 0, 100));
            $need_title = 0;  # only one Title per document
        }
        $text .= $line;
    }

    # For the zcat pipe, close() is where a failed decompression shows up.
    unless (close ($fh)) {
        print STDERR "TEXTPlug: warning - problem closing $filename\n";
    }

    # NOTE(review): add_text is called without a section argument, unlike
    # add_metadata -- confirm this matches doc's interface.
    $doc_obj->add_text ($text);

    foreach my $field (keys(%$metadata)) {
        my $value = $metadata->{$field};
        # $metadata->{$field} may be an array reference
        my @values = (ref ($value) eq "ARRAY") ? @$value : ($value);
        foreach my $item (@values) {
            $doc_obj->add_metadata ($cursection, $field, $item);
        }
    }

    # add OID
    $doc_obj->set_OID ();

    # process the document
    $processor->process($doc_obj);

    return 1; # processed the file
}
121
1221;
123
124
125
126
127
128
129
130
131
132
133
Note: See TracBrowser for help on using the repository browser.