source: trunk/gsdl/perllib/plugins/IndexPlug.pm@ 11090

Last change on this file since 11090 was 10254, checked in by kjdon, 19 years ago

added 'use strict' to all plugins, and made modifications (mostly adding 'my') to make them compile

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 5.1 KB
Line 
1###########################################################################
2#
3# IndexPlug.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# This recursive plugin processes an index.txt file.
27# The index.txt file should contain the list of files to be
28# included in the collection followed by any extra metadata to
29# be associated with each file.
30
31# The index.txt file should be formatted as follows:
32# The first line may be a key (beginning with key:)
33# to name the metadata fields
34# (e.g. key: Subject Organization Date)
35# The following lines will contain a filename followed
36# by the value that metadata entry is to be set to.
37# (e.g. 'irma/iw097e 3.2 unesco 1993' will associate the
38# metadata Subject=3.2, Organization=unesco, and Date=1993
39# with the file irma/iw097e if the above key line was used)
40
41# Note that if any of the metadata fields use the Hierarchy
42# classifier plugin then the value they're set to should
43# correspond to the first field (the descriptor) in the
44# appropriate classification file.
45
46# Metadata values may be named separately using a tag
47# (e.g. <Subject>3.2) and this will override any name
48# given to them by the key line.
49# If there's no key line any unnamed metadata value will be
50# named 'Subject'.
51
52# 12/05/02 Added usage datastructure - John Thompson
53
54package IndexPlug;
55
56use plugin;
57use BasPlug;
58use doc;
59use util;
60use cfgread;
61
62use strict;
63no strict 'refs'; # allow filehandles to be variables and viceversa
64
# Set up inheritance at compile time: IndexPlug derives from BasPlug.
BEGIN {
    @IndexPlug::ISA = qw(BasPlug);
}
68
# IndexPlug declares no arguments of its own; the list is intentionally empty.
my $arguments = [];

# Descriptor pushed onto the shared "OptList" by new().
# NOTE(review): 'desc' looks like a lookup key rather than literal text, and
# 'abstract'/'inherits' are interpreted outside this file - confirm there.
my $options = {
    'name'     => "IndexPlug",
    'desc'     => "{IndexPlug.desc}",
    'abstract' => "no",
    'inherits' => "yes",
};
76
# Constructor.
#
# Arguments:
#   $class           - class name the new object is blessed into
#   $pluginlist      - array ref; this class name is appended to it
#   $inputargs       - passed through unchanged to the BasPlug constructor
#   $hashArgOptLists - hash ref; this plugin's argument list is appended to
#                      its "ArgList" entry and its option descriptor to its
#                      "OptList" entry
#
# Returns the object built by BasPlug's constructor, re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    if (defined $arguments) {
        push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    }
    if (defined $options) {
        push(@{$hashArgOptLists->{"OptList"}}, $options);
    }

    # Use explicit Class->method syntax.  The indirect-object form
    # "new BasPlug(...)" is ambiguous inside a package that defines its own
    # new(): depending on what the parser knows about BasPlug at that point
    # it can be read as the function call new(BasPlug(...)).
    my $self = (defined $hashArgOptLists)
        ? BasPlug->new($pluginlist, $inputargs, $hashArgOptLists)
        : BasPlug->new($pluginlist, $inputargs);

    return bless $self, $class;
}
89
# Tells the caller whether this plugin may recurse using $pluginfo.
# IndexPlug always answers 1.
sub is_recursive {
    my ($self) = @_;

    return 1;
}
96
# Process a directory that contains an index.txt file.
#
# Arguments:
#   $pluginfo    - passed through to plugin::read for every listed file
#   $base_dir    - base directory (might be "")
#   $file        - path below $base_dir (might include directories); the
#                  index file is looked for at $base_dir/$file/index.txt
#   $metadata    - ignored: a fresh metadata hash is built for each file
#   $processor   - passed through to plugin::read
#   $maxdocs     - stop once $total_count plus the files processed here
#                  reaches this number; -1 means no limit
#   $total_count - number of documents processed before this call
#   $gli         - if true, a <Processing ...> line is written to STDERR
#
# Each value listed after a filename in index.txt becomes metadata:
#   * "<Name>value" is stored under Name;
#   * otherwise the value takes the name at the same position in the
#     "key:" line, if that line has a name at that position;
#   * otherwise it is stored under 'Subject'.
#
# Returns the number of files processed, undef if there is no index.txt.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    my $indexfile = &util::filename_cat($base_dir, $file, "index.txt");
    if (!-f $indexfile) {
        # not a directory containing an index file
        return undef;
    }

    # found an index.txt file
    print STDERR "<Processing n='$file' p='IndexPlug'>\n" if ($gli);
    print $outhandle "IndexPlug: processing $indexfile\n";

    # read in the index.txt
    my $list = &cfgread::read_cfg_file ($indexfile, undef, '^[^#]\w');

    # positional metadata names, taken from the 'key:' line if there is one
    my @fields = ();
    if (defined $list->{'key:'}) {
        @fields = @{$list->{'key:'}};
    }

    my $index_base_dir = &util::filename_cat($base_dir, $file);

    # process each document
    my $count = 0;
    foreach my $docfile (keys (%$list)) {
        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);

        # the key line names the fields, it is not a document
        next if ($docfile =~ /key:/i);

        # at present we can do this as metadata will always be empty when
        # it arrives at this plugin - this might cause problems if things
        # change though
        $metadata = {};

        # note that $list->{$docfile} is an array reference
        my $values = $list->{$docfile};
        for (my $i = 0; $i < scalar (@$values); $i++) {
            my $value = $values->[$i];
            my $name;
            if ($value =~ /^<([^>]+)>(.+)$/) {
                # an explicit <Name> tag overrides the key line
                ($name, $value) = ($1, $2);
            } elsif ($i < scalar (@fields)) {
                # strictly less-than: $fields[$i] must exist, otherwise the
                # value would be filed under an undefined (empty) name
                $name = $fields[$i];
            } else {
                # no tag and no key entry for this position
                $name = 'Subject';
            }
            # add only this value; the metadata never aliases the raw list
            push (@{$metadata->{$name}}, $value);
        }

        $count += &plugin::read ($pluginfo, $index_base_dir, $docfile, $metadata, $processor, $maxdocs, ($total_count + $count), $gli);
    }

    return $count; # was processed
}
158
159
1601;
Note: See TracBrowser for help on using the repository browser.