source: trunk/gsdl/perllib/plugins/IndexPlug.pm@ 9465

Last change on this file since 9465 was 6408, checked in by jmt12, 20 years ago

Added two new attributes for script arguments. HiddenGLI controls whether the argument will be visible at all in GLI, while ModeGLI defines the lowest detail mode under which the argument will be visible (only really for import and buildcol). Also ensured that the scripts were reporting their correct default process expressions, and further refined argument types by adding the category regexp for any regular expression (which can then be hidden under lower detail modes in GLI)

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.8 KB
Line 
1###########################################################################
2#
3# IndexPlug.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# This recursive plugin processes an index.txt file.
27# The index.txt file should contain the list of files to be
28# included in the collection followed by any extra metadata to
29# be associated with each file.
30
31# The index.txt file should be formatted as follows:
32# The first line may be a key (beginning with key:)
33# to name the metadata fields
34# (e.g. key: Subject Organization Date)
35# The following lines will contain a filename followed
36# by the value that metadata entry is to be set to.
37# (e.g. 'irma/iw097e 3.2 unesco 1993' will associate the
38# metadata Subject=3.2, Organization=unesco, and Date=1993
39# with the file irma/iw097e if the above key line was used)
40
41# Note that if any of the metadata fields use the Hierarchy
42# classifier plugin then the value they're set to should
43# correspond to the first field (the descriptor) in the
44# appropriate classification file.
45
46# Metadata values may be named separately using a tag
47# (e.g. <Subject>3.2) and this will override any name
48# given to them by the key line.
49# If there's no key line any unnamed metadata value will be
50# named 'Subject'.
51
52# 12/05/02 Added usage datastructure - John Thompson
53
54package IndexPlug;
55
56use plugin;
57use BasPlug;
58use doc;
59use util;
60use cfgread;
61
# Set up inheritance at compile time: this package is a subclass of BasPlug.
BEGIN {
    our @ISA = ('BasPlug');
}
65
# Description of this plugin; new() appends it to the object's option list.
# NOTE(review): "{IndexPlug.desc}" looks like a lookup key for the real
# description text rather than the text itself - confirm against the
# code that consumes the option list.
my $options = {
    'name'     => "IndexPlug",
    'desc'     => "{IndexPlug.desc}",
    'abstract' => "no",
    'inherits' => "yes",
};
70
# Constructor.
# Builds the underlying BasPlug object, appends this plugin's option
# description to that object's option list, and blesses the result into
# the class the constructor was invoked on.
sub new {
    my ($class) = @_;

    # The plugin name is passed first, followed by the caller's complete
    # argument list (which still starts with the class name).
    my $self = BasPlug->new ("IndexPlug", @_);

    # 14-05-02 To allow for proper inheritance of arguments - John Thompson
    my $inherited_options = $self->{'option_list'};
    push (@$inherited_options, $options);

    bless ($self, $class);
    return $self;
}
80
# Report whether this class might recurse using $pluginfo.
# IndexPlug hands each listed document back to the plugin list from
# read(), so the answer is always 1.
sub is_recursive {
    my ($self) = @_;

    return 1;
}
87
# Process the index.txt file in the directory $base_dir/$file and pass
# every document it lists, together with its metadata, to plugin::read.
#
# Arguments (after $self):
#   $pluginfo  - handed on unchanged to plugin::read
#   $base_dir  - base directory; might be ""
#   $file      - directory to look in; might include directories
#   $metadata  - incoming metadata; NOT used, a fresh hash is built for
#                every listed document (see the comment in the loop)
#   $processor - handed on unchanged to plugin::read
#   $maxdocs   - stop once this many files have been counted; -1 means
#                no limit
#   $gli       - when true, a <Processing> tag is printed to STDERR
#
# Returns the number of files processed, or undef if there is no
# index.txt (so this plugin can't process the directory).
#
# Metadata naming for each value on a document line:
#   1. a value written as <Name>value is stored under Name;
#   2. otherwise the value is stored under the key-line field at the
#      same position, if the key line has one;
#   3. otherwise the value is stored under 'Subject'.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    my $indexfile = &util::filename_cat($base_dir, $file, "index.txt");
    if (!-f $indexfile) {
        # not a directory containing an index file
        return undef;
    }

    # found an index.txt file
    print STDERR "<Processing n='$file' p='IndexPlug'>\n" if ($gli);
    print $outhandle "IndexPlug: processing $indexfile\n";

    # read in the index.txt; the result is used below as a hash reference
    # mapping the first word of each line to an array reference holding
    # the remaining words
    my $list = &cfgread::read_cfg_file ($indexfile, undef, '^[^#]\w');

    # see if there's a 'key:' line naming the metadata fields
    my @fields = ();
    if (defined $list->{'key:'}) {
        @fields = @{$list->{'key:'}};
    }

    my $index_base_dir = &util::filename_cat($base_dir, $file);

    # process each document
    my $count = 0;
    foreach my $docfile (keys (%$list)) {
        last if ($maxdocs != -1 && $count >= $maxdocs);

        # the key line names the fields, it is not a document
        next if ($docfile =~ /key:/i);

        # at present we can do this as metadata will always be empty
        # when it arrives at this plugin - this might cause problems if
        # things change though
        $metadata = {};

        # note that $list->{$docfile} is an array reference
        my $values = $list->{$docfile};
        for (my $i = 0; $i < scalar (@$values); $i ++) {
            my $name;
            my $value = $values->[$i];

            if ($value =~ /^<([^>]+)>(.+)$/) {
                # an explicit <Name> tag overrides the key line
                ($name, $value) = ($1, $2);
            } elsif ($i < scalar (@fields)) {
                # strictly less than: $fields[$i] only exists for
                # indexes below the number of key fields
                $name = $fields[$i];
            } else {
                # no tag and no key field at this position
                $name = 'Subject';
            }

            # add just this value; push creates the list on first use
            push (@{$metadata->{$name}}, $value);
        }

        # NOTE(review): $gli is not forwarded here - confirm whether
        # plugin::read expects it.
        $count += &plugin::read ($pluginfo, $index_base_dir, $docfile, $metadata, $processor, $maxdocs);
    }

    return $count; # was processed
}
149
150
1511;
Note: See TracBrowser for help on using the repository browser.