source: trunk/gsdl/perllib/plugins/ArcPlug.pm@ 11090

Last change on this file since 11090 was 10478, checked in by kjdon, 19 years ago

ArcPlug now knows about keepold, and if it's not set, it won't try to do incremental building

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.8 KB
Line 
1###########################################################################
2#
3# ArcPlug.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which recurses through an archives.inf file
27# (i.e. the file generated in the archives directory
28# when an import is done), processing each file it finds
29
30# 12-05-02 Added usage datastructure - John Thompson
31
32package ArcPlug;
33
34use util;
35use BasPlug;
36use plugin;
37use arcinfo;
38use gsprintf;
39
40use strict;
41no strict 'refs'; # allow filehandles to be variables and viceversa
42
# ArcPlug inherits from BasPlug; the inheritance chain is set up at
# compile time so it is in place before any method is resolved.
BEGIN {
    @ArcPlug::ISA = ('BasPlug');
}

# ArcPlug declares no command-line arguments of its own.
my $arguments = [];

# Plugin description record; new() appends it to the caller's "OptList".
my $options = {
    'name'     => "ArcPlug",
    'desc'     => "{ArcPlug.desc}",
    'abstract' => "no",
    'inherits' => "yes",
};
54
# Package-local shorthand for gsprintf::gsprintf.
# All arguments are passed through untouched and the callee's result is
# returned as-is, so code in this package can simply write gsprintf(...).
sub gsprintf
{
    return &gsprintf::gsprintf(@_);
}
59
# Constructor.
#
# Arguments:
#   $class           - class name to bless the new object into
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed through unchanged to the BasPlug constructor
#   $hashArgOptLists - hash ref; this plugin's $arguments are appended to its
#                      "ArgList" entry and $options to its "OptList" entry
#                      (the hash is created on demand if the caller passed
#                      nothing)
#
# Returns the object built by BasPlug->new, re-blessed into $class.
sub new {
    my ($class, $pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    if (defined $arguments) { push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments}); }
    if (defined $options)   { push(@{$hashArgOptLists->{"OptList"}}, $options); }

    # The pushes above autovivify $hashArgOptLists, so it is always defined
    # here and can be handed on unconditionally.  The explicit Class->new
    # form is used because the indirect-object form ("new BasPlug (...)") is
    # parsed heuristically and this package defines its own new().
    my $self = BasPlug->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
72
# Final clean-up for the plugin.
#
# If read() loaded an archives information object, every document in its
# file list is given status "B" ("Been Indexed") and the information is
# saved back to the filename recorded in $self->{'archive_info_filename'}.
# If no archives information was loaded, nothing is done.
sub deinit {
    my ($self) = @_;

    my $archive_info = $self->{'archive_info'};

    if (defined $archive_info) {
        # Each file-list entry is an array ref whose element [1] is the
        # document OID.  The previous status is irrelevant because it is
        # overwritten unconditionally, so it is not looked up.
        foreach my $subfile (@{ $archive_info->get_file_list() }) {
            $archive_info->set_status_info($subfile->[1], "B");
        }

        $archive_info->save_info($self->{'archive_info_filename'});
    }
}
95
# Reports whether this class might recurse using $pluginfo.
# ArcPlug always answers 1: read() hands every file listed in archives.inf
# back to plugin::read.
sub is_recursive {
    return 1;
}
102
103
104
105
# Process an archives directory by walking its archives.inf file.
#
# Arguments (after $self):
#   $pluginfo    - plugin list, handed on unchanged to plugin::read
#   $base_dir    - base directory; might be ""
#   $file        - path below $base_dir; might include directories
#   $metadata    - not used here: each listed file is read with a fresh,
#                  empty metadata hash
#   $processor   - document processor; asked for get_mode() and
#                  is_incremental_capable()
#   $maxdocs     - maximum number of documents to process, -1 for no limit
#   $total_count - number of documents already processed before this call
#   $gli         - handed on unchanged to plugin::read
#
# Returns the number of files processed, or undef if there is no
# archives.inf under $base_dir/$file (so another plugin can try).
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    # see if this has an archives information file within it
    my $archive_info_filename = &util::filename_cat($base_dir, $file, "archives.inf");

    # wasn't an archives directory, someone else will have to process it
    return undef unless -e $archive_info_filename;

    # Remember the filename only once we know the file exists.  deinit()
    # saves $self->{'archive_info'} to this path, so a later call on a
    # non-archive path must not replace it with a bogus location.
    $self->{'archive_info_filename'} = $archive_info_filename;

    &gsprintf($outhandle, "ArcPlug: {common.processing} $archive_info_filename\n") if $self->{'verbosity'} > 1;

    # read in the archives information file
    my $archive_info = arcinfo->new();
    $self->{'archive_info'} = $archive_info;
    $archive_info->load_info($archive_info_filename);

    my $count = 0;

    # process each file
    foreach my $subfile (@{ $archive_info->get_file_list() }) {
        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);

        # skip entries that resolve to the directory itself, which would
        # otherwise make this plugin recurse on its own input
        my $tmp = &util::filename_cat($file, $subfile->[0]);
        next if $tmp eq $file;

        # Decide if the file needs to be processed.  Everything is
        # processed unless this is an incremental build: not "infodb" mode,
        # an incremental-capable processor, and keepold set.  In that case
        # only documents whose status is "I" are processed.
        my $process_file = 1;
        if ($processor->get_mode() ne "infodb"
            && $processor->is_incremental_capable()
            && $self->{'keepold'}) {
            my $index_status = $archive_info->get_status_info($subfile->[1]);
            $process_file = ($index_status eq "I") ? 1 : 0;
        }

        if ($process_file) {
            # note: metadata is not carried on to the next level
            $count += &plugin::read($pluginfo, $base_dir, $tmp, {}, $processor, $maxdocs, ($total_count + $count), $gli);
        }
    }

    return $count;
}
179
1801;
Note: See TracBrowser for help on using the repository browser.