source: gsdl/trunk/perllib/plugins/ArchivesInfPlugin.pm@ 16257

Last change on this file since 16257 was 16257, checked in by mdewsnip, 16 years ago

Tidied up the block of code that determines whether each doc.xml file should be processed (based on whether incremental building is enabled) to make it easier to follow.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.7 KB
Line 
1###########################################################################
2#
3# ArchivesInfPlugin.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which recurses through an archives.inf file
27# (i.e. the file generated in the archives directory
28# when an import is done), processing each file it finds
29
30# 12-05-02 Added usage datastructure - John Thompson
31
32package ArchivesInfPlugin;
33
34use util;
35use AbstractPlugin;
36use plugin;
37use arcinfo;
38use gsprintf;
39
40use strict;
41no strict 'refs'; # allow filehandles to be variables and viceversa
42
# Inherit from AbstractPlugin. The assignment lives in a BEGIN block so
# that @ISA is already populated while the rest of the file is compiled.
BEGIN {
    @ArchivesInfPlugin::ISA = qw(AbstractPlugin);
}

# This plugin contributes no arguments of its own; the (empty) list is
# still appended to the caller's "ArgList" by the constructor.
my $arguments = [];

# Option record appended to the caller's "OptList" by the constructor.
my $options = {
    'name'     => "ArchivesInfPlugin",
    'desc'     => "{ArchivesInfPlugin.desc}",
    'abstract' => "no",
    'inherits' => "yes",
};
54
# Package-local shorthand for gsprintf::gsprintf: every argument is
# forwarded untouched and whatever that function returns is returned.
sub gsprintf {
    return &gsprintf::gsprintf(@_);
}
59
# Constructor.
#
# Parameters:
#   $class           - class name to bless the new object into
#   $pluginlist      - array ref; $class is appended to it
#   $inputargs       - passed straight through to AbstractPlugin->new
#   $hashArgOptLists - hash ref; this plugin's argument list is appended
#                      to its "ArgList" entry and its option record to
#                      its "OptList" entry
#
# Returns the object built by AbstractPlugin->new, re-blessed into $class.
sub new {
    my ($class) = shift(@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    push(@{$hashArgOptLists->{"OptList"}}, $options);

    # Explicit arrow call rather than indirect object syntax
    # ("new AbstractPlugin(...)"): this package defines its own new(),
    # which is exactly the situation where the indirect form is ambiguous.
    my $self = AbstractPlugin->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
72
# If read() stored an archive info object on this plugin, set the status
# of every entry in its file list to "B" ("Been Indexed") and save the
# information back to the file it was loaded from. Does nothing when no
# archive info object has been stored.
sub deinit {
    my ($self) = @_;

    my $arc_info = $self->{'archive_info'};

    if (defined $arc_info) {
        my $info_path = $self->{'archive_info_filename'};

        my $entries = $arc_info->get_file_list();

        # Each entry is an array ref whose second element is the document OID.
        for my $entry (@{$entries}) {
            my $oid = $entry->[1];

            # NOTE(review): the current status is fetched but never used;
            # the lookup is kept as-is in case get_status_info has side
            # effects -- confirm against arcinfo before removing it.
            my $current_status = $arc_info->get_status_info($oid);

            $arc_info->set_status_info($oid, "B");
        }

        $arc_info->save_info($info_path);
    }
}
95
# Tells the caller whether this class might recurse using $pluginfo.
# This plugin always answers yes (1).
sub is_recursive {
    my ($self) = @_;

    return 1;
}
102
103
104
105
# Process an archives directory by walking its archives.inf file and
# passing every listed file back to plugin::read.
#
# Parameters:
#   $pluginfo    - passed through unchanged to plugin::read
#   $base_dir    - base directory (may be "")
#   $file        - path relative to $base_dir (may include directories)
#   $metadata    - not used here; metadata is not carried on to the next level
#   $processor   - build processor; asked whether it is incremental capable
#   $maxdocs     - maximum number of documents to process, -1 for no limit
#   $total_count - number of documents already processed by the caller
#   $gli         - passed through unchanged to plugin::read
#
# Returns the number of files processed, or undef when
# <base_dir>/<file>/archives.inf does not exist (so that some other
# plugin can try to handle $file).
#
# Side effects: stores the loaded archive info object and its filename in
# $self->{'archive_info'} and $self->{'archive_info_filename'}, which
# deinit() later reads.
sub read {
    my $self = shift(@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    my $count = 0;

    # see if this has an archives information file within it
    my $archive_info_filename = &util::filename_cat($base_dir, $file, "archives.inf");

    # wasn't an archives directory, someone else will have to process it
    # (explicit undef kept so the return value is unchanged for callers)
    return undef unless -e $archive_info_filename;

    # found an archives.inf file
    &gsprintf($outhandle, "ArchivesInfPlugin: {common.processing} $archive_info_filename\n") if $self->{'verbosity'} > 1;

    # read in the archives information file
    # (arrow call instead of indirect object syntax, which is ambiguous in
    # a package that defines its own new())
    my $archive_info = arcinfo->new();
    $self->{'archive_info'} = $archive_info;
    $self->{'archive_info_filename'} = $archive_info_filename;

    $archive_info->load_info($archive_info_filename);

    my $file_list = $archive_info->get_file_list();

    # process each file; each entry is [ filename, document OID ]
    foreach my $subfile (@$file_list) {
        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);

        my $tmp = &util::filename_cat($file, $subfile->[0]);
        next if $tmp eq $file;

        # We always process the file, unless the build processor is
        # incremental capable and -incremental was specified...
        if ($processor->is_incremental_capable() && $self->{'incremental'}) {
            # ...in which case a file that has already been built ("B")
            # is skipped. An entry with no recorded status is treated as
            # not yet built; the defined() guard avoids comparing undef.
            my $index_status = $archive_info->get_status_info($subfile->[1]);
            next if defined $index_status && $index_status eq "B";
        }

        # note: metadata is not carried on to the next level
        $count += &plugin::read($pluginfo, $base_dir, $tmp, {}, $processor, $maxdocs, ($total_count + $count), $gli);
    }

    return $count;
}
169
1701;
Note: See TracBrowser for help on using the repository browser.