source: trunk/gsdl/perllib/plugins/ArcPlug.pm@ 10218

Last change on this file since 10218 was 10218, checked in by kjdon, 19 years ago

Jeffrey's new parsing modifications, committed approx 6 July, 15.16

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.3 KB
Line 
1###########################################################################
2#
3# ArcPlug.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# plugin which recurses through an archives.inf file
27# (i.e. the file generated in the archives directory
28# when an import is done), processing each file it finds
29
30# 12-05-02 Added usage datastructure - John Thompson
31
32package ArcPlug;
33
34use util;
35use BasPlug;
36use plugin;
37use arcinfo;
38use gsprintf;
39
# Establish BasPlug as the parent class at compile time.
BEGIN {
    @ArcPlug::ISA = qw(BasPlug);
}
43
# Description record for this plugin; new() appends it to the "OptList"
# entry of the hash it hands to the BasPlug constructor.
# NOTE(review): 'desc' looks like a "{key}" resource placeholder rather than
# literal text -- confirm where it gets resolved.
my $options = {
    'name'     => "ArcPlug",
    'desc'     => "{ArcPlug.desc}",
    'abstract' => "no",
    'inherits' => "yes",
};
48
# Package-local shorthand for gsprintf::gsprintf: every argument is
# forwarded untouched and whatever that function returns is handed back
# to the caller.
sub gsprintf {
    return &gsprintf::gsprintf(@_);
}
53
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; the class name is appended to it
#   $inputargs       - passed through unchanged to the BasPlug constructor
#   $hashArgOptLists - hash ref (may be undef); this plugin's option record
#                      is appended to its "OptList" entry, and its argument
#                      list (if one exists) to its "ArgList" entry
#
# Returns the object built by BasPlug's constructor, re-blessed into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # This file declares no argument list of its own, so this only fires if
    # the package variable $arguments has been set from somewhere else.
    if (defined $arguments) {
        push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    }
    if (defined $options) {
        push(@{$hashArgOptLists->{"OptList"}}, $options);
    }

    # $options is always defined in this file, and the push above creates
    # the hash when the caller passed undef, so $hashArgOptLists is always
    # defined by now and the three-argument form is the only one ever needed.
    my $self = BasPlug->new($pluginlist, $inputargs, $hashArgOptLists);

    return bless $self, $class;
}
66
# Mark every document listed in the loaded archive information as
# "Been Indexed" (status "B") and write the information back to the file
# name remembered in $self->{'archive_info_filename'}.
#
# Does nothing when $self->{'archive_info'} is not defined, which is the
# case when read() never found an archives.inf file.
sub deinit {
    my ($self) = @_;

    my $archive_info = $self->{'archive_info'};

    if (defined $archive_info) {
        my $archive_info_filename = $self->{'archive_info_filename'};

        my $file_list = $archive_info->get_file_list();

        # change each file to "Been Indexed"; element [1] of each entry
        # is the document OID
        foreach my $subfile (@$file_list) {
            my $doc_oid = $subfile->[1];
            $archive_info->set_status_info($doc_oid, "B");
        }

        $archive_info->save_info($archive_info_filename);
    }
}
88
# Tells the caller whether this class might recurse using $pluginfo.
# Always answers 1.
sub is_recursive {
    my ($self) = @_;
    return 1;
}
95
96
97
98
# Process the archives.inf file found under $base_dir/$file, if there is one,
# by handing each listed sub-file to plugin::read.
#
# Arguments:
#   $pluginfo    - passed through to plugin::read
#   $base_dir    - base directory; might be ""
#   $file        - path below $base_dir; might include directories
#   $metadata    - not used here (metadata is not carried on to the next level)
#   $processor   - object whose get_mode() is consulted for every entry and
#                  which is passed through to plugin::read
#   $maxdocs     - stop once $total_count plus the files processed here
#                  reaches this number; -1 means no limit
#   $total_count - number of files already processed before this call
#   $gli         - passed through to plugin::read
#
# Returns the number of files processed, or undef if there is no
# archives.inf file at that location (someone else will have to process it).
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    my $count = 0;

    # see if this has an archives information file within it; the name is
    # remembered so deinit() can save the information back to the same place
    my $archive_info_filename = &util::filename_cat($base_dir, $file, "archives.inf");
    $self->{'archive_info_filename'} = $archive_info_filename;

    # wasn't an archives directory, someone else will have to process it.
    # An explicit undef is kept (rather than a bare return) so the value
    # seen by existing callers is unchanged in every calling context.
    return undef unless -e $archive_info_filename;

    # found an archives.inf file
    &gsprintf($outhandle, "ArcPlug: {common.processing} $archive_info_filename\n") if $self->{'verbosity'} > 1;

    # read in the archives information file
    my $archive_info = arcinfo->new();
    $self->{'archive_info'} = $archive_info;

    $archive_info->load_info ($archive_info_filename);

    my $file_list = $archive_info->get_file_list();

    # process each file
    foreach my $subfile (@$file_list) {
        last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);

        # element [0] of an entry is its file name; an entry that adds
        # nothing to $file would send us straight back here, so skip it
        my $tmp = &util::filename_cat ($file, $subfile->[0]);
        next if $tmp eq $file;

        my $doc_oid = $subfile->[1];

        # lexical, so the status no longer leaks into a package global
        my $index_status = $archive_info->get_status_info($doc_oid);

        my $processor_mode = $processor->get_mode();

        # In "infodb" mode every entry is processed; otherwise only entries
        # whose status is "I".
        # NOTE(review): "I" presumably means "still to be indexed" -- confirm
        # against arcinfo.
        if (($processor_mode eq "infodb")
            || (defined $index_status && $index_status eq "I")) {

            # note: metadata is not carried on to the next level
            $count += &plugin::read ($pluginfo, $base_dir, $tmp, {}, $processor, $maxdocs, ($total_count+$count), $gli);
        }
    }

    return $count;
}
153
1541;
Note: See TracBrowser for help on using the repository browser.