source: trunk/gsdl/perllib/plugins/ArcPlug.pm@ 10218

Last change on this file since 10218 was 10218, checked in by kjdon, 19 years ago

Jeffrey's new parsing modifications, committed approx 6 July, 15.16

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 4.3 KB
RevLine 
[537]1###########################################################################
2#
3# ArcPlug.pm --
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
# Plugin which recurses through an archives.inf file
# (i.e. the file generated in the archives directory
# when an import is done), processing each file it finds.

# 12-05-02 Added usage data structure - John Thompson
31
[4]32package ArcPlug;
33
34use util;
35use BasPlug;
36use plugin;
37use arcinfo;
[5680]38use gsprintf;
[4]39
# Set up inheritance at compile time: ArcPlug is a subclass of BasPlug.
BEGIN {
    @ArcPlug::ISA = qw(BasPlug);
}
43
# Description record for this plugin; new() appends it to the "OptList"
# array of the hash that is handed on to the BasPlug constructor.
# NOTE(review): 'desc' looks like a key into a string-resource bundle
# rather than literal text -- confirm against gsprintf.
my $options = {
    'name'     => "ArcPlug",
    'desc'     => "{ArcPlug.desc}",
    'abstract' => "no",
    'inherits' => "yes",
};
[3540]48
# Package-local shorthand: hands every argument it receives straight to
# gsprintf::gsprintf and returns whatever that call returns.
sub gsprintf {
    # The &-form call is kept deliberately so that any prototype on the
    # target sub is bypassed and @_ is passed through as-is.
    return &gsprintf::gsprintf(@_);
}
53
# Constructor.
#
# Arguments (after the class name):
#   $pluginlist      - array ref; the name of this class is appended to it
#   $inputargs       - passed through unchanged to the BasPlug constructor
#   $hashArgOptLists - hash ref whose "ArgList" and "OptList" arrays this
#                      constructor appends to before calling BasPlug
#
# Returns the object produced by the BasPlug constructor, re-blessed
# into $class.
sub new {
    my ($class) = shift (@_);
    my ($pluginlist, $inputargs, $hashArgOptLists) = @_;
    push(@$pluginlist, $class);

    # NOTE(review): $arguments is not declared anywhere in the visible
    # source, so without 'use strict' it refers to a package variable and
    # this branch only runs if something outside this file sets it.
    # A stray debug print to STDOUT that used to live here has been removed.
    if (defined $arguments) {
        push(@{$hashArgOptLists->{"ArgList"}}, @{$arguments});
    }

    # Pushing through an undefined $hashArgOptLists autovivifies it, so the
    # three-argument BasPlug call below is the one taken whenever $options
    # is defined.
    if (defined $options) {
        push(@{$hashArgOptLists->{"OptList"}}, $options);
    }

    # Explicit Class->new(...) instead of indirect-object "new Class(...)":
    # this package defines its own sub new, which makes the indirect form
    # ambiguous to the parser.
    my $self = (defined $hashArgOptLists)
        ? BasPlug->new($pluginlist, $inputargs, $hashArgOptLists)
        : BasPlug->new($pluginlist, $inputargs);

    return bless $self, $class;
}
66
# Marks every document in the archive info object cached on $self as
# "B" ("Been Indexed") and saves the info back to the filename that was
# cached alongside it.
#
# Both $self->{'archive_info'} and $self->{'archive_info_filename'} are
# stored by read(); if no archive info object is present this sub does
# nothing.
sub deinit {
    my ($self) = @_;

    my $archive_info = $self->{'archive_info'};

    if (defined $archive_info) {
        my $archive_info_filename = $self->{'archive_info_filename'};

        my $file_list = $archive_info->get_file_list();

        # change each file to "Been Indexed"
        # (The previous status used to be fetched into an undeclared
        # package global here and never used; that dead read is gone.)
        foreach my $subfile (@$file_list) {
            my $doc_oid = $subfile->[1];
            $archive_info->set_status_info($doc_oid, "B");
        }

        $archive_info->save_info($archive_info_filename);
    }
}
88
# Tells the caller whether this class might recurse using $pluginfo.
# For ArcPlug the answer is always 1.
sub is_recursive {
    my ($self) = @_;
    return 1;
}
95
[10156]96
97
98
# Looks for an archives.inf file under $base_dir/$file and, if one exists,
# passes each file listed in it back to plugin::read.
#
# Arguments (after $self):
#   $pluginfo    - passed through to plugin::read
#   $base_dir    - base directory; might be ""
#   $file        - path below $base_dir; might include directories
#   $metadata    - not used here (metadata is not carried to the next level)
#   $processor   - object whose get_mode() result is consulted per document
#   $maxdocs     - document limit; -1 means no limit
#   $total_count - number of documents already counted by the caller
#   $gli         - passed through to plugin::read
#
# Side effects: stores the archives.inf path in
# $self->{'archive_info_filename'} and, when the file exists, the loaded
# arcinfo object in $self->{'archive_info'}.
#
# Returns the number of files processed, or undef if there is no
# archives.inf file to process.
sub read {
    my $self = shift (@_);
    my ($pluginfo, $base_dir, $file, $metadata, $processor, $maxdocs, $total_count, $gli) = @_;
    my $outhandle = $self->{'outhandle'};

    my $count = 0;

    # see if this has an archives information file within it
    my $archive_info_filename = &util::filename_cat($base_dir, $file, "archives.inf");
    $self->{'archive_info_filename'} = $archive_info_filename;

    if (-e $archive_info_filename) {

        # found an archives.inf file
        &gsprintf($outhandle, "ArcPlug: {common.processing} $archive_info_filename\n") if $self->{'verbosity'} > 1;

        # read in the archives information file
        # (explicit Class->new() rather than indirect-object "new Class ()",
        # which is ambiguous in a package that defines its own sub new)
        my $archive_info = arcinfo->new();
        $self->{'archive_info'} = $archive_info;

        $archive_info->load_info($archive_info_filename);

        my $file_list = $archive_info->get_file_list();

        # process each file
        foreach my $subfile (@$file_list) {
            last if ($maxdocs != -1 && ($total_count + $count) >= $maxdocs);

            my $tmp = &util::filename_cat($file, $subfile->[0]);
            next if $tmp eq $file;

            my $doc_oid = $subfile->[1];

            # Lexical on purpose: this used to be an undeclared package
            # global that every iteration (and any nested call) overwrote.
            my $index_status = $archive_info->get_status_info($doc_oid);

            my $processor_mode = $processor->get_mode();

            # Hand the document on when the processor is in "infodb" mode
            # or the document's status is "I"; an undefined status never
            # matches.
            if (($processor_mode eq "infodb")
                || (defined $index_status && $index_status eq "I")) {

                # note: metadata is not carried on to the next level
                $count += &plugin::read($pluginfo, $base_dir, $tmp, {}, $processor, $maxdocs, ($total_count + $count), $gli);
            }
        }

        return $count;
    }

    # wasn't an archives directory, someone else will have to process it
    # (explicit undef kept so the return value is unchanged in every
    # calling context)
    return undef;
}
153
1541;
Note: See TracBrowser for help on using the repository browser.