source: trunk/gsdl/perllib/docsave.pm@ 1483

Last change on this file since 1483 was 1454, checked in by stefan, 24 years ago

Lots of changes to perl building code for collectoraction

  • Property svn:keywords set to Author Date Id Revision
File size: 7.2 KB
Line 
1###########################################################################
2#
3# docsave.pm
4# A component of the Greenstone digital library software
5# from the New Zealand Digital Library Project at the
6# University of Waikato, New Zealand.
7#
8# Copyright (C) 1999 New Zealand Digital Library Project
9#
10# This program is free software; you can redistribute it and/or modify
11# it under the terms of the GNU General Public License as published by
12# the Free Software Foundation; either version 2 of the License, or
13# (at your option) any later version.
14#
15# This program is distributed in the hope that it will be useful,
16# but WITHOUT ANY WARRANTY; without even the implied warranty of
17# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18# GNU General Public License for more details.
19#
20# You should have received a copy of the GNU General Public License
21# along with this program; if not, write to the Free Software
22# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
23#
24###########################################################################
25
26# This document processor saves a document in the
27# archives directory of a collection
28
29
30package docsave;
31
32use arcinfo;
33use docproc;
34use util;
35
36
37sub BEGIN {
38 @ISA = ('docproc');
39}
40
41sub new {
42 my ($class, $collection, $archive_info, $verbosity,
43 $gzip, $groupsize, $outhandle) = @_;
44 my $self = new docproc ();
45
46
47 $groupsize=1 unless defined $groupsize;
48 $self->{'collection'} = $collection;
49 $self->{'archive_info'} = $archive_info;
50 $self->{'verbosity'} = $verbosity;
51 $self->{'gzip'} = $gzip;
52
53 $self->{'groupsize'} = $groupsize;
54 $self->{'gs_count'} = 0;
55
56 $self->{'outhandle'} = STDERR;
57 $self->{'outhandle'} = $outhandle if defined $outhandle;
58
59 # set a default for the archive directory
60 $self->{'archive_dir'} = &util::filename_cat ($ENV{'GSDLCOLLECTDIR'}, "archives");
61
62 $self->{'sortmeta'} = undef;
63
64 return bless $self, $class;
65}
66
67sub setarchivedir {
68 my $self = shift (@_);
69 my ($archive_dir) = @_;
70
71 $self->{'archive_dir'} = $archive_dir;
72}
73
74sub set_sortmeta {
75 my $self = shift (@_);
76 my ($sortmeta) = @_;
77
78 $self->{'sortmeta'} = $sortmeta;
79}
80
81sub process {
82 my $self = shift (@_);
83 my ($doc_obj) = @_;
84
85 my $outhandle = $self->{'outhandle'};
86
87 if ($self->{'groupsize'} > 1) {
88 $self->group_process ($doc_obj);
89
90 } else {
91 # groupsize is 1 (i.e. one document per GML file) so sortmeta
92 # may be used
93
94 my $OID = $doc_obj->get_OID();
95 $OID = "NULL" unless defined $OID;
96
97 # get document's directory
98 my $doc_dir = $self->get_doc_dir ($OID);
99
100 # copy all the associated files, add this information as metadata
101 # to the document
102 $self->process_assoc_files ($doc_obj, $doc_dir);
103
104 my $doc_file
105 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.gml");
106 my $short_doc_file = &util::filename_cat ($doc_dir, "doc.gml");
107
108 if (!open (OUTDOC, ">$doc_file")) {
109 print $outhandle "docsave::process could not write to file $doc_file\n";
110 return;
111 }
112
113 # save this document
114 $doc_obj->output_section('docsave::OUTDOC', $doc_obj->get_top_section());
115 close OUTDOC;
116
117 if ($self->{'gzip'}) {
118 my $doc_file = $self->{'gs_filename'};
119 `gzip $doc_file`;
120 $doc_file .= ".gz";
121 $short_doc_file .= ".gz";
122 if (!-e $doc_file) {
123 print $outhandle "error while gzipping: $doc_file doesn't exist\n";
124 return 0;
125 }
126 }
127
128 # do the sortmeta thing
129 my ($metadata);
130 if (defined ($self->{'sortmeta'})) {
131 $metadata = $doc_obj->get_metadata_element ($doc_obj->get_top_section(), $self->{'sortmeta'});
132 }
133
134 # store reference in the archive_info
135 $self->{'archive_info'}->add_info($OID, $short_doc_file, $metadata);
136 }
137}
138
139sub group_process {
140 my $self = shift (@_);
141 my ($doc_obj) = @_;
142
143 my $outhandle = $self->{'outhandle'};
144
145 my $OID = $doc_obj->get_OID();
146 $OID = "NULL" unless defined $OID;
147
148 my $groupsize = $self->{'groupsize'};
149 my $gs_count = $self->{'gs_count'};
150 my $open_new_file = (($gs_count % $groupsize)==0);
151
152 # opening a new file, or document has assoicated files => directory needed
153 if (($open_new_file) || (scalar(@{$doc_obj->get_assoc_files()})>0)) {
154
155 # get document's directory
156 my $doc_dir = $self->get_doc_dir ($OID);
157
158 # copy all the associated files, add this information as metadata
159 # to the document
160 $self->process_assoc_files ($doc_obj, $doc_dir);
161
162
163 if ($open_new_file) {
164 # only if opening new file
165 my $doc_file
166 = &util::filename_cat ($self->{'archive_dir'}, $doc_dir, "doc.gml");
167 my $short_doc_file = &util::filename_cat ($doc_dir, "doc.gml");
168
169 if ($gs_count>0)
170 {
171 return if (!$self->close_file_output());
172 }
173
174 if (!open (OUTDOC, ">$doc_file")) {
175 print $outhandle "docsave::group_process could not write to file $doc_file\n";
176 return;
177 }
178 $self->{'gs_filename'} = $doc_file;
179 $self->{'gs_short_filename'} = $short_doc_file;
180 $self->{'gs_OID'} = $OID;
181 }
182 }
183
184 # save this document
185 $doc_obj->output_section('docsave::OUTDOC', $doc_obj->get_top_section());
186
187 $self->{'gs_count'}++;
188}
189
190
191sub get_doc_dir {
192 my $self = shift (@_);
193 my ($OID) = @_;
194
195 my $doc_info = $self->{'archive_info'}->get_info($OID);
196 my $doc_dir = "";
197 if (defined $doc_info && scalar(@$doc_info) >= 1) {
198 # this OID already has an assigned directory, use the
199 # same one.
200 $doc_dir = $doc_info->[0];
201 $doc_dir =~ s/\/?doc\.gml(\.gz)?$//;
202 } else {
203 # have to get a new document directory
204 my $doc_dir_rest = $OID;
205 my $doc_dir_num = 0;
206 do {
207 $doc_dir .= "/" if $doc_dir_num > 0;
208 if ($doc_dir_rest =~ s/^(.{1,8})//) {
209 $doc_dir .= $1;
210 $doc_dir_num++;
211 }
212 } while ($doc_dir_rest ne "" &&
213 ((-d &util::filename_cat ($self->{'archive_dir'}, "$doc_dir.dir")) ||
214 ($self->{'archive_info'}->size() >= 1024 && $doc_dir_num < 2)));
215 $doc_dir .= ".dir";
216
217 }
218
219 &util::mk_all_dir (&util::filename_cat ($self->{'archive_dir'}, $doc_dir));
220
221 return $doc_dir;
222}
223
224
225sub process_assoc_files {
226 my $self = shift (@_);
227 my ($doc_obj, $doc_dir) = @_;
228
229 my $outhandle = $self->{'outhandle'};
230
231 my @assoc_files = ();
232 foreach $assoc_file (@{$doc_obj->get_assoc_files()}) {
233 my ($dir, $afile) = $assoc_file->[1] =~ /^(.*?)([^\/\\]+)$/;
234 $dir = "" unless defined $dir;
235 if (-e $assoc_file->[0]) {
236 my $filepath = &util::filename_cat($self->{'archive_dir'}, $doc_dir, $afile);
237 &util::hard_link ($assoc_file->[0], $filepath);
238 $doc_obj->add_utf8_metadata ($doc_obj->get_top_section(),
239 "gsdlassocfile",
240 "$afile:$assoc_file->[2]:$dir");
241 } else {
242 print $outhandle "docsave::process couldn't copy the associated file " .
243 "$assoc_file->[0] to $afile\n";
244 }
245 }
246}
247
248
249sub close_file_output
250{
251 my ($self) = @_;
252
253 close OUTDOC;
254
255 my $OID = $self->{'gs_OID'};
256 my $short_doc_file = $self->{'gs_short_filename'};
257
258 if ($self->{'gzip'}) {
259 my $doc_file = $self->{'gs_filename'};
260 `gzip $doc_file`;
261 $doc_file .= ".gz";
262 $short_doc_file .= ".gz";
263 if (!-e $doc_file) {
264 my $outhandle = $self->{'outhandle'};
265 print $outhandle "error while gzipping: $doc_file doesn't exist\n";
266 return 0;
267 }
268 }
269
270 # store reference in the archive_info
271 $self->{'archive_info'}->add_info($OID, $short_doc_file);
272
273 return 1;
274}
275
2761;
Note: See TracBrowser for help on using the repository browser.