source: main/trunk/greenstone2/perllib/DocHistoryFileUtils.pm@ 37149

Last change on this file since 37149 was 37149, checked in by davidb, 15 months ago

Support routines to help implement new file-level document-version history feature for import.pl

File size: 12.2 KB
Line 
1###########################################################################
2#
3# DocHistoryFileUtils.pm
4# -- supporting routines for file-level document-version history (fldv-history)
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2023 New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27
28package DocHistoryFileUtils;
29
30# Pragma
31use strict;
32use warnings;
33
34# use FileHandle;
35
36# Greenstone modules
37use dbutil;
38use FileUtils;
39
# Name of the sub-directory, inside each document's archives folder, that
# holds the file-level document-version history ('nminus-1', 'nminus-2', ...)
my $fldv_history_dir = '_fldv_history';
41
# prepend_document_version($keepold_doc_dirname, $doc_dirname)
#
# Stores the previous version of a document as the newest entry
# ('nminus-1') in the document's file-level version history.
#
#   $keepold_doc_dirname  directory holding the previous version of the document
#   $doc_dirname          directory holding the current version of the document;
#                         the history lives in its '$fldv_history_dir' sub-directory
#
# If the history directory already exists, every existing 'nminus-<n>'
# directory is first renamed to 'nminus-<n+1>'.  Otherwise the history
# directory is created.  The previous version is then hard-linked into
# 'nminus-1', excluding its own history directory.
#
# Returns nothing useful.  On any failure an explanation is printed to STDERR
# and the process exits with status 1.
sub prepend_document_version
{
    my ($keepold_doc_dirname,$doc_dirname) = @_;

    my $status_ok = 1;

    my $doc_fldv_history_dirname = &FileUtils::filenameConcatenate($doc_dirname,$fldv_history_dir);

    if (&FileUtils::directoryExists($doc_fldv_history_dirname)) {
        # need to shuffle nminus-1, nminus-2 down by one
        my $matching_dirs = &FileUtils::readDirectoryFiltered($doc_fldv_history_dirname,undef,"^nminus-\\d+\$");

        # Defensive: treat an undefined listing the same as an empty one
        $matching_dirs = [] if (!defined $matching_dirs);

        # want sort order to be highest to lowest, so the directory with the
        # largest 'n' is moved out of the way before the one below it is renamed
        my @sorted_matching_dirs = sort {
            my ($a_num) = ($a =~ m/(\d+)$/);
            my ($b_num) = ($b =~ m/(\d+)$/);

            $b_num <=> $a_num;
        } @$matching_dirs;

        foreach my $nminus_n (@sorted_matching_dirs) {

            my $full_dir = &FileUtils::filenameConcatenate($doc_fldv_history_dirname,$nminus_n);

            if (!-d $full_dir) {
                print STDERR "Warning: skipping $full_dir as it is not a directory\n";
                next;
            }

            my ($n) = ($nminus_n =~ m/(\d+)$/);
            my $new_n = $n + 1;

            my $full_new_dir = &FileUtils::filenameConcatenate($doc_fldv_history_dirname,"nminus-$new_n");

            &FileUtils::moveFiles($full_dir,$full_new_dir);

            # The move's outcome is not tested via a return value here, so
            # verify it on disk.  Carrying on after a failed move would rename
            # the next (lower) directory onto one that still exists, and would
            # later hard-link the new snapshot over a stale 'nminus-1'.
            if ((-e $full_dir) || (!-d $full_new_dir)) {
                print STDERR "Error: DocHistoryFileUtils::prepend_document_version() failed to move directory:\n";
                print STDERR " '$full_dir' -> '$full_new_dir'\n";

                $status_ok = 0;
                last;
            }
        }
    }
    else {
        my $mkdir_ok = &FileUtils::makeDirectory($doc_fldv_history_dirname);

        if (!$mkdir_ok) {
            print STDERR "Error: DocHistoryFileUtils::prepend_document_version() failed to make directory:\n";
            print STDERR " '$doc_fldv_history_dirname'\n";

            $status_ok = 0;
        }
    }

    if ($status_ok) {

        my $doc_fldv_history_dirname_nminus1 = &FileUtils::filenameConcatenate($doc_fldv_history_dirname,"nminus-1");

        # Exclude the previous version's own history directory from the snapshot
        my $hard_link_ok = &FileUtils::hardlinkFilesRefRecursive([$keepold_doc_dirname],$doc_fldv_history_dirname_nminus1,
                                                                 { 'strict' => 1, 'exclude_filter_re' => "^$fldv_history_dir\$" } );

        if (!$hard_link_ok) {
            print STDERR "Error: DocHistoryFileUtils::prepend_document_version() failed to hardLink\n";
            print STDERR " '$keepold_doc_dirname' -> '$doc_fldv_history_dirname_nminus1'\n";

            $status_ok = 0;
        }
    }


    if (!$status_ok) {
        print STDERR "**** Critical error occurred in creating/updating file-level document-version history\n";
        print STDERR "**** After determining and correcting the cause of the error, to reset, delete\n";
        print STDERR "**** your 'archives' folder, and replace it with 'archives_keep'\n";
        exit 1;
    }
}
118
# replace_document_version($keepold_doc_dirname, $doc_dirname)
#
# Replaces the newest entry ('nminus-1') in a document's file-level version
# history with the previous version of the document.  Unlike
# prepend_document_version(), older history entries are left untouched.
#
#   $keepold_doc_dirname  directory holding the previous version of the document
#   $doc_dirname          directory holding the current version of the document;
#                         the history lives in its '$fldv_history_dir' sub-directory
#
# Returns nothing useful.  On any failure an explanation is printed to STDERR
# and the process exits with status 1.
sub replace_document_version
{
    my ($keepold_doc_dirname,$doc_dirname) = @_;

    my $status_ok = 1;

    my $doc_fldv_history_dirname = &FileUtils::filenameConcatenate($doc_dirname,$fldv_history_dir);
    my $doc_fldv_history_dirname_nminus1 = &FileUtils::filenameConcatenate($doc_fldv_history_dirname,"nminus-1");

    if (!&FileUtils::directoryExists($doc_fldv_history_dirname)) {
        my $mkdir_ok = &FileUtils::makeDirectory($doc_fldv_history_dirname);

        if (!$mkdir_ok) {
            print STDERR "Error: DocHistoryFileUtils::replace_document_version() failed to make directory:\n";
            print STDERR " '$doc_fldv_history_dirname'\n";

            $status_ok = 0;
        }
    }
    elsif ((-e $doc_fldv_history_dirname_nminus1) || (-l $doc_fldv_history_dirname_nminus1)) {
        # Only attempt the removal when there is an existing 'nminus-1' to replace
        &FileUtils::removeFilesRecursive($doc_fldv_history_dirname_nminus1);

        # The removal's outcome is not tested via a return value here, so
        # verify it on disk.  Hard-linking into a snapshot that is still
        # present would mix the new snapshot with leftover files.
        if ((-e $doc_fldv_history_dirname_nminus1) || (-l $doc_fldv_history_dirname_nminus1)) {
            print STDERR "Error: DocHistoryFileUtils::replace_document_version() failed to remove directory:\n";
            print STDERR " '$doc_fldv_history_dirname_nminus1'\n";

            $status_ok = 0;
        }
    }

    if ($status_ok) {

        # Exclude the previous version's own history directory from the snapshot
        my $hard_link_ok = &FileUtils::hardlinkFilesRefRecursive([$keepold_doc_dirname],$doc_fldv_history_dirname_nminus1,
                                                                 { 'strict' => 1, 'exclude_filter_re' => "^$fldv_history_dir\$" } );
        if (!$hard_link_ok) {
            print STDERR "Error: DocHistoryFileUtils::replace_document_version() failed to hardLink\n";
            print STDERR " '$keepold_doc_dirname' -> '$doc_fldv_history_dirname_nminus1'\n";

            $status_ok = 0;
        }
    }

    if (!$status_ok) {
        print STDERR "**** Critical error occurred in creating/updating file-level document-version history\n";
        print STDERR "**** After determining and correcting the cause of the error, to reset, delete\n";
        print STDERR "**** your 'archives' folder, and replace it with 'archives_keep'\n";
        exit 1;
    }
}
162
163
164
# archivedir_keepold_to_archivedir($collectcfg, $keepold, $replaceold,
#                                  $incremental_mode, $archive_info,
#                                  $archivedir, $archivedir_keepold)
#
# Walks every document recorded in the 'archives_keepold' archive-info
# database and, where appropriate, stores that previous version in the
# file-level document-version history of the matching document in 'archives'.
#
#   $collectcfg          hash ref; only its 'infodbtype' entry is read here
#   $keepold             true => push previous version onto the history
#                        (prepend_document_version)
#   $replaceold          true => overwrite the 'nminus-1' history entry
#                        (replace_document_version); only consulted when
#                        $keepold is false
#   $incremental_mode    string; compared against "all" for documents whose
#                        index status is 'B'
#   $archive_info        archive-info object for the up-to-date 'archives';
#                        get_info(OID) is called on it
#   $archivedir          the 'archives' directory
#   $archivedir_keepold  the 'archives_keepold' directory; removed at the end
#
# Progress and errors are reported on STDERR.  The helper routines exit the
# process on a critical failure.
#
# NOTE(review): 'arcinfo' and 'util' are used below but are not loaded by this
# file's own 'use' lines; presumably the calling script has already loaded
# them -- confirm.
sub archivedir_keepold_to_archivedir
{
    my ($collectcfg, $keepold,$replaceold,$incremental_mode, $archive_info,$archivedir, $archivedir_keepold) = @_;

    my $perform_firsttime_init = 1;
    my $arcinfo_keepold_doc_filename = &dbutil::get_infodb_file_path($collectcfg->{'infodbtype'}, "archiveinf-doc", $archivedir_keepold, $perform_firsttime_init);

    # NOTE(review): nothing below ever clears $status_ok, so 'archives_keepold'
    # is always removed at the end, even after the non-fatal errors reported in
    # the loop.  Left as-is to preserve existing behaviour -- confirm intent.
    my $status_ok = 1;

    my $archive_keepold_info = arcinfo->new($collectcfg->{'infodbtype'});
    $archive_keepold_info->load_info($arcinfo_keepold_doc_filename);

    my $archive_keepold_oidinfo_list = $archive_keepold_info->get_OID_list();

    foreach my $keepold_info_array (@$archive_keepold_oidinfo_list) {

        # ->[2] holds the keepold entry's own index status, which is not needed here
        my $keepold_OID      = $keepold_info_array->[0];
        my $keepold_doc_file = $keepold_info_array->[1];

        # Work through all entries in arcinfo_keepold doc-id entries:
        #
        # If keepold entry does *not* exist in (the more up to date) archive_info
        #   => print out an error.  This should not happen!
        #
        # If keepold entry *does* exist in (the more up to date) archive_info
        #
        #  (1) if archive_info entry is marked for deletion (D)
        #      => Do nothing
        #         This will result in archive_keep and archives versions being wiped
        #         (respectively after import.pl and buildcol.pl)
        #
        #  (2) if -keepold on
        #      => Use hard-linking to turn 'archives_keepold' doc folder
        #         (without its fldv-history folder) into fldv-history/nminus-1 in
        #         the 'archives' doc folder, having first shuffled any existing
        #         nminus-1, nminus-2 folders down by one value
        #
        #  (3) if -replaceold on
        #      => then a similar process to (2), except the 'archives_keepold'
        #         doc folder (without its fldv-history folder), directly replaces
        #         the 'nminus-1' folder in the 'archives' doc's 'fldv-history' folder
        #
        # Indexing cases to consider:
        #   I = needs to be Indexed
        #   R = needs to be Reindexed
        #   D = needs to be Deleted
        #   B = has Been indexed
        #
        # Specifics of what to do for (2) and (3) depend at times on incremental_mode
        print STDERR "Document Versioning: Processing $keepold_OID\n";

        my $oid_info_array = $archive_info->get_info($keepold_OID);

        if (!defined $oid_info_array) {
            print STDERR "**** Error: Did not find 'archives/' matching document for 'archives_keepold/' OID=$keepold_OID\n";
            print STDERR "Skipping!\n";
            next;
        }

        my $doc_file     = $oid_info_array->[0];
        my $index_status = $oid_info_array->[1];
        # For completeness, the other positions in oid_info_array are
        #   ->[2] is sort_meta
        #   ->[3] is group_position

        # **** Need additional work if -groupsize option > 1 is to be supported!
        # **** otherwise need to have earlier test to prevent -groupsize > 1 from being used with fldv-history

        if ($index_status eq "D") {
            #
            # (1) if archive_info entry marked for deletion (D)
            #
            print STDERR " The latest version of $keepold_OID is marked for deletion\n";
            print STDERR " => nothing to do right now (will be deleted when buildcol.pl run)\n";
            next;
        }

        my $keepold_doc_filename = &FileUtils::filenameConcatenate($archivedir_keepold, $keepold_doc_file);
        my $keepold_doc_dirname  = &util::get_parent_folder($keepold_doc_filename);

        my $doc_filename = &FileUtils::filenameConcatenate($archivedir, $doc_file);
        my $doc_dirname  = &util::get_parent_folder($doc_filename);

        if ($keepold) {
            #
            # (2) if -keepold on
            #

            # Still need to consider: I, R, B
            #   R => always trigger store doc-history
            #   I => always trigger store doc-history
            #   B => trigger store doc-history if incremental_mode != "all"

            if ($index_status eq "R") {
                print STDERR " Updated version of document from import directory (Index-Status=R)\n";
                print STDERR " => keepold: storing snapshot of previous version in $fldv_history_dir as 'nminus-1'\n";
                prepend_document_version($keepold_doc_dirname,$doc_dirname);
            }
            elsif ($index_status eq "I") {
                print STDERR " Newly generated version of document from 'import/' of existing document from 'archives/' (Index-Status=I)\n";
                print STDERR " => keepold: store snapshot of previous version in $fldv_history_dir as 'nminus-1'\n";
                prepend_document_version($keepold_doc_dirname,$doc_dirname);
            }
            elsif ($index_status eq "B") {
                if ($incremental_mode ne "all") {
                    print STDERR " Unchanged version of document in 'archives/' (Index-Status=B)\n";
                    print STDERR " => keepold without incremental: store snapshot of previous version in $fldv_history_dir as 'nminus-1'\n";
                    prepend_document_version($keepold_doc_dirname,$doc_dirname);
                }
                else {
                    print STDERR " Unchanged version of document in 'archives/' (Index-Status=B)\n";
                    print STDERR " => no snapshot store needed when in incremental mode\n";
                }
            }
            else {
                # Previously an unrecognised status was ignored without any trace
                print STDERR " Warning: unrecognised Index-Status '$index_status' for $keepold_OID => no snapshot stored\n";
            }
        }
        elsif ($replaceold) {
            #
            # (3) if -replaceold on
            #

            # Still need to consider: I, R, B
            #   R => always trigger replace nminus-1 doc-history
            #   I => always trigger replace nminus-1 doc-history
            #   B => trigger replace nminus-1 doc-history if incremental_mode != "all"

            if ($index_status eq "R") {
                print STDERR " Updated version of document from import directory (Index-Status=R)\n";
                print STDERR " => replaceold: replacing snapshot of previous version in $fldv_history_dir as 'nminus-1'\n";
                replace_document_version($keepold_doc_dirname,$doc_dirname);
            }
            elsif ($index_status eq "I") {
                print STDERR " Newly generated version of document from 'import/' of existing document from 'archives/' (Index-Status=I)\n";
                # Message corrected: this is the replaceold path, not keepold
                print STDERR " => replaceold: replacing snapshot of previous version in $fldv_history_dir as 'nminus-1'\n";
                replace_document_version($keepold_doc_dirname,$doc_dirname);
            }
            elsif ($index_status eq "B") {
                if ($incremental_mode ne "all") {
                    print STDERR " Unchanged version of document in 'archives/' (Index-Status=B)\n";
                    # Message corrected: this is the replaceold path, not keepold
                    print STDERR " => replaceold without incremental: replacing snapshot of previous version in $fldv_history_dir as 'nminus-1'\n";
                    replace_document_version($keepold_doc_dirname,$doc_dirname);
                }
                else {
                    print STDERR " Unchanged version of document in 'archives/' (Index-Status=B)\n";
                    print STDERR " => no snapshot store needed when in incremental mode\n";
                }
            }
            else {
                # Previously an unrecognised status was ignored without any trace
                print STDERR " Warning: unrecognised Index-Status '$index_status' for $keepold_OID => no snapshot replaced\n";
            }
        }
        else {
            # Neither -keepold nor -replaceold was requested
            print STDERR "**** Error: Encountered a condition for $keepold_OID that was not expected\n";
        }
    }

    if ($status_ok) {
        &FileUtils::removeFilesRecursive($archivedir_keepold);
    }
}
330
3311;
Note: See TracBrowser for help on using the repository browser.