source: gsdl/trunk/perllib/GDBMUtils.pm@ 17087

Last change on this file since 17087 was 17087, checked in by davidb, 16 years ago

Introduction of a new GDBM alternative to archives.inf as a step towards full incremental building. The information traditionally stored in archives.inf, PLUS additional information that will help with working out which files have changed since the last build and which doc-id they hashed to, is stored in two GDBM databases. For now these databases aren't read, but in the future ArchivesInfPlugin will be upgraded to use them for this purpose.

  • Property svn:keywords set to Author Date Id Revision
File size: 4.8 KB
Line 
1package GDBMUtils;
2
3use strict;
4use util;
5
# When true, the routines in this file print the external command lines
# they run to STDERR.
my $debug = 0;

# /** Global variables holding strings that describe the last lookup
#  *  performed by gdbmCachedCollectionGet(): the collection, the oid and
#  *  the value that was returned. All three start out empty.
#  */
my ($gdbmget_previous_collection,
    $gdbmget_previous_oid,
    $gdbmget_previous_value) = ("", "", "");
15
16
17
# /** Fetch the record stored under a key by running the external gdbmget
#  *  executable and capturing what it prints.
#  *
#  *  @param  $database  filename of the database, passed to gdbmget as-is.
#  *  @param  $oid       key of the record to fetch.
#  *  @return the captured standard output of gdbmget as a single string.
#  *
#  *  Note that both arguments are placed between double quotes on the
#  *  command line without any escaping.
#  */
sub gdbmDatabaseGet
{
    my ($database, $oid) = @_;

    # util supplies the executable suffix for the current platform
    # (e.g. whether .exe is needed on windows)
    my $suffix = &util::get_os_exe();

    # Build the command line once so the trace and the call cannot differ
    my $command = "gdbmget$suffix \"$database\" \"$oid\"";
    print STDERR "#Get document\ncmd: $command\n" if $debug;

    # Capture into a scalar first so the output always comes back as one
    # string, whatever context the caller uses
    my $output = `$command`;

    return $output;
}
32
# /** Append to the record stored under a key by running the external
#  *  gdbmset executable with a trailing "append" argument.
#  *
#  *  @param  $database  filename of the database, passed to gdbmset as-is.
#  *  @param  $oid       key of the record to append to.
#  *  @param  $value     text to append.
#  *
#  *  Only double quotes in $value are escaped; $database and $oid are
#  *  placed on the command line unmodified. The backtick call is the last
#  *  statement, so any output captured from gdbmset is what a caller
#  *  receives from this sub.
#  */
sub gdbmDatabaseAppend
{
    my ($database, $oid, $value) = @_;

    # util supplies the executable suffix for the current platform
    # (e.g. whether .exe is needed on windows)
    my $suffix = &util::get_os_exe();

    # Put a backslash in front of every speech mark so the value survives
    # being wrapped in double quotes on the command line
    (my $escaped = $value) =~ s/\"/\\\"/g;

    my $command = "gdbmset$suffix \"$database\" \"$oid\" \"$escaped\" append";
    print STDERR "#Set document\ncmd: $command\n" if $debug;

    `$command`;
}
46
47
# /** Store a value under a key by running the external gdbmset
#  *  executable.
#  *
#  *  @param  $database  filename of the database, passed to gdbmset as-is.
#  *  @param  $oid       key of the record to write.
#  *  @param  $value     new content for the record.
#  *
#  *  Only double quotes in $value are escaped; $database and $oid are
#  *  placed on the command line unmodified. The backtick call is the last
#  *  statement, so any output captured from gdbmset is what a caller
#  *  receives from this sub.
#  */
sub gdbmDatabaseSet
{
    my ($database, $oid, $value) = @_;

    # util supplies the executable suffix for the current platform
    # (e.g. whether .exe is needed on windows)
    my $suffix = &util::get_os_exe();

    # Put a backslash in front of every speech mark so the value survives
    # being wrapped in double quotes on the command line
    (my $escaped = $value) =~ s/\"/\\\"/g;

    my $command = "gdbmset$suffix \"$database\" \"$oid\" \"$escaped\"";
    print STDERR "#Set document\ncmd: $command\n" if $debug;

    `$command`;
}
61
62
# /** Remove the record stored under a key. This is done by running the
#  *  external gdbmset executable with a database and key but no value.
#  *
#  *  @param  $database  filename of the database, passed to gdbmset as-is.
#  *  @param  $oid       key of the record to remove.
#  *
#  *  Both arguments are placed between double quotes on the command line
#  *  without any escaping. The backtick call is the last statement, so any
#  *  output captured from gdbmset is what a caller receives from this sub.
#  */
sub gdbmDatabaseRemove
{
    my ($database, $oid) = @_;

    # Are we in windows? Do we need .exe?
    my $exe = &util::get_os_exe();

    # Remove the document from the database. The trace is labelled as a
    # removal so it can be told apart from a real set in the debug output.
    print STDERR "#Remove document\ncmd: gdbmset$exe \"$database\" \"$oid\"\n" if $debug;

    # Think it would be clearer if this functionality was done
    # by a separate executable, e.g. gdbmremove
    `gdbmset$exe "$database" "$oid"`;
}
77
78
79
# /** This wraps John T's gdbmget executable (through gdbmDatabaseGet) to
#  *  get the gdbm database entry for a particular OID.
#  *
#  *  The result of the most recent lookup is remembered, so asking for the
#  *  same collection and oid twice in a row only runs gdbmget once.
#  *
#  *  @param  $collection is the collection name.
#  *  @param  $oid is the internal document id.
#  *  @return the database entry, with every run of consecutive line breaks
#  *          reduced to a single line break.
#  *
#  *  @author John Rowe, DL Consulting Ltd.
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub gdbmCachedCollectionGet
{
    my ($collection, $oid) = @_;

    # Start by checking if this request is the same as the previous one, and
    # if so return the cached version instead. This is an optimization to
    # improve performance when checking if a certain GDBM document exists
    # before creating a new node object
    if ($collection eq $gdbmget_previous_collection
        && $oid eq $gdbmget_previous_oid)
    {
        print STDERR "#Get document - using cached value\n" if $debug;
        return $gdbmget_previous_value;
    }

    # Where's the database?
    my $database = _getDatabasePath($collection);

    # Fetch the raw entry. (This call used to be misspelt, which made every
    # uncached lookup die with an undefined subroutine error.)
    my $value = gdbmDatabaseGet($database, $oid);

    # Tidy up the ever growing number of newlines at the end of the value.
    # Note that the substitution is not anchored: it collapses every run of
    # line breaks in the value, not only the trailing one.
    $value =~ s/(\r?\n)+/$1/g;
    # Why do we need the above line? At the very least it would seem
    # better that the data going in to the database through 'set' is
    # monitored for superfluous \r\n which are then removed before being
    # saved in GDBM

    # Cache this result
    $gdbmget_previous_collection = $collection;
    $gdbmget_previous_oid = $oid;
    $gdbmget_previous_value = $value;

    # Done
    return $value;
}
# /** gdbmCachedCollectionGet **/
125
# /** This wraps John T's gdbmset executable (through gdbmDatabaseSet and
#  *  gdbmDatabaseRemove) to set the gdbm database entry for a particular
#  *  OID. This does not yet report errors.
#  *
#  *  @param  $collection is the collection name.
#  *  @param  $oid is the internal document id.
#  *  @param  $value is the new value to set for the oid. When it is
#  *          undefined the entry for the oid is removed instead.
#  *  @return 0, which is what the sub has always returned implicitly.
#  *
#  *  @author John Rowe, DL Consulting Ltd.
#  */
sub gdbmCachedCollectionSet
{
    my ($collection, $oid, $value) = @_;

    # Where's the database?
    my $database = _getDatabasePath($collection);

    # Check whether value is set
    if (defined($value))
    {
        gdbmDatabaseSet($database, $oid, $value);
    }
    else
    {
        # No value means "delete". (This call used to be misspelt, which
        # made every removal die with an undefined subroutine error.)
        gdbmDatabaseRemove($database, $oid);
    }

    # Empty any cached values, as they may now be invalid. All three are
    # put back to the empty string they start out with, so the cache is in
    # exactly its initial state.
    $gdbmget_previous_collection = "";
    $gdbmget_previous_oid = "";
    $gdbmget_previous_value = "";

    # Keep the value callers previously received from the final assignment
    return 0;
}
# /** gdbmCachedCollectionSet **/
161
# /** Work out where a collection's text database lives and hand the
#  *  filename back to the calling function.
#  *
#  *  The filename is assembled by util::filename_cat from $ENV{'GSDLHOME'},
#  *  "collect", the collection name, "index", "text" and finally the
#  *  collection name plus a database extension.
#  *
#  *  @param  $collection The current collection name
#  *  @return whatever util::filename_cat produces for those components
#  *
#  *  @author John Rowe, DL Consulting Ltd.
#  */
sub _getDatabasePath
{
    my ($collection) = @_;

    # The extension depends on what util reports about byte order:
    # .ldb when little endian, .bdb otherwise
    my $ext;
    if (&util::is_little_endian())
    {
        $ext = ".ldb";
    }
    else
    {
        $ext = ".bdb";
    }

    # The database file itself is named after the collection
    my $filename = $collection . $ext;

    # Now return the full filename of the database
    return &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection, "index", "text", $filename);
}
# /** getDatabasePath **/
182
1831;
Note: See TracBrowser for help on using the repository browser.