source: gsdl/trunk/perllib/GDBMUtils.pm@ 18456

Last change on this file since 18456 was 18456, checked in by davidb, 15 years ago

Additions to support the deleting of documents from the index. Only works for indexers that support incremental building, e.g. lucene

  • Property svn:keywords set to Author Date Id Revision
File size: 5.0 KB
Line 
1package GDBMUtils;
2
3use strict;
4use util;
5
6my $debug = 0;
7
8# /** Global variables to hold a strings containing:
9# * the last collection, oid and value
10# * a gdbmCachedCollectionGet() was performed on.
11# */
12my $gdbmget_previous_collection = "";
13my $gdbmget_previous_oid = "";
14my $gdbmget_previous_value = "";
15
16
17
18sub gdbmDatabaseGet
19{
20 my ($database, $oid) = @_;
21
22 # Are we in windows? Do we need .exe?
23 my $exe = &util::get_os_exe();
24
25 # Retrieve the raw document content
26 print STDERR "#Get document\ncmd: gdbmget$exe \"$database\" \"$oid\"\n" if $debug;
27 my $value = `gdbmget$exe "$database" "$oid"`;
28
29 # Done
30 return $value;
31}
32
33sub gdbmRecordToHash
34{
35 my ($database, $oid) = @_;
36
37 my $val = gdbmDatabaseGet($database,$oid);
38
39 my $rec = {};
40
41 while ($val =~ m/^<(.*?)>(.*)$/mg) {
42 my $metaname = $1;
43 my $metavalue = $2;
44
45 if (!defined $rec->{$metaname}) {
46 $rec->{$metaname} = [ $metavalue ];
47 }
48 else {
49 push(@{$rec->{$metaname}},$metavalue);
50 }
51 }
52
53 return $rec;
54}
55
56
57sub gdbmDatabaseAppend
58{
59 my ($database, $oid, $value) = @_;
60
61 # Are we in windows? Do we need .exe?
62 my $exe = &util::get_os_exe();
63
64 # Escape any speech marks in the value
65 $value =~ s/\"/\\\"/g;
66 # Set the document content
67 print STDERR "#Set document\ncmd: gdbmset$exe \"$database\" \"$oid\" \"$value\" append\n" if $debug;
68 `gdbmset$exe "$database" "$oid" "$value" append`;
69}
70
71
72sub gdbmDatabaseSet
73 {
74 my ($database, $oid, $value) = @_;
75
76 # Are we in windows? Do we need .exe?
77 my $exe = &util::get_os_exe();
78
79 # Escape any speech marks in the value
80 $value =~ s/\"/\\\"/g;
81 # Set the document content
82 print STDERR "#Set document\ncmd: gdbmset$exe \"$database\" \"$oid\" \"$value\"\n" if $debug;
83 `gdbmset$exe "$database" "$oid" "$value"`;
84}
85
86
87sub gdbmDatabaseRemove
88 {
89 my ($database, $oid) = @_;
90
91 # Are we in windows? Do we need .exe?
92 my $exe = &util::get_os_exe();
93
94 # Remove the document from the database
95 print STDERR "#Set document\ncmd: gdbmset$exe \"$database\" \"$oid\"\n" if $debug;
96
97 `gdbmdel$exe "$database" "$oid"`;
98}
99
100
101
102# /** This wraps John T's gdbmget executable to get the gdbm database entry for
103# * a particular OID.
104# *
105# * @param $collection is the collection name.
106# * @param $oid is the internal document id.
107# *
108# *
109# * @author John Rowe, DL Consulting Ltd.
110# * @author John Thompson, DL Consulting Ltd.
111# */
112sub gdbmCachedCollectionGet
113 {
114 my ($collection, $oid) = @_;
115 # Start by checking if this request is the same as the previous one, and if
116 # so return the cache version instead. This is an optimization to improve
117 # performance when checking if a certain GDBM document exists before
118 # creating a new node object
119 if($collection eq $gdbmget_previous_collection
120 && $oid eq $gdbmget_previous_oid)
121 {
122 print STDERR "#Get document - using cached value\n" if $debug;
123 return $gdbmget_previous_value;
124 }
125
126 # Where's the database?
127 my $database = _getDatabasePath($collection);
128
129 my $value = gdbmDatabaseGet($database,$oid);
130
131 # Tidy up the ever growing number of newlines at the end of the value
132 $value =~ s/(\r?\n)+/$1/g;
133 # Why do we need the above line? At the very least it would seem
134 # better that the data going in to the database through 'set' is
135 # monitored for superfluous \r\n which are then removed before being
136 # saved in GDBM
137
138 # Cache this result
139 $gdbmget_previous_collection = $collection;
140 $gdbmget_previous_oid = $oid;
141 $gdbmget_previous_value = $value;
142
143 # Done
144 return $value;
145 }
146# /** gdbmCachedCollectionGet **/
147
148# /** This wraps John T's gdbmset executable to set the gdbm database entry for
149# * a particular OID. This does not yet report errors.
150# *
151# * @param $collection is the collection name.
152# * @param $oid is the internal document id.
153# * @param $value is the new value to set for the oid.
154# *
155# * @author John Rowe, DL Consulting Ltd.
156# */
157sub gdbmCachedCollectionSet
158 {
159 my ($collection, $oid, $value) = @_;
160
161 # Where's the database?
162 my $database = _getDatabasePath($collection);
163
164
165 # Check whether value is set
166 if (defined($value))
167 {
168 gdbmDatabaseSet($database,$oid,$value);
169 }
170 else
171 {
172 gdbmDatabaseRemove($database,$oid);
173 }
174
175 # Empty any cached values, as they may now be invalid
176
177 # Cache this result
178 $gdbmget_previous_collection = "";
179 $gdbmget_previous_oid = "";
180 $gdbmget_previous_value = 0;
181 }
182# /** gdbmCollectionSet **/
183
184# /** This works out the database path and returns it to the calling
185# * calling function.
186# *
187# * @param $collection The current collection name
188# *
189# * @author John Rowe, DL Consulting Ltd.
190# */
191
192sub _getDatabasePath
193 {
194 my $collection = shift(@_);
195
196 # Find out the database extension
197 my $ext = &util::is_little_endian() ? ".ldb" : ".bdb";
198
199 # Now return the full filename of the database
200
201 return &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection, "index", "text", $collection.$ext);
202 }
203# /** getDatabasePath **/
204
2051;
Note: See TracBrowser for help on using the repository browser.