root/gsdl/trunk/perllib/GDBMUtils.pm @ 18456

Revision 18456, 5.0 KB (checked in by davidb, 12 years ago)

Additions to support the deleting of documents from the index. Only works for indexers that support incremental building, e.g. lucene

  • Property svn:keywords set to Author Date Id Revision
Line 
1package GDBMUtils;
2
3use strict;
4use util;
5
6my $debug = 0;
7
8# /** Global variables to hold a strings containing:
9#  *    the last collection, oid and value
10#  *  a gdbmCachedCollectionGet() was performed on.
11#  */
12my $gdbmget_previous_collection = "";
13my $gdbmget_previous_oid = "";
14my $gdbmget_previous_value = "";
15
16
17
18sub gdbmDatabaseGet
19{
20    my ($database, $oid) = @_;
21
22    # Are we in windows? Do we need .exe?
23    my $exe = &util::get_os_exe();
24
25    # Retrieve the raw document content
26    print STDERR "#Get document\ncmd: gdbmget$exe \"$database\" \"$oid\"\n" if $debug;
27    my $value = `gdbmget$exe "$database" "$oid"`;
28
29    # Done
30    return $value;
31}
32
33sub gdbmRecordToHash
34{
35    my ($database, $oid) = @_;
36
37    my $val = gdbmDatabaseGet($database,$oid);
38
39    my $rec = {};
40
41    while ($val =~ m/^<(.*?)>(.*)$/mg) {
42    my $metaname = $1;
43    my $metavalue = $2;
44
45    if (!defined $rec->{$metaname}) {
46        $rec->{$metaname} = [ $metavalue ];
47    }
48    else {
49        push(@{$rec->{$metaname}},$metavalue);
50    }
51    }
52
53    return $rec;
54}
55
56
57sub gdbmDatabaseAppend
58{
59    my ($database, $oid, $value) = @_;
60
61    # Are we in windows? Do we need .exe?
62    my $exe = &util::get_os_exe();
63
64    # Escape any speech marks in the value
65    $value =~ s/\"/\\\"/g;
66    # Set the document content
67    print STDERR "#Set document\ncmd: gdbmset$exe \"$database\" \"$oid\" \"$value\" append\n" if $debug;
68    `gdbmset$exe "$database" "$oid" "$value" append`;
69}
70
71
72sub gdbmDatabaseSet
73  {
74    my ($database, $oid, $value) = @_;
75
76    # Are we in windows? Do we need .exe?
77    my $exe = &util::get_os_exe();
78
79    # Escape any speech marks in the value
80    $value =~ s/\"/\\\"/g;
81    # Set the document content
82    print STDERR "#Set document\ncmd: gdbmset$exe \"$database\" \"$oid\" \"$value\"\n" if $debug;
83    `gdbmset$exe "$database" "$oid" "$value"`;
84}
85
86
87sub gdbmDatabaseRemove
88  {
89    my ($database, $oid) = @_;
90
91    # Are we in windows? Do we need .exe?
92    my $exe = &util::get_os_exe();
93
94    # Remove the document from the database
95    print STDERR "#Set document\ncmd: gdbmset$exe \"$database\" \"$oid\"\n" if $debug;
96
97    `gdbmdel$exe "$database" "$oid"`;
98}
99
100
101
102# /** This wraps John T's gdbmget executable to get the gdbm database entry for
103#  *  a particular OID.
104#  *
105#  *  @param $collection is the collection name.
106#  *  @param $oid is the internal document id.
107#  *
108#  *
109#  *  @author John Rowe, DL Consulting Ltd.
110#  *  @author John Thompson, DL Consulting Ltd.
111#  */
112sub gdbmCachedCollectionGet
113  {
114    my ($collection, $oid) = @_;
115    # Start by checking if this request is the same as the previous one, and if
116    # so return the cache version instead. This is an optimization to improve
117    # performance when checking if a certain GDBM document exists before
118    # creating a new node object
119    if($collection eq $gdbmget_previous_collection
120       && $oid eq $gdbmget_previous_oid)
121      {
122        print STDERR "#Get document - using cached value\n" if $debug;
123        return $gdbmget_previous_value;
124      }
125
126    # Where's the database?
127    my $database = _getDatabasePath($collection);
128
129    my $value = gdbmDatabaseGet($database,$oid);
130
131    # Tidy up the ever growing number of newlines at the end of the value
132    $value =~ s/(\r?\n)+/$1/g;
133    # Why do we need the above line?  At the very least it would seem
134    # better that the data going in to the database through 'set' is
135    # monitored for superfluous \r\n which are then removed before being
136    # saved in GDBM
137
138    # Cache this result
139    $gdbmget_previous_collection = $collection;
140    $gdbmget_previous_oid = $oid;
141    $gdbmget_previous_value = $value;
142
143    # Done
144    return $value;
145  }
146# /** gdbmCachedCollectionGet **/
147
148# /** This wraps John T's gdbmset executable to set the gdbm database entry for
149#  *  a particular OID. This does not yet report errors.
150#  *
151#  *  @param $collection is the collection name.
152#  *  @param $oid is the internal document id.
153#  *  @param $value is the new value to set for the oid.
154#  *
155#  *  @author John Rowe, DL Consulting Ltd.
156#  */
157sub gdbmCachedCollectionSet
158  {
159    my ($collection, $oid, $value) = @_;
160
161    # Where's the database?
162    my $database = _getDatabasePath($collection);
163
164
165    # Check whether value is set
166    if (defined($value))
167      {
168      gdbmDatabaseSet($database,$oid,$value);
169      }
170    else
171      {
172      gdbmDatabaseRemove($database,$oid);
173      }
174
175    # Empty any cached values, as they may now be invalid
176
177    # Cache this result
178    $gdbmget_previous_collection = "";
179    $gdbmget_previous_oid = "";
180    $gdbmget_previous_value = 0;
181  }
182# /** gdbmCollectionSet **/
183
184# /** This works out the database path and returns it to the calling
185#  *  calling function.
186#  *
187#  *  @param $collection The current collection name
188#  *
189#  *  @author John Rowe, DL Consulting Ltd.
190#  */
191
192sub _getDatabasePath
193  {
194    my $collection = shift(@_);
195
196    # Find out the database extension
197    my $ext = &util::is_little_endian() ? ".ldb" : ".bdb";
198
199    # Now return the full filename of the database
200
201    return &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection, "index", "text", $collection.$ext);
202  }
203# /** getDatabasePath **/
204
2051;
Note: See TracBrowser for help on using the browser.