source: gsdl/trunk/perllib/IncrementalDocument.pm@ 17087

Last change on this file since 17087 was 17087, checked in by davidb, 16 years ago

Introduction of a new GDBM alternative to archives.inf as a step towards full incremental building. The information traditionally stored in archives.inf, plus additional information that helps work out which files have changed since the last build and which doc-id each one hashed to, is stored in two GDBM databases. For now these databases aren't read, but in the future ArchivesInfPlugin will be upgraded to use them to support incremental building.

  • Property svn:keywords set to Author Date Id Revision
File size: 7.3 KB
Line 
1###########################################################################
2#
3# IncrementalDocument.pm -- An object to encapsulate the Greenstone
4# document retrieved from the GDBM database.
5#
6# A component of the Greenstone digital library software
7# from the New Zealand Digital Library Project at the
8# University of Waikato, New Zealand.
9#
10# Copyright (C) 2006 DL Consulting Ltd and New Zealand Digital Library Project
11#
12# This program is free software; you can redistribute it and/or modify
13# it under the terms of the GNU General Public License as published by
14# the Free Software Foundation; either version 2 of the License, or
15# (at your option) any later version.
16#
17# This program is distributed in the hope that it will be useful,
18# but WITHOUT ANY WARRANTY; without even the implied warranty of
19# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20# GNU General Public License for more details.
21#
22# You should have received a copy of the GNU General Public License
23# along with this program; if not, write to the Free Software
24# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
25#
26###########################################################################
27package IncrementalDocument;
28
29use GDBMUtils;
30use strict;
31
# /** Construct an IncrementalDocument for one document of a collection.
#  *
#  *  The new object holds no metadata yet; loadDocument() fills it from the
#  *  GDBM database and addMetadata() adds individual pairs.
#  *
#  *  @param  $class      The package name to bless the new object into
#  *  @param  $collection The collection the document belongs to
#  *  @param  $oid        The unique identifier (OID) of the document
#  *  @return A blessed hash reference with the fields 'collection', 'oid',
#  *          'data' and 'order'
#  *  @throws Dies if $collection or $oid is undefined or the empty string
#  */
sub new
{
    my ($class, $collection, $oid) = @_;

    # Test the parameters. The test is "defined and non-empty" rather than
    # plain truth so that the string "0" is accepted as an identifier.
    die ("Error! Can't create a document that doesn't belong to a collection!")
        unless defined($collection) && length($collection);
    die ("Error! Can't create a document that doesn't have a unique id (OID)!")
        unless defined($oid) && length($oid);

    my $self = {
        # The collection this document object has been loaded from.
        'collection' => $collection,

        # The unique identifier of the document loaded.
        'oid'        => $oid,

        # Maps each metadata key to a nested hash whose keys are the values
        # recorded for it (flag 1 = active, 0 = removed).
        'data'       => {},

        # Maps a zero-based index to the metadata key discovered at that
        # position, i.e. the order in which keys were discovered/added.
        'order'      => {},
    };

    return bless $self, $class;
}
# /** new() **/
64
# /** Record a key/value metadata pair against this document.
#  *
#  *  Values are kept as a set per key, so adding the same pair twice has no
#  *  further effect, and adding a pair that was flagged as removed makes it
#  *  active again. The first time a key is seen its position is recorded in
#  *  'order' so keys can later be listed in the order they were encountered.
#  *
#  *  @param  $self     A reference to this IncrementalDocument object
#  *  @param  $key      The metadata key; must contain a word character
#  *  @param  $value    The metadata value; must contain at least one
#  *                    non-whitespace character
#  *  @param  $internal Accepted so existing callers keep working; not used
#  *                    by this method
#  *  @return Always 1
#  *  @throws Dies if $key or $value fails the validation above
#  */
sub addMetadata
{
    my ($self, $key, $value, $internal) = @_;

    # Validate the arguments. The value test is deliberately "anything that
    # is not blank": a word-character test would reject values consisting
    # only of punctuation and, on byte strings that have not been decoded,
    # values consisting only of non-ASCII characters.
    die ("Error! Can't add a metadata value to a document without a valid key!")
        unless defined($key) && $key =~ /\w/;
    die ("Error! Can't add a metadata key to a document without a valid value!")
        unless defined($value) && $value =~ /\S/;

    my $data = $self->{'data'};

    # Is this a new key that we haven't encountered before? If so give it a
    # value set and file it under the next free index of 'order'.
    if (!defined($data->{$key}))
    {
        my $order = $self->{'order'};
        my $next_index = scalar(keys %{$order});
        $order->{$next_index} = $key;
        $data->{$key} = {};
    }

    # Flag the pair as active.
    $data->{$key}->{$value} = 1;
    return 1;
}
# /** addMetadata() **/
91
# /** Collect every active key/value pair held by this document.
#  *
#  *  Keys are visited in the order recorded in 'order'; within a key the
#  *  values are visited in sorted order. Pairs whose flag is false (i.e.
#  *  removed pairs) are skipped. Progress is traced to STDERR.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return A reference to an array of [key, value] array references
#  */
sub getAllMetadata()
{
    my ($self) = @_;
    my @pairs;

    print STDERR "IncrementalDocument.getAllMetadata()\n";

    my $last_position = scalar(keys %{$self->{'order'}}) - 1;
    foreach my $position (0 .. $last_position)
    {
        my $key = $self->{'order'}->{$position};

        # Skip positions whose key has no value set at all
        my $flag_of = $self->{'data'}->{$key};
        next unless $flag_of;

        # Note: every value of the key may have been flagged as removed
        foreach my $value (sort keys %{$flag_of})
        {
            next unless $flag_of->{$value};
            print STDERR "* Storing $key => $value\n";
            push(@pairs, [$key, $value]);
        }
    }

    print STDERR "Complete!\n";
    return \@pairs;
}
# /** getAllMetadata() **/
124
# /** Look up the document number recorded under the 'docnum' key.
#  *
#  *  Only values that are still flagged as active are considered, because
#  *  removeMetadata() leaves a removed value in place with a flag of 0.
#  *  Should more than one active value exist, the first in sorted (string)
#  *  order is returned so the result does not depend on hash ordering.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return The active docnum value, or -1 if there is none
#  */
sub getDocNum
{
    my ($self) = @_;

    # No docnum key at all
    my $flag_of = $self->{'data'}->{'docnum'};
    return -1 unless defined($flag_of);

    # Keep only the values whose flag is still set
    my @active = grep { $flag_of->{$_} } sort keys %{$flag_of};

    return @active ? $active[0] : -1;
}
# /** getDocNum() **/
142
# /** Populate this document from its record in the GDBM database.
#  *
#  *  The raw text stored under this document's OID is fetched through
#  *  GDBMUtils and scanned for entries of the form "<key>value" terminated
#  *  by a newline (optionally preceded by a carriage return). Every entry
#  *  found is passed to addMetadata() with the internal flag set.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return The number of key/value entries found (0 if none)
#  *  @throws Dies if addMetadata() rejects one of the entries
#  */
sub loadDocument
{
    my ($self) = @_;

    # Load the raw text for the document object from GDBM
    my $collection = $self->{'collection'};
    my $text = &GDBMUtils::gdbmCachedCollectionGet($collection, $self->{'oid'});

    # NOTE(review): what GDBMUtils returns for an unknown OID is not visible
    # from this file; treat "nothing came back" as "nothing to load".
    return 0 unless defined($text);

    # Walk over each entry in the raw text, extracting the key (enclosed in
    # angle brackets) and the value that follows it
    my $entry_count = 0;
    while ($text =~ /<([\w\d\.]+)>(.+?)\r?\n/gs)
    {
        # Copy the captures before calling out, as the callee runs its own
        # pattern matches
        my ($key, $value) = ($1, $2);
        $self->addMetadata($key, $value, 1);
        $entry_count++;
    }

    return $entry_count;
}
# /** loadDocument() **/
158
# /** Locates and removes the given key/value mapping from this document
#  *  object.
#  *
#  *  The pair is not deleted; its flag is set to 0 so that toString(),
#  *  getAllMetadata() skip it. Asking to remove a pair that was never
#  *  recorded is a no-op.
#  *
#  *  @param  $self  A reference to this IncrementalDocument object
#  *  @param  $key   The metadata key as a string
#  *  @param  $value The obsolete metadata value as a string
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub removeMetadata
{
    my ($self, $key, $value) = @_;

    # Only touch pairs that are actually recorded. Writing through a missing
    # key would create 'data'->{$key} without a matching 'order' entry;
    # addMetadata() would then treat the key as already known, never record
    # its position, and its values would be left out of toString() and
    # getAllMetadata().
    my $flag_of = $self->{'data'}->{$key};
    return unless defined($flag_of) && exists($flag_of->{$value});

    # Ensure the value no longer counts by setting its flag to 0
    $flag_of->{$value} = 0;
    return;
}
# /** removeMetadata() **/
176
# /** Write this document back to the GDBM database.
#  *
#  *  The textual form produced by toString() is stored under the document's
#  *  OID. If the document has a docnum (getDocNum() is not negative), the
#  *  record stored under that docnum is then checked, and a
#  *  "<section>OID" record is written there unless the existing record
#  *  already contains "<section>".
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  */
sub saveDocument()
{
    my ($self) = @_;

    # Serialise the object and store it under its own identifier
    my $serialised = $self->toString();
    my $collection = $self->{'collection'};
    &GDBMUtils::gdbmCachedCollectionSet($collection, $self->{'oid'}, $serialised);

    # Saving an incremental document also means making sure the reverse
    # lookup - from DocNum (or nodeID) to the Greenstone document hash -
    # is present in the database.
    my $doc_num = $self->getDocNum();
    if ($doc_num >= 0)
    {
        my $reverse_entry = &GDBMUtils::gdbmCachedCollectionGet($collection, $doc_num);
        # Add the reverse lookup only when one isn't already there
        if ($reverse_entry !~ /<section>/)
        {
            my $reverse_text = "<section>" . $self->{'oid'};
            &GDBMUtils::gdbmCachedCollectionSet($collection, $doc_num, $reverse_text);
        }
    }
}
# /** saveDocument() **/
206
# /** Produces a textual representation of this object.
#  *
#  *  One "<key>value" line, terminated by a newline, is emitted per active
#  *  pair. Keys appear in the order recorded in 'order'; the values of a key
#  *  appear in sorted order. Pairs whose flag is false are left out.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @return A string which describes this incremental document object
#  *          (the empty string when there is nothing to emit)
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub toString()
{
    my ($self) = @_;
    my @lines;

    my $last_position = scalar(keys %{$self->{'order'}}) - 1;
    foreach my $position (0 .. $last_position)
    {
        my $key = $self->{'order'}->{$position};

        # Nothing to emit for a key that has no value set
        my $flag_of = $self->{'data'}->{$key};
        next unless $flag_of;

        # Note: the key may have no active values left
        my @active_values = grep { $flag_of->{$_} } sort keys %{$flag_of};
        push(@lines, map { "<$key>$_\n" } @active_values);
    }

    return join("", @lines);
}
# /** toString() **/
238
2391;
Note: See TracBrowser for help on using the repository browser.