root/main/trunk/greenstone2/perllib/IncrementalDocument.pm @ 21561

Revision 21561, 7.7 KB (checked in by mdewsnip, 10 years ago)

Changed calls to GDBMUtils::gdbmCachedCollectionGet() to dbutil::read_infodb_entry(). Part of removing GDBMUtils.pm and making the code less GDBM-specific.

  • Property svn:keywords set to Author Date Id Revision
Line 
###########################################################################
#
# IncrementalDocument.pm -- An object to encapsulate the Greenstone
#                           document retrieved from the GDBM database.
#
# A component of the Greenstone digital library software
# from the New Zealand Digital Library Project at the
# University of Waikato, New Zealand.
#
# Copyright (C) 2006 DL Consulting Ltd and New Zealand Digital Library Project
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
#
###########################################################################
27package IncrementalDocument;
28
29use GDBMUtils;
30use strict;
31
# /** Construct an empty document object for a collection and document OID.
#  *
#  *  Nothing is read from the database here; the object starts with no
#  *  metadata. Call loadDocument() to populate it.
#  *
#  *  @param  $class      The class name the new object is blessed into
#  *  @param  $collection The name of the collection the document belongs to
#  *  @param  $oid        The unique identifier (OID) of the document
#  *
#  *  @return A hash reference blessed into $class with the fields
#  *          'collection', 'data', 'oid' and 'order'
#  *
#  *  Dies if $collection or $oid is undefined or the empty string.
#  */
sub new
{
    my ($class, $collection, $oid) = @_;

    # Test the parameters. Definedness and length are checked (rather than
    # plain truth) so that the string "0" is accepted as a legitimate value.
    die ("Error! Can't create a document that doesn't belong to a collection!") unless (defined($collection) && length($collection));
    die ("Error! Can't create a document that doesn't have a unique id (OID)!") unless (defined($oid) && length($oid));

    my $self = {
        # The collection this document object has been loaded from.
        'collection' => $collection,

        # Maps a metadata key string to a nested hash whose keys are the
        # values recorded for that key (flag 1 = present, 0 = removed).
        'data' => {},

        # The unique identifier of the document loaded.
        'oid' => $oid,

        # Maps a zero-based index to a metadata key, recording the order in
        # which metadata keys were discovered/added.
        'order' => {},
    };

    return bless($self, $class);
}
# /** new() **/
64
# /** Record a key/value metadata pair against this document.
#  *
#  *  The first time a key is seen it is appended to the 'order' index so
#  *  that toString() and getAllMetadata() can emit keys in the order they
#  *  were added. Adding the same key/value pair twice is harmless, and
#  *  re-adding a pair previously removed makes it live again.
#  *
#  *  @param  $self     A reference to this IncrementalDocument object
#  *  @param  $key      The metadata key; must contain a word character
#  *  @param  $value    The metadata value; must contain a word character
#  *  @param  $internal Accepted for compatibility; not used by this method
#  *
#  *  @return 1
#  *
#  *  Dies if $key or $value is undefined or contains no word character.
#  */
sub addMetadata
{
    my ($self, $key, $value, $internal) = @_;

    # Validate the arguments
    die ("Error! Can't add a metadata value to a document without a valid key!") unless (defined($key) && $key =~ /\w/);
    die ("Error! Can't add a metadata key to a document without a valid value!") unless (defined($value) && $value =~ /\w/);

    # Register the key in the order index if it is not there yet. The index
    # itself is consulted (not just the presence of a 'data' slot) because a
    # slot can exist for a key that was never given an order position, and
    # such a key would otherwise never be written out.
    my $order = $self->{'order'};
    my $is_ordered = scalar(grep { $_ eq $key } values %{$order});
    if (!$is_ordered) {
        # The next free index equals the number of keys already stored.
        my $index = scalar(keys %{$order});
        $order->{$index} = $key;
    }

    # Ensure a value hash exists for this key.
    if (!defined($self->{'data'}->{$key})) {
        $self->{'data'}->{$key} = {};
    }

    # Set the value of the associative path to 1 (present).
    $self->{'data'}->{$key}->{$value} = 1;
    return 1;
}
# /** addMetadata() **/
91
# /** Retrieve all the metadata of this document as an array of pairs.
#  *
#  *  Keys are visited in the order recorded in the 'order' index; the values
#  *  of each key are visited in sorted order. Values whose flag is false
#  *  (i.e. removed values) are skipped. Progress is traced to STDERR.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *
#  *  @return A reference to an array of [$key, $value] array references
#  */
sub getAllMetadata
{
    my ($self) = @_;
    my @all_metadata;

    print STDERR "IncrementalDocument.getAllMetadata()\n";

    my $order = $self->{'order'};
    my $data = $self->{'data'};
    my $key_count = scalar(keys %{$order});
    for my $i (0 .. $key_count - 1) {
        my $key = $order->{$i};
        my $values = $data->{$key};
        # Skip keys that have no value hash at all
        next unless $values;

        # Note: there may be zero live values left for this key
        for my $value (sort keys %{$values}) {
            next unless $values->{$value};
            print STDERR "* Storing $key => $value\n";
            push(@all_metadata, [$key, $value]);
        }
    }
    print STDERR "Complete!\n";
    return \@all_metadata;
}
# /** getAllMetadata() **/
124
# /** Retrieve the document number recorded under the 'docnum' metadata key.
#  *
#  *  Only values whose flag is true are considered, so a docnum that has
#  *  been removed with removeMetadata() is never returned. A document is
#  *  expected to have a single docnum; should several live values exist, the
#  *  first in sorted order is returned so the result is deterministic.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *
#  *  @return The docnum value, or -1 if the document has no live docnum
#  */
sub getDocNum
{
    my ($self) = @_;

    # Check the docnum path exists in the associated data
    my $docnum_values = $self->{'data'}->{'docnum'};
    return -1 unless defined($docnum_values);

    # Keep only the values that are still flagged as present
    my @live_values = grep { $docnum_values->{$_} } sort keys %{$docnum_values};
    return -1 unless @live_values;

    return $live_values[0];
}
# /** getDocNum() **/
142
# /** Populate this object with the metadata stored for its OID.
#  *
#  *  Builds the path <GSDLHOME>/collect/<collection>/index/text, asks
#  *  dbutil::get_infodb_file_path() for the "gdbm" info database file in it,
#  *  and reads the entry stored under this document's OID with
#  *  dbutil::read_infodb_entry(). Each "<key>value" line of that entry is
#  *  passed to addMetadata().
#  *
#  *  NOTE(review): this file has no 'use util' / 'use dbutil' line, so both
#  *  modules are presumably loaded by the calling code - confirm.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *
#  *  @return The number of key/value lines added (0 if the entry could not
#  *          be read or contained no such lines)
#  *
#  *  Dies if addMetadata() rejects a key or value.
#  */
sub loadDocument
{
    my ($self) = @_;

    # Load the raw text for the document object from the info database
    my $collection = $self->{'collection'};
    my $index_text_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection, "index", "text");
    my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collection, $index_text_directory_path);
    my $text = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $self->{'oid'});

    my $loaded = 0;
    # Nothing to parse if no entry came back
    return $loaded unless defined($text);

    # For each line in the raw text, extract the key (enclosed in angle
    # brackets) and the value. Only lines terminated by a newline match.
    while ($text =~ /<([\w\d\.]+)>(.+?)\r?\n/gs) {
        # Copy the captures before calling out, as addMetadata() runs its
        # own pattern matches.
        my ($key, $value) = ($1, $2);
        $self->addMetadata($key, $value, 1);
        $loaded++;
    }

    return $loaded;
}
# /** loadDocument() **/
160
# /** Locates and removes the given key/value mapping from this document
#  *  object.
#  *
#  *  The mapping is not deleted; its flag is set to 0 so that it is skipped
#  *  when the document is written out. A mapping that was never added is
#  *  left alone: creating an entry for an unknown key here would give the
#  *  key a 'data' slot without a position in the 'order' index.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *  @param  $key The metadata key as a string
#  *  @param  $value The obsolete metadata value as a string
#  *
#  *  @return 0
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub removeMetadata
{
    my ($self, $key, $value) = @_;
    return 0 unless (defined($key) && defined($value));

    # Reading the slot as a plain value does not create it for unknown keys
    my $values = $self->{'data'}->{$key};
    return 0 unless (defined($values) && exists($values->{$value}));

    # Ensure the value doesn't exist by simply setting to 0 the correct
    # associative path
    $values->{$value} = 0;
    return 0;
}
# /** removeMetadata() **/
178
# /** Write this document back to the collection's database.
#  *
#  *  The textual form produced by toString() is stored under the document's
#  *  OID using GDBMUtils::gdbmCachedCollectionSet(). If the document has a
#  *  docnum, the entry stored under that docnum is then read; when it is
#  *  missing or does not contain "<section>", a reverse-lookup entry
#  *  "<section>OID" is stored under the docnum.
#  *
#  *  NOTE(review): the reverse entry is read through dbutil but written
#  *  through GDBMUtils - confirm both address the same database file.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *
#  *  @return Nothing
#  */
sub saveDocument
{
    my ($self) = @_;

    # Get a textual version of this object
    my $text = $self->toString();

    # Now store the object in the database using the GDBM utilities
    my $collection = $self->{'collection'};
    &GDBMUtils::gdbmCachedCollectionSet($collection, $self->{'oid'}, $text);

    # There is a little bit of extra complexity when saving an incremental
    # document in that we should ensure that a reverse lookup - from DocNum
    # or nodeID to Greenstone document hash - exists in the database.
    my $doc_num = $self->getDocNum();

    # An undefined docnum must not be used as a database key
    if (defined($doc_num) && $doc_num >= 0) {
        my $index_text_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $collection, "index", "text");
        my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $collection, $index_text_directory_path);
        my $reverse_entry = &dbutil::read_infodb_entry("gdbm", $infodb_file_path, $doc_num);

        # If there is no reverse lookup, then add one now
        if (!defined($reverse_entry) || $reverse_entry !~ /<section>/) {
            &GDBMUtils::gdbmCachedCollectionSet($collection, $doc_num, "<section>" . $self->{'oid'});
        }
    }

    return;
}
# /** saveDocument() **/
211
# /** Produces a textual representation of this object.
#  *
#  *  One "<key>value" line, terminated by a newline, is emitted per live
#  *  key/value pair. Keys appear in the order recorded in the 'order' index
#  *  and the values of each key in sorted order; values whose flag is false
#  *  (i.e. removed values) are left out.
#  *
#  *  @param  $self A reference to this IncrementalDocument object
#  *
#  *  @return A string which describes this incremental document object
#  *          (the empty string if there is no live metadata)
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub toString
{
    my ($self) = @_;
    my $text = "";

    my $order = $self->{'order'};
    my $data = $self->{'data'};
    my $key_count = scalar(keys %{$order});
    for my $i (0 .. $key_count - 1) {
        my $key = $order->{$i};
        my $values = $data->{$key};
        # Skip keys that have no value hash at all
        next unless $values;

        # Note: there may be zero live values left for this key
        for my $value (sort keys %{$values}) {
            $text .= "<$key>$value\n" if $values->{$value};
        }
    }

    return $text;
}
# /** toString() **/
243
2441;
Note: See TracBrowser for help on using the browser.