source: main/trunk/greenstone2/perllib/ClassifyTreeModel.pm@ 21308

Last change on this file since 21308 was 17087, checked in by davidb, 16 years ago

Introduction of new GDBM alternative for archives.inf as step towards full incremental building. Information traditionally stored in archives.inf PLUS additional information that will help with working out what files have changed since last build, and what doc-id they hashed to is stored in two GDBM databases. For now these databases aren't read, but in the future ArchivesInfPlugin will be upgraded to use these to support these.

  • Property svn:keywords set to Author Date Id Revision
File size: 8.8 KB
Line 
1package ClassifyTreeModel;
2
3use ClassifyTreeNode;
4use GDBMUtils;
5use strict;
6
7# /** Constructor
8# *
9# * @param $class The name of the class to bless as a string
10# * @param $collection The name of the collection whose GDBM database we
11# * will be accessing as a string
12# * @param $root The oid of the root node of the classifier as a
13# * string
14# * @return A reference to the ClassifyTreeModel object
15# *
16# * @author John Thompson, DL Consulting Ltd.
17# */
18sub new()
19 {
20 my ($class, $collection, $root) = @_;
21 my $debug = 0;
22 print STDERR "ClassifyTreeModel.new(\"$collection\", \"$root\")\n" unless !$debug;
23 # Store the variables
24 my $self = {};
25 $self->{'collection'} = $collection;
26 $self->{'debug'} = $debug;
27 $self->{'root'} = $root;
28 # Bless me father for I have sinned
29 bless $self, $class;
30 return $self;
31 }
32# /** new() **/
33
34# /** Given a path and a document id, add this document to the classifier tree
35# * creating any necessary tree nodes first.
36# *
37# * @param $value The path to store this document in
38# * @param $oid Unique identifier of a document
39# *
40# * @author John Thompson, DL Consulting Ltd.
41# */
42sub addDocument()
43 {
44 my ($self, $value, $oid) = @_;
45 print STDERR "ClassifyTreeModel.addDocument(\"$value\", \"$oid\")\n" unless !$self->{'debug'};
46 # Generate a treepath object from the metadata value, remembering to prefix
47 # with the root nodes path
48 my $root_node_obj = $self->getRootNode();
49 my $path_obj = $root_node_obj->getPath();
50 $path_obj->addPathComponent($value);
51 # Ensure that this classifier node, and if necessary its ancestor nodes,
52 # exist in our tree.
53 my $node_obj = $self->getNodeByPath($path_obj);
54 if (!$node_obj)
55 {
56 # The node doesn't exist, so we need to add it
57 $node_obj = $self->addNode($path_obj);
58 }
59 # Add the document to the node.
60 $node_obj->addDocument($oid);
61 # Done.
62 }
63# /** addDocument() **/
64
65# /** Add a node into the tree first ensuring all its parent nodes are inserted
66# * to.
67# *
68# * @param $path_obj The path to insert the new node at
69# *
70# * @author John Thompson, DL Consulting Ltd.
71# */
72sub addNode()
73 {
74 my ($self, $path_obj) = @_;
75 print STDERR "ClassifyTreeModel.addNode(\"" . $path_obj->toString() . "\")\n" unless !$self->{'debug'};
76 # Ensure the parent exists, assuming we aren't at the root
77 my $parent_path_obj = $path_obj->getParentPath();
78 #rint STDERR "* parent path: " . $parent_path_obj->toString() . "\n";
79 my $parent_node_obj = $self->getNodeByPath($parent_path_obj);
80 #rint STDERR "* does parent node already exist? " . $parent_node_obj . "\n";
81 #rint STDERR "* are we at the root node yet? " . $parent_path_obj->isRootPath() . "\n";
82 if (!$parent_node_obj && !$parent_path_obj->isRootPath())
83 {
84 #rint STDERR "* recursive call!\n";
85 $parent_node_obj = $self->addNode($parent_path_obj);
86 }
87 # Insert this node into it's parent.
88 return $parent_node_obj->insertNode($path_obj);
89 }
90# /** addNode() **/
91
92# /** Retrieve the name of the collection this model is drawing from.
93# *
94# * @return The collection name as a string
95# *
96# * @author John Thompson, DL Consulting Ltd.
97# */
98sub getCollection()
99 {
100 my ($self) = @_;
101 print STDERR "ClassifyTreeModel.getCollection()\n" unless !$self->{'debug'};
102 return $self->{'collection'};
103 }
104# /** getCollection() **/
105
106# /** Retrieve a node from this tree based upon its CLID (OID).
107# * @param $clid The CLID as a string
108# * @return The indicated ClassifyTreeNode or null
109sub getNodeByCLID()
110 {
111 my ($self, $clid) = @_;
112 print STDERR "ClassifyTreeModel.getNodeByCLID(\"$clid\")\n" unless !$self->{'debug'};
113 my $result = 0;
114 # Test if this clid is even in our tree
115 if($clid !~ /^$self->{'root'}/)
116 {
117 print STDERR "Requested node $clid, which isn't part of " . $self->{'root'} . "\n";
118 return 0;
119 }
120 # Unfortunately I have to check that there is text to retrieve before I
121 # create a new node.
122 if(&GDBMUtils::gdbmCachedCollectionGet($self->getCollection(), $clid) =~ /\w+/)
123 {
124 # Since the CLID can directly reference the correct entry in the GDBM we
125 # just create the node and return it
126 $result = new ClassifyTreeNode($self, $clid);
127 }
128 return $result;
129 }
130
131# /** Retrieve a node from this tree based upon a path
132# *
133# * @param $path The path to the node as a ClassifyTreePath
134# * @return The indicated ClassifyTreeNode or null
135# *
136# * @author John Thompson, DL Consulting Ltd.
137# */
138sub getNodeByPath()
139 {
140 my ($self, $path_obj) = @_;
141 print STDERR "ClassifyTreeModel.getNodeByPath(\"" . $path_obj->toString() . "\")\n" unless !$self->{'debug'};
142 # Starting at the ROOT of the tree, and with the first path component,
143 # recursively descend through the tree looking for the node - we can assume
144 # that we've found the root node (otherwise we won't be in a tree)
145 my $cur_node_obj = $self->getRootNode();
146 my $cur_path_obj = $cur_node_obj->getPath();
147 my $depth = 1;
148 # Continue till we either find the node we want, or run out a nodes
149 while(!$cur_node_obj->getPath()->equals($path_obj))
150 {
151 # Append the path component at this depth to the current path we
152 # are searching for
153 $cur_path_obj->addPathComponent($path_obj->getPathComponent($depth));
154 $depth++;
155 #rint STDERR "Searching " . $cur_node_obj->getPath()->toString() . "'s children for: " . $cur_path_obj->toString() . "\n";
156 # Search through the current nodes children, looking for one that
157 # matches the current path
158 my $found = 0;
159 foreach my $child_node_obj ($cur_node_obj->getChildren())
160 {
161 #rint STDERR "* testing " . $child_node_obj->getPath()->toString() . "\n";
162 if($child_node_obj->getPath()->equals($cur_path_obj))
163 {
164 $cur_node_obj = $child_node_obj;
165 $found = 1;
166 last;
167 }
168 }
169 # Couldn't find any node with this path
170 if(!$found)
171 {
172 #rint STDERR "* no such node exists!\n";
173 return 0;
174 }
175 }
176 return $cur_node_obj;
177 }
178# /** getChild() **/
179
180# /** Retrieve the parent node of the given node.
181# *
182# * @param $child_node The node whose parent we want to retrieve
183# * @return The parent node, or 0 if this is the root
184# *
185# * @author John Thompson, DL Consulting Ltd.
186# */
187sub getParentNode()
188 {
189 my ($self, $child_node) = @_;
190 print STDERR "ClassifyTreeModel.getParentNode()\n" unless !$self->{'debug'};
191 return $child_node->getParentNode();
192 }
193# /** getParentNode() **/
194
195sub getRootNode()
196{
197 my ($self) = @_;
198 print STDERR "ClassifyTreeModel.getRootNode()\n" unless !$self->{'debug'};
199 return new ClassifyTreeNode($self, $self->{'root'});
200}
201
202# /** Remove the given document from the classifier tree, and then remove any
203# * empty nodes if required.
204# *
205# * @param $value The value which contains the path of the node to remove
206# * the document from
207# * @param $oid The unique identifier of the document to remove
208# * @param $remove_empty Sets whether empty nodes are removed
209# *
210# * @author John Thompson, DL Consulting Ltd.
211# */
212sub removeDocument()
213 {
214 my ($self, $path, $oid, $remove_empty) = @_;
215 print STDERR "ClassifyTreeModel.removeDocument(\"$path\",\"$oid\",$remove_empty)\n" unless !$self->{'debug'};
216 # Append to root path
217 my $root_node_obj = $self->getRootNode();
218 my $path_obj = $root_node_obj->getPath();
219 $path_obj->addPathComponent($path);
220 # Retrieve the node in question
221 my $node_obj = $self->getNodeByPath($path_obj);
222 # Check we retrieved a node
223 if ($node_obj)
224 {
225 # Remove the document
226 $node_obj->removeDocument($oid);
227 # If we have been asked to remove empty nodes, do so now.
228 if ($remove_empty)
229 {
230 my $cur_node_obj = $node_obj;
231 my $empty_node_obj = 0;
232 while ($cur_node_obj->getNumLeafDocs() == 0)
233 {
234 $empty_node_obj = $cur_node_obj;
235 $cur_node_obj = $cur_node_obj->getParentNode();
236 }
237 if ($empty_node_obj)
238 {
239 # Try to retrieve the parent of this node
240 my $parent_node_obj = $empty_node_obj->getParentNode();
241 # As long as we have a parent (i.e. we aren't the root node) go
242 # ahead and delete this subtree starting at empty node
243 if ($parent_node_obj)
244 {
245 $parent_node_obj->removeNode($empty_node_obj);
246 }
247 }
248 }
249 }
250 # If the node doesn't exist in this tree, then we can't very well remove
251 # anything from it!
252 }
253# /** removeDocument() **/
254
2551;
Note: See TracBrowser for help on using the repository browser.