source: main/trunk/greenstone2/perllib/ClassifyTreeModel.pm@ 21563

Last change on this file since 21563 was 21563, checked in by mdewsnip, 14 years ago

Deleted GDBMUtils.pm. What was the point of me spending all that time creating dbutil if people are just going to ignore it and continue writing GDBM-only code?!? Not impressed...

  • Property svn:keywords set to Author Date Id Revision
File size: 9.0 KB
Line 
1package ClassifyTreeModel;
2
3use ClassifyTreeNode;
4use strict;
5
6
7# /** Constructor
8# *
9# * @param $class The name of the class to bless as a string
10# * @param $collection The name of the collection whose GDBM database we
11# * will be accessing as a string
12# * @param $root The oid of the root node of the classifier as a
13# * string
14# * @return A reference to the ClassifyTreeModel object
15# *
16# * @author John Thompson, DL Consulting Ltd.
17# */
18sub new()
19 {
20 my ($class, $collection, $root) = @_;
21 my $debug = 0;
22 print STDERR "ClassifyTreeModel.new(\"$collection\", \"$root\")\n" unless !$debug;
23 # Store the variables
24 my $self = {};
25 $self->{'collection'} = $collection;
26 $self->{'debug'} = $debug;
27 $self->{'root'} = $root;
28 # Bless me father for I have sinned
29 bless $self, $class;
30 return $self;
31 }
32# /** new() **/
33
34# /** Given a path and a document id, add this document to the classifier tree
35# * creating any necessary tree nodes first.
36# *
37# * @param $value The path to store this document in
38# * @param $oid Unique identifier of a document
39# *
40# * @author John Thompson, DL Consulting Ltd.
41# */
42sub addDocument()
43 {
44 my ($self, $value, $oid) = @_;
45 print STDERR "ClassifyTreeModel.addDocument(\"$value\", \"$oid\")\n" unless !$self->{'debug'};
46 # Generate a treepath object from the metadata value, remembering to prefix
47 # with the root nodes path
48 my $root_node_obj = $self->getRootNode();
49 my $path_obj = $root_node_obj->getPath();
50 $path_obj->addPathComponent($value);
51 # Ensure that this classifier node, and if necessary its ancestor nodes,
52 # exist in our tree.
53 my $node_obj = $self->getNodeByPath($path_obj);
54 if (!$node_obj)
55 {
56 # The node doesn't exist, so we need to add it
57 $node_obj = $self->addNode($path_obj);
58 }
59 # Add the document to the node.
60 $node_obj->addDocument($oid);
61 # Done.
62 }
63# /** addDocument() **/
64
65# /** Add a node into the tree first ensuring all its parent nodes are inserted
66# * to.
67# *
68# * @param $path_obj The path to insert the new node at
69# *
70# * @author John Thompson, DL Consulting Ltd.
71# */
72sub addNode()
73 {
74 my ($self, $path_obj) = @_;
75 print STDERR "ClassifyTreeModel.addNode(\"" . $path_obj->toString() . "\")\n" unless !$self->{'debug'};
76 # Ensure the parent exists, assuming we aren't at the root
77 my $parent_path_obj = $path_obj->getParentPath();
78 #rint STDERR "* parent path: " . $parent_path_obj->toString() . "\n";
79 my $parent_node_obj = $self->getNodeByPath($parent_path_obj);
80 #rint STDERR "* does parent node already exist? " . $parent_node_obj . "\n";
81 #rint STDERR "* are we at the root node yet? " . $parent_path_obj->isRootPath() . "\n";
82 if (!$parent_node_obj && !$parent_path_obj->isRootPath())
83 {
84 #rint STDERR "* recursive call!\n";
85 $parent_node_obj = $self->addNode($parent_path_obj);
86 }
87 # Insert this node into it's parent.
88 return $parent_node_obj->insertNode($path_obj);
89 }
90# /** addNode() **/
91
92# /** Retrieve the name of the collection this model is drawing from.
93# *
94# * @return The collection name as a string
95# *
96# * @author John Thompson, DL Consulting Ltd.
97# */
98sub getCollection()
99 {
100 my ($self) = @_;
101 print STDERR "ClassifyTreeModel.getCollection()\n" unless !$self->{'debug'};
102 return $self->{'collection'};
103 }
104# /** getCollection() **/
105
106# /** Retrieve a node from this tree based upon its CLID (OID).
107# * @param $clid The CLID as a string
108# * @return The indicated ClassifyTreeNode or null
109sub getNodeByCLID()
110 {
111 my ($self, $clid) = @_;
112 print STDERR "ClassifyTreeModel.getNodeByCLID(\"$clid\")\n" unless !$self->{'debug'};
113 my $result = 0;
114 # Test if this clid is even in our tree
115 if($clid !~ /^$self->{'root'}/)
116 {
117 print STDERR "Requested node $clid, which isn't part of " . $self->{'root'} . "\n";
118 return 0;
119 }
120 # Unfortunately I have to check that there is text to retrieve before I
121 # create a new node.
122
123 my $index_text_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $self->getCollection(), "index", "text");
124 my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $self->getCollection(), $index_text_directory_path);
125 if (&dbutil::read_infodb_entry("gdbm", $infodb_file_path, $clid) =~ /\w+/)
126 {
127 # Since the CLID can directly reference the correct entry in the GDBM we
128 # just create the node and return it
129 $result = new ClassifyTreeNode($self, $clid);
130 }
131 return $result;
132 }
133
134# /** Retrieve a node from this tree based upon a path
135# *
136# * @param $path The path to the node as a ClassifyTreePath
137# * @return The indicated ClassifyTreeNode or null
138# *
139# * @author John Thompson, DL Consulting Ltd.
140# */
141sub getNodeByPath()
142 {
143 my ($self, $path_obj) = @_;
144 print STDERR "ClassifyTreeModel.getNodeByPath(\"" . $path_obj->toString() . "\")\n" unless !$self->{'debug'};
145 # Starting at the ROOT of the tree, and with the first path component,
146 # recursively descend through the tree looking for the node - we can assume
147 # that we've found the root node (otherwise we won't be in a tree)
148 my $cur_node_obj = $self->getRootNode();
149 my $cur_path_obj = $cur_node_obj->getPath();
150 my $depth = 1;
151 # Continue till we either find the node we want, or run out a nodes
152 while(!$cur_node_obj->getPath()->equals($path_obj))
153 {
154 # Append the path component at this depth to the current path we
155 # are searching for
156 $cur_path_obj->addPathComponent($path_obj->getPathComponent($depth));
157 $depth++;
158 #rint STDERR "Searching " . $cur_node_obj->getPath()->toString() . "'s children for: " . $cur_path_obj->toString() . "\n";
159 # Search through the current nodes children, looking for one that
160 # matches the current path
161 my $found = 0;
162 foreach my $child_node_obj ($cur_node_obj->getChildren())
163 {
164 #rint STDERR "* testing " . $child_node_obj->getPath()->toString() . "\n";
165 if($child_node_obj->getPath()->equals($cur_path_obj))
166 {
167 $cur_node_obj = $child_node_obj;
168 $found = 1;
169 last;
170 }
171 }
172 # Couldn't find any node with this path
173 if(!$found)
174 {
175 #rint STDERR "* no such node exists!\n";
176 return 0;
177 }
178 }
179 return $cur_node_obj;
180 }
181# /** getChild() **/
182
183# /** Retrieve the parent node of the given node.
184# *
185# * @param $child_node The node whose parent we want to retrieve
186# * @return The parent node, or 0 if this is the root
187# *
188# * @author John Thompson, DL Consulting Ltd.
189# */
190sub getParentNode()
191 {
192 my ($self, $child_node) = @_;
193 print STDERR "ClassifyTreeModel.getParentNode()\n" unless !$self->{'debug'};
194 return $child_node->getParentNode();
195 }
196# /** getParentNode() **/
197
198sub getRootNode()
199{
200 my ($self) = @_;
201 print STDERR "ClassifyTreeModel.getRootNode()\n" unless !$self->{'debug'};
202 return new ClassifyTreeNode($self, $self->{'root'});
203}
204
205# /** Remove the given document from the classifier tree, and then remove any
206# * empty nodes if required.
207# *
208# * @param $value The value which contains the path of the node to remove
209# * the document from
210# * @param $oid The unique identifier of the document to remove
211# * @param $remove_empty Sets whether empty nodes are removed
212# *
213# * @author John Thompson, DL Consulting Ltd.
214# */
215sub removeDocument()
216 {
217 my ($self, $path, $oid, $remove_empty) = @_;
218 print STDERR "ClassifyTreeModel.removeDocument(\"$path\",\"$oid\",$remove_empty)\n" unless !$self->{'debug'};
219 # Append to root path
220 my $root_node_obj = $self->getRootNode();
221 my $path_obj = $root_node_obj->getPath();
222 $path_obj->addPathComponent($path);
223 # Retrieve the node in question
224 my $node_obj = $self->getNodeByPath($path_obj);
225 # Check we retrieved a node
226 if ($node_obj)
227 {
228 # Remove the document
229 $node_obj->removeDocument($oid);
230 # If we have been asked to remove empty nodes, do so now.
231 if ($remove_empty)
232 {
233 my $cur_node_obj = $node_obj;
234 my $empty_node_obj = 0;
235 while ($cur_node_obj->getNumLeafDocs() == 0)
236 {
237 $empty_node_obj = $cur_node_obj;
238 $cur_node_obj = $cur_node_obj->getParentNode();
239 }
240 if ($empty_node_obj)
241 {
242 # Try to retrieve the parent of this node
243 my $parent_node_obj = $empty_node_obj->getParentNode();
244 # As long as we have a parent (i.e. we aren't the root node) go
245 # ahead and delete this subtree starting at empty node
246 if ($parent_node_obj)
247 {
248 $parent_node_obj->removeNode($empty_node_obj);
249 }
250 }
251 }
252 }
253 # If the node doesn't exist in this tree, then we can't very well remove
254 # anything from it!
255 }
256# /** removeDocument() **/
257
2581;
Note: See TracBrowser for help on using the repository browser.