source: main/trunk/greenstone2/perllib/ClassifyTreeModel.pm@ 22352

Last change on this file since 22352 was 21661, checked in by mdewsnip, 14 years ago

Changed ClassifyTreeNode so it gets the infodbtype from the model, instead of assuming GDBM. Part of making the code less GDBM-specific.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.2 KB
Line 
1package ClassifyTreeModel;
2
3use ClassifyTreeNode;
4use strict;
5
6
7# /** Constructor
8# *
9# * @param $class The name of the class to bless as a string
10# * @param $collection The name of the collection whose info database we
11# * will be accessing as a string
12# * @param $root The oid of the root node of the classifier as a
13# * string
14# * @return A reference to the ClassifyTreeModel object
15# *
16# * @author John Thompson, DL Consulting Ltd.
17# */
18sub new()
19 {
20 my ($class, $collection, $infodbtype, $root) = @_;
21 my $debug = 0;
22 print STDERR "ClassifyTreeModel.new(\"$collection\", $infodbtype, \"$root\")\n" unless !$debug;
23 # Store the variables
24 my $self = {};
25 $self->{'collection'} = $collection;
26 $self->{'infodbtype'} = $infodbtype;
27 $self->{'debug'} = $debug;
28 $self->{'root'} = $root;
29 # Bless me father for I have sinned
30 bless $self, $class;
31 return $self;
32 }
33# /** new() **/
34
35# /** Given a path and a document id, add this document to the classifier tree
36# * creating any necessary tree nodes first.
37# *
38# * @param $value The path to store this document in
39# * @param $oid Unique identifier of a document
40# *
41# * @author John Thompson, DL Consulting Ltd.
42# */
43sub addDocument()
44 {
45 my ($self, $value, $oid) = @_;
46 print STDERR "ClassifyTreeModel.addDocument(\"$value\", \"$oid\")\n" unless !$self->{'debug'};
47 # Generate a treepath object from the metadata value, remembering to prefix
48 # with the root nodes path
49 my $root_node_obj = $self->getRootNode();
50 my $path_obj = $root_node_obj->getPath();
51 $path_obj->addPathComponent($value);
52 # Ensure that this classifier node, and if necessary its ancestor nodes,
53 # exist in our tree.
54 my $node_obj = $self->getNodeByPath($path_obj);
55 if (!$node_obj)
56 {
57 # The node doesn't exist, so we need to add it
58 $node_obj = $self->addNode($path_obj);
59 }
60 # Add the document to the node.
61 $node_obj->addDocument($oid);
62 # Done.
63 }
64# /** addDocument() **/
65
66# /** Add a node into the tree first ensuring all its parent nodes are inserted
67# * to.
68# *
69# * @param $path_obj The path to insert the new node at
70# *
71# * @author John Thompson, DL Consulting Ltd.
72# */
73sub addNode()
74 {
75 my ($self, $path_obj) = @_;
76 print STDERR "ClassifyTreeModel.addNode(\"" . $path_obj->toString() . "\")\n" unless !$self->{'debug'};
77 # Ensure the parent exists, assuming we aren't at the root
78 my $parent_path_obj = $path_obj->getParentPath();
79 #rint STDERR "* parent path: " . $parent_path_obj->toString() . "\n";
80 my $parent_node_obj = $self->getNodeByPath($parent_path_obj);
81 #rint STDERR "* does parent node already exist? " . $parent_node_obj . "\n";
82 #rint STDERR "* are we at the root node yet? " . $parent_path_obj->isRootPath() . "\n";
83 if (!$parent_node_obj && !$parent_path_obj->isRootPath())
84 {
85 #rint STDERR "* recursive call!\n";
86 $parent_node_obj = $self->addNode($parent_path_obj);
87 }
88 # Insert this node into it's parent.
89 return $parent_node_obj->insertNode($path_obj);
90 }
91# /** addNode() **/
92
93# /** Retrieve the name of the collection this model is drawing from.
94# *
95# * @return The collection name as a string
96# *
97# * @author John Thompson, DL Consulting Ltd.
98# */
99sub getCollection()
100 {
101 my ($self) = @_;
102 print STDERR "ClassifyTreeModel.getCollection()\n" unless !$self->{'debug'};
103 return $self->{'collection'};
104 }
105# /** getCollection() **/
106
107sub getInfoDBType()
108{
109 my $self = shift(@_);
110 return $self->{'infodbtype'};
111}
112
113# /** Retrieve a node from this tree based upon its CLID (OID).
114# * @param $clid The CLID as a string
115# * @return The indicated ClassifyTreeNode or null
116sub getNodeByCLID()
117 {
118 my ($self, $clid) = @_;
119 print STDERR "ClassifyTreeModel.getNodeByCLID(\"$clid\")\n" unless !$self->{'debug'};
120 my $result = 0;
121 # Test if this clid is even in our tree
122 if($clid !~ /^$self->{'root'}/)
123 {
124 print STDERR "Requested node $clid, which isn't part of " . $self->{'root'} . "\n";
125 return 0;
126 }
127 # Unfortunately I have to check that there is text to retrieve before I
128 # create a new node.
129
130 my $index_text_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $self->getCollection(), "index", "text");
131 my $infodb_file_path = &dbutil::get_infodb_file_path($self->{'infodbtype'}, $self->getCollection(), $index_text_directory_path);
132 if (&dbutil::read_infodb_entry($self->{'infodbtype'}, $infodb_file_path, $clid) =~ /\w+/)
133 {
134 # Since the CLID can directly reference the correct entry in the info database we
135 # just create the node and return it
136 $result = new ClassifyTreeNode($self, $clid);
137 }
138 return $result;
139 }
140
141# /** Retrieve a node from this tree based upon a path
142# *
143# * @param $path The path to the node as a ClassifyTreePath
144# * @return The indicated ClassifyTreeNode or null
145# *
146# * @author John Thompson, DL Consulting Ltd.
147# */
148sub getNodeByPath()
149 {
150 my ($self, $path_obj) = @_;
151 print STDERR "ClassifyTreeModel.getNodeByPath(\"" . $path_obj->toString() . "\")\n" unless !$self->{'debug'};
152 # Starting at the ROOT of the tree, and with the first path component,
153 # recursively descend through the tree looking for the node - we can assume
154 # that we've found the root node (otherwise we won't be in a tree)
155 my $cur_node_obj = $self->getRootNode();
156 my $cur_path_obj = $cur_node_obj->getPath();
157 my $depth = 1;
158 # Continue till we either find the node we want, or run out a nodes
159 while(!$cur_node_obj->getPath()->equals($path_obj))
160 {
161 # Append the path component at this depth to the current path we
162 # are searching for
163 $cur_path_obj->addPathComponent($path_obj->getPathComponent($depth));
164 $depth++;
165 #rint STDERR "Searching " . $cur_node_obj->getPath()->toString() . "'s children for: " . $cur_path_obj->toString() . "\n";
166 # Search through the current nodes children, looking for one that
167 # matches the current path
168 my $found = 0;
169 foreach my $child_node_obj ($cur_node_obj->getChildren())
170 {
171 #rint STDERR "* testing " . $child_node_obj->getPath()->toString() . "\n";
172 if($child_node_obj->getPath()->equals($cur_path_obj))
173 {
174 $cur_node_obj = $child_node_obj;
175 $found = 1;
176 last;
177 }
178 }
179 # Couldn't find any node with this path
180 if(!$found)
181 {
182 #rint STDERR "* no such node exists!\n";
183 return 0;
184 }
185 }
186 return $cur_node_obj;
187 }
188# /** getChild() **/
189
190# /** Retrieve the parent node of the given node.
191# *
192# * @param $child_node The node whose parent we want to retrieve
193# * @return The parent node, or 0 if this is the root
194# *
195# * @author John Thompson, DL Consulting Ltd.
196# */
197sub getParentNode()
198 {
199 my ($self, $child_node) = @_;
200 print STDERR "ClassifyTreeModel.getParentNode()\n" unless !$self->{'debug'};
201 return $child_node->getParentNode();
202 }
203# /** getParentNode() **/
204
205sub getRootNode()
206{
207 my ($self) = @_;
208 print STDERR "ClassifyTreeModel.getRootNode()\n" unless !$self->{'debug'};
209 return new ClassifyTreeNode($self, $self->{'root'});
210}
211
212# /** Remove the given document from the classifier tree, and then remove any
213# * empty nodes if required.
214# *
215# * @param $value The value which contains the path of the node to remove
216# * the document from
217# * @param $oid The unique identifier of the document to remove
218# * @param $remove_empty Sets whether empty nodes are removed
219# *
220# * @author John Thompson, DL Consulting Ltd.
221# */
222sub removeDocument()
223 {
224 my ($self, $path, $oid, $remove_empty) = @_;
225 print STDERR "ClassifyTreeModel.removeDocument(\"$path\",\"$oid\",$remove_empty)\n" unless !$self->{'debug'};
226 # Append to root path
227 my $root_node_obj = $self->getRootNode();
228 my $path_obj = $root_node_obj->getPath();
229 $path_obj->addPathComponent($path);
230 # Retrieve the node in question
231 my $node_obj = $self->getNodeByPath($path_obj);
232 # Check we retrieved a node
233 if ($node_obj)
234 {
235 # Remove the document
236 $node_obj->removeDocument($oid);
237 # If we have been asked to remove empty nodes, do so now.
238 if ($remove_empty)
239 {
240 my $cur_node_obj = $node_obj;
241 my $empty_node_obj = 0;
242 while ($cur_node_obj->getNumLeafDocs() == 0)
243 {
244 $empty_node_obj = $cur_node_obj;
245 $cur_node_obj = $cur_node_obj->getParentNode();
246 }
247 if ($empty_node_obj)
248 {
249 # Try to retrieve the parent of this node
250 my $parent_node_obj = $empty_node_obj->getParentNode();
251 # As long as we have a parent (i.e. we aren't the root node) go
252 # ahead and delete this subtree starting at empty node
253 if ($parent_node_obj)
254 {
255 $parent_node_obj->removeNode($empty_node_obj);
256 }
257 }
258 }
259 }
260 # If the node doesn't exist in this tree, then we can't very well remove
261 # anything from it!
262 }
263# /** removeDocument() **/
264
2651;
Note: See TracBrowser for help on using the repository browser.