source: main/trunk/greenstone2/perllib/ClassifyTreeModel.pm@ 21646

Last change on this file since 21646 was 21646, checked in by mdewsnip, 14 years ago

Changed ClassifyTreeModel constructor to take the infodbtype as a parameter, instead of assuming GDBM. Part of making the code less GDBM-specific.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.1 KB
Line 
1package ClassifyTreeModel;
2
3use ClassifyTreeNode;
4use strict;
5
6
# /** Constructor
#  *
#  *  @param  $class       The name of the class to bless as a string
#  *  @param  $collection  The name of the collection whose info database we
#  *                       will be accessing as a string
#  *  @param  $infodbtype  The type of the info database; stored as given and
#  *                       handed to dbutil whenever the database is read
#  *  @param  $root        The oid of the root node of the classifier as a
#  *                       string
#  *  @return A reference to the ClassifyTreeModel object
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub new
{
    my ($class, $collection, $infodbtype, $root) = @_;

    # Call tracing is switched off here; set to 1 to have every method of
    # the model announce itself on STDERR.
    my $debug = 0;
    print STDERR "ClassifyTreeModel.new(\"$collection\", $infodbtype, \"$root\")\n" if $debug;

    # Store the variables
    my $self = {
        'collection' => $collection,
        'infodbtype' => $infodbtype,
        'debug'      => $debug,
        'root'       => $root,
    };

    return bless $self, $class;
}
# /** new() **/
34
# /** Given a path and a document id, add this document to the classifier tree
#  *  creating any necessary tree nodes first.
#  *
#  *  @param  $value  The path to store this document in; it is appended to
#  *                  the root node's path as a single path component
#  *  @param  $oid    Unique identifier of a document
#  *  @return Whatever the target node's addDocument() call returns
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub addDocument
{
    my ($self, $value, $oid) = @_;
    print STDERR "ClassifyTreeModel.addDocument(\"$value\", \"$oid\")\n" if $self->{'debug'};

    # Generate a treepath object from the metadata value, remembering to
    # prefix it with the root node's path
    my $path_obj = $self->getRootNode()->getPath();
    $path_obj->addPathComponent($value);

    # Reuse the classifier node when it already exists; otherwise create it
    # (addNode also creates any missing ancestor nodes).
    my $node_obj = $self->getNodeByPath($path_obj) || $self->addNode($path_obj);

    # Add the document to the node.
    return $node_obj->addDocument($oid);
}
# /** addDocument() **/
65
# /** Add a node into the tree, first ensuring all its parent nodes are
#  *  inserted too.
#  *
#  *  @param  $path_obj  The path to insert the new node at
#  *  @return Whatever the parent node's insertNode() call returns
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub addNode
{
    my ($self, $path_obj) = @_;
    print STDERR "ClassifyTreeModel.addNode(\"" . $path_obj->toString() . "\")\n" if $self->{'debug'};

    # Ensure the parent exists
    my $parent_path_obj = $path_obj->getParentPath();
    my $parent_node_obj = $self->getNodeByPath($parent_path_obj);
    if (!$parent_node_obj)
    {
        # A missing parent below the root is created recursively.  A parent
        # path that *is* the root path denotes the root node itself, so use
        # it directly rather than calling insertNode() on a false value.
        $parent_node_obj = $parent_path_obj->isRootPath()
            ? $self->getRootNode()
            : $self->addNode($parent_path_obj);
    }

    # Insert this node into its parent.
    return $parent_node_obj->insertNode($path_obj);
}
# /** addNode() **/
92
# /** Retrieve the name of the collection this model is drawing from.
#  *
#  *  @return The collection name as a string, exactly as it was passed to
#  *          the constructor
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub getCollection
{
    my ($self) = @_;
    print STDERR "ClassifyTreeModel.getCollection()\n" if $self->{'debug'};
    return $self->{'collection'};
}
# /** getCollection() **/
106
# /** Retrieve a node from this tree based upon its CLID (OID).
#  *
#  *  The CLID must begin with this model's root oid, and the info database
#  *  entry read for it must contain at least one word character; otherwise
#  *  no node is created.
#  *
#  *  @param  $clid  The CLID as a string
#  *  @return The indicated ClassifyTreeNode, or 0 if the CLID is outside
#  *          this tree or has no usable info database entry
#  */
sub getNodeByCLID
{
    my ($self, $clid) = @_;
    print STDERR "ClassifyTreeModel.getNodeByCLID(\"$clid\")\n" if $self->{'debug'};

    # Test if this clid is even in our tree.  The root oid is quoted so that
    # characters such as '.' in it are matched literally instead of being
    # treated as regex metacharacters.
    # NOTE(review): this is a plain prefix test, so a root of "CL1" also
    # accepts "CL10..." - confirm whether a separator check is wanted.
    if ($clid !~ /^\Q$self->{'root'}\E/)
    {
        print STDERR "Requested node $clid, which isn't part of " . $self->{'root'} . "\n";
        return 0;
    }

    # Unfortunately we have to check that there is text to retrieve before
    # creating a new node.
    my $index_text_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $self->getCollection(), "index", "text");
    my $infodb_file_path = &dbutil::get_infodb_file_path($self->{'infodbtype'}, $self->getCollection(), $index_text_directory_path);
    my $infodb_entry = &dbutil::read_infodb_entry($self->{'infodbtype'}, $infodb_file_path, $clid);
    if ($infodb_entry !~ /\w+/)
    {
        return 0;
    }

    # Since the CLID can directly reference the correct entry in the info
    # database we just create the node and return it
    return ClassifyTreeNode->new($self, $clid);
}
# /** getNodeByCLID() **/
134
# /** Retrieve a node from this tree based upon a path.
#  *
#  *  The search starts at the root node and walks down one level per
#  *  iteration, each time picking the child whose path equals the requested
#  *  path truncated to the current depth.
#  *
#  *  @param  $path_obj  The path to the node as a ClassifyTreePath
#  *  @return The indicated ClassifyTreeNode, or 0 if some level of the path
#  *          has no matching child
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub getNodeByPath
{
    my ($self, $path_obj) = @_;
    print STDERR "ClassifyTreeModel.getNodeByPath(\"" . $path_obj->toString() . "\")\n" if $self->{'debug'};

    # Starting at the ROOT of the tree, and with the first path component,
    # descend through the tree looking for the node - we can assume that
    # we've found the root node (otherwise we won't be in a tree)
    my $cur_node_obj = $self->getRootNode();
    # Grown by one component per level; it is the path the next child must have.
    my $cur_path_obj = $cur_node_obj->getPath();
    my $depth = 1;

    # Continue till we either find the node we want, or run out of nodes
    while (!$cur_node_obj->getPath()->equals($path_obj))
    {
        # Append the path component at this depth to the current path we
        # are searching for
        $cur_path_obj->addPathComponent($path_obj->getPathComponent($depth));
        $depth++;

        # Search through the current node's children, looking for one that
        # matches the current path
        my $matching_child_obj;
        foreach my $child_node_obj ($cur_node_obj->getChildren())
        {
            next unless $child_node_obj->getPath()->equals($cur_path_obj);
            $matching_child_obj = $child_node_obj;
            last;
        }

        # Couldn't find any node with this path
        return 0 unless $matching_child_obj;

        $cur_node_obj = $matching_child_obj;
    }
    return $cur_node_obj;
}
# /** getNodeByPath() **/
183
# /** Retrieve the parent node of the given node.
#  *
#  *  This simply delegates to the child node's own getParentNode() method.
#  *
#  *  @param  $child_node  The node whose parent we want to retrieve
#  *  @return The parent node, or 0 if this is the root
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub getParentNode
{
    my ($self, $child_node) = @_;
    print STDERR "ClassifyTreeModel.getParentNode()\n" if $self->{'debug'};
    return $child_node->getParentNode();
}
# /** getParentNode() **/
198
# /** Retrieve the root node of this classifier tree.
#  *
#  *  A new ClassifyTreeNode is constructed on every call, using the root oid
#  *  given to the constructor.
#  *
#  *  @return A ClassifyTreeNode for the root oid
#  */
sub getRootNode
{
    my ($self) = @_;
    print STDERR "ClassifyTreeModel.getRootNode()\n" if $self->{'debug'};
    # Explicit class-method call: the indirect-object form "new Class(...)"
    # is ambiguous in a package that defines its own new().
    return ClassifyTreeNode->new($self, $self->{'root'});
}
# /** getRootNode() **/
205
# /** Remove the given document from the classifier tree, and then remove any
#  *  empty nodes if required.
#  *
#  *  Nothing happens when no node exists at the given path.
#  *
#  *  @param  $path          The path of the node to remove the document
#  *                         from; it is appended to the root node's path as
#  *                         a single path component
#  *  @param  $oid           The unique identifier of the document to remove
#  *  @param  $remove_empty  Sets whether empty nodes are removed
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub removeDocument
{
    my ($self, $path, $oid, $remove_empty) = @_;
    print STDERR "ClassifyTreeModel.removeDocument(\"$path\",\"$oid\",$remove_empty)\n" if $self->{'debug'};

    # Append to root path
    my $path_obj = $self->getRootNode()->getPath();
    $path_obj->addPathComponent($path);

    # Retrieve the node in question.  If the node doesn't exist in this tree,
    # then we can't very well remove anything from it!
    my $node_obj = $self->getNodeByPath($path_obj);
    return unless $node_obj;

    # Remove the document
    $node_obj->removeDocument($oid);

    # Only prune when we have been asked to remove empty nodes.
    return unless $remove_empty;

    # Climb towards the root while the nodes hold no leaf documents,
    # remembering the highest empty node seen.  getParentNode() yields a
    # false value above the root, so the walk must stop there instead of
    # calling a method on it.
    my $cur_node_obj = $node_obj;
    my $empty_node_obj = 0;
    while ($cur_node_obj && $cur_node_obj->getNumLeafDocs() == 0)
    {
        $empty_node_obj = $cur_node_obj;
        $cur_node_obj = $cur_node_obj->getParentNode();
    }
    return unless $empty_node_obj;

    # As long as we have a parent (i.e. we aren't the root node) go ahead
    # and delete this subtree starting at the empty node.
    # NOTE(review): when every node up to and including the root is empty
    # nothing is deleted - confirm that leaving the tree as-is is intended.
    my $parent_node_obj = $empty_node_obj->getParentNode();
    if ($parent_node_obj)
    {
        $parent_node_obj->removeNode($empty_node_obj);
    }
    return;
}
# /** removeDocument() **/
258
2591;
Note: See TracBrowser for help on using the repository browser.