source: main/trunk/greenstone2/perllib/ClassifyTreeModel.pm@ 21562

Last change on this file since 21562 was 21561, checked in by mdewsnip, 14 years ago

Changed calls to GDBMUtils::gdbmCachedCollectionGet() to dbutil::read_infodb_entry(). Part of removing GDBMUtils.pm and making the code less GDBM-specific.

  • Property svn:keywords set to Author Date Id Revision
File size: 9.1 KB
Line 
1package ClassifyTreeModel;
2
3use ClassifyTreeNode;
4use GDBMUtils;
5use strict;
6
7# /** Constructor
8# *
9# * @param $class The name of the class to bless as a string
10# * @param $collection The name of the collection whose GDBM database we
11# * will be accessing as a string
12# * @param $root The oid of the root node of the classifier as a
13# * string
14# * @return A reference to the ClassifyTreeModel object
15# *
16# * @author John Thompson, DL Consulting Ltd.
17# */
18sub new()
19 {
20 my ($class, $collection, $root) = @_;
21 my $debug = 0;
22 print STDERR "ClassifyTreeModel.new(\"$collection\", \"$root\")\n" unless !$debug;
23 # Store the variables
24 my $self = {};
25 $self->{'collection'} = $collection;
26 $self->{'debug'} = $debug;
27 $self->{'root'} = $root;
28 # Bless me father for I have sinned
29 bless $self, $class;
30 return $self;
31 }
32# /** new() **/
33
34# /** Given a path and a document id, add this document to the classifier tree
35# * creating any necessary tree nodes first.
36# *
37# * @param $value The path to store this document in
38# * @param $oid Unique identifier of a document
39# *
40# * @author John Thompson, DL Consulting Ltd.
41# */
42sub addDocument()
43 {
44 my ($self, $value, $oid) = @_;
45 print STDERR "ClassifyTreeModel.addDocument(\"$value\", \"$oid\")\n" unless !$self->{'debug'};
46 # Generate a treepath object from the metadata value, remembering to prefix
47 # with the root nodes path
48 my $root_node_obj = $self->getRootNode();
49 my $path_obj = $root_node_obj->getPath();
50 $path_obj->addPathComponent($value);
51 # Ensure that this classifier node, and if necessary its ancestor nodes,
52 # exist in our tree.
53 my $node_obj = $self->getNodeByPath($path_obj);
54 if (!$node_obj)
55 {
56 # The node doesn't exist, so we need to add it
57 $node_obj = $self->addNode($path_obj);
58 }
59 # Add the document to the node.
60 $node_obj->addDocument($oid);
61 # Done.
62 }
63# /** addDocument() **/
64
65# /** Add a node into the tree first ensuring all its parent nodes are inserted
66# * to.
67# *
68# * @param $path_obj The path to insert the new node at
69# *
70# * @author John Thompson, DL Consulting Ltd.
71# */
72sub addNode()
73 {
74 my ($self, $path_obj) = @_;
75 print STDERR "ClassifyTreeModel.addNode(\"" . $path_obj->toString() . "\")\n" unless !$self->{'debug'};
76 # Ensure the parent exists, assuming we aren't at the root
77 my $parent_path_obj = $path_obj->getParentPath();
78 #rint STDERR "* parent path: " . $parent_path_obj->toString() . "\n";
79 my $parent_node_obj = $self->getNodeByPath($parent_path_obj);
80 #rint STDERR "* does parent node already exist? " . $parent_node_obj . "\n";
81 #rint STDERR "* are we at the root node yet? " . $parent_path_obj->isRootPath() . "\n";
82 if (!$parent_node_obj && !$parent_path_obj->isRootPath())
83 {
84 #rint STDERR "* recursive call!\n";
85 $parent_node_obj = $self->addNode($parent_path_obj);
86 }
87 # Insert this node into it's parent.
88 return $parent_node_obj->insertNode($path_obj);
89 }
90# /** addNode() **/
91
92# /** Retrieve the name of the collection this model is drawing from.
93# *
94# * @return The collection name as a string
95# *
96# * @author John Thompson, DL Consulting Ltd.
97# */
98sub getCollection()
99 {
100 my ($self) = @_;
101 print STDERR "ClassifyTreeModel.getCollection()\n" unless !$self->{'debug'};
102 return $self->{'collection'};
103 }
104# /** getCollection() **/
105
106# /** Retrieve a node from this tree based upon its CLID (OID).
107# * @param $clid The CLID as a string
108# * @return The indicated ClassifyTreeNode or null
109sub getNodeByCLID()
110 {
111 my ($self, $clid) = @_;
112 print STDERR "ClassifyTreeModel.getNodeByCLID(\"$clid\")\n" unless !$self->{'debug'};
113 my $result = 0;
114 # Test if this clid is even in our tree
115 if($clid !~ /^$self->{'root'}/)
116 {
117 print STDERR "Requested node $clid, which isn't part of " . $self->{'root'} . "\n";
118 return 0;
119 }
120 # Unfortunately I have to check that there is text to retrieve before I
121 # create a new node.
122
123 my $index_text_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $self->getCollection(), "index", "text");
124 my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $self->getCollection(), $index_text_directory_path);
125 if (&dbutil::read_infodb_entry("gdbm", $infodb_file_path, $clid) =~ /\w+/)
126 {
127 # Since the CLID can directly reference the correct entry in the GDBM we
128 # just create the node and return it
129 $result = new ClassifyTreeNode($self, $clid);
130 }
131 return $result;
132 }
133
134# /** Retrieve a node from this tree based upon a path
135# *
136# * @param $path The path to the node as a ClassifyTreePath
137# * @return The indicated ClassifyTreeNode or null
138# *
139# * @author John Thompson, DL Consulting Ltd.
140# */
141sub getNodeByPath()
142 {
143 my ($self, $path_obj) = @_;
144 print STDERR "ClassifyTreeModel.getNodeByPath(\"" . $path_obj->toString() . "\")\n" unless !$self->{'debug'};
145 # Starting at the ROOT of the tree, and with the first path component,
146 # recursively descend through the tree looking for the node - we can assume
147 # that we've found the root node (otherwise we won't be in a tree)
148 my $cur_node_obj = $self->getRootNode();
149 my $cur_path_obj = $cur_node_obj->getPath();
150 my $depth = 1;
151 # Continue till we either find the node we want, or run out a nodes
152 while(!$cur_node_obj->getPath()->equals($path_obj))
153 {
154 # Append the path component at this depth to the current path we
155 # are searching for
156 $cur_path_obj->addPathComponent($path_obj->getPathComponent($depth));
157 $depth++;
158 #rint STDERR "Searching " . $cur_node_obj->getPath()->toString() . "'s children for: " . $cur_path_obj->toString() . "\n";
159 # Search through the current nodes children, looking for one that
160 # matches the current path
161 my $found = 0;
162 foreach my $child_node_obj ($cur_node_obj->getChildren())
163 {
164 #rint STDERR "* testing " . $child_node_obj->getPath()->toString() . "\n";
165 if($child_node_obj->getPath()->equals($cur_path_obj))
166 {
167 $cur_node_obj = $child_node_obj;
168 $found = 1;
169 last;
170 }
171 }
172 # Couldn't find any node with this path
173 if(!$found)
174 {
175 #rint STDERR "* no such node exists!\n";
176 return 0;
177 }
178 }
179 return $cur_node_obj;
180 }
181# /** getChild() **/
182
183# /** Retrieve the parent node of the given node.
184# *
185# * @param $child_node The node whose parent we want to retrieve
186# * @return The parent node, or 0 if this is the root
187# *
188# * @author John Thompson, DL Consulting Ltd.
189# */
190sub getParentNode()
191 {
192 my ($self, $child_node) = @_;
193 print STDERR "ClassifyTreeModel.getParentNode()\n" unless !$self->{'debug'};
194 return $child_node->getParentNode();
195 }
196# /** getParentNode() **/
197
198sub getRootNode()
199{
200 my ($self) = @_;
201 print STDERR "ClassifyTreeModel.getRootNode()\n" unless !$self->{'debug'};
202 return new ClassifyTreeNode($self, $self->{'root'});
203}
204
205# /** Remove the given document from the classifier tree, and then remove any
206# * empty nodes if required.
207# *
208# * @param $value The value which contains the path of the node to remove
209# * the document from
210# * @param $oid The unique identifier of the document to remove
211# * @param $remove_empty Sets whether empty nodes are removed
212# *
213# * @author John Thompson, DL Consulting Ltd.
214# */
215sub removeDocument()
216 {
217 my ($self, $path, $oid, $remove_empty) = @_;
218 print STDERR "ClassifyTreeModel.removeDocument(\"$path\",\"$oid\",$remove_empty)\n" unless !$self->{'debug'};
219 # Append to root path
220 my $root_node_obj = $self->getRootNode();
221 my $path_obj = $root_node_obj->getPath();
222 $path_obj->addPathComponent($path);
223 # Retrieve the node in question
224 my $node_obj = $self->getNodeByPath($path_obj);
225 # Check we retrieved a node
226 if ($node_obj)
227 {
228 # Remove the document
229 $node_obj->removeDocument($oid);
230 # If we have been asked to remove empty nodes, do so now.
231 if ($remove_empty)
232 {
233 my $cur_node_obj = $node_obj;
234 my $empty_node_obj = 0;
235 while ($cur_node_obj->getNumLeafDocs() == 0)
236 {
237 $empty_node_obj = $cur_node_obj;
238 $cur_node_obj = $cur_node_obj->getParentNode();
239 }
240 if ($empty_node_obj)
241 {
242 # Try to retrieve the parent of this node
243 my $parent_node_obj = $empty_node_obj->getParentNode();
244 # As long as we have a parent (i.e. we aren't the root node) go
245 # ahead and delete this subtree starting at empty node
246 if ($parent_node_obj)
247 {
248 $parent_node_obj->removeNode($empty_node_obj);
249 }
250 }
251 }
252 }
253 # If the node doesn't exist in this tree, then we can't very well remove
254 # anything from it!
255 }
256# /** removeDocument() **/
257
2581;
Note: See TracBrowser for help on using the repository browser.