source: main/trunk/greenstone2/perllib/ClassifyTreeModel.pm@ 32578

Last change on this file since 32578 was 23485, checked in by davidb, 13 years ago

read_infodb_entry now returns a hashmap directly. Code updated to take advantage of this, and in places where the hashmap is not needed, the alternative read_infodb_rawentry is called.

  • Property svn:keywords set to Author Date Id Revision
File size: 10.3 KB
Line 
1###########################################################################
2#
3# ClassifyTreeModel.pm --
4#
5# A component of the Greenstone digital library software
6# from the New Zealand Digital Library Project at the
7# University of Waikato, New Zealand.
8#
9# Copyright (C) 2006-2010 DL Consulting Ltd
10#
11# This program is free software; you can redistribute it and/or modify
12# it under the terms of the GNU General Public License as published by
13# the Free Software Foundation; either version 2 of the License, or
14# (at your option) any later version.
15#
16# This program is distributed in the hope that it will be useful,
17# but WITHOUT ANY WARRANTY; without even the implied warranty of
18# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19# GNU General Public License for more details.
20#
21# You should have received a copy of the GNU General Public License
22# along with this program; if not, write to the Free Software
23# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24#
25###########################################################################
26
27
28package ClassifyTreeModel;
29
30use ClassifyTreeNode;
31use strict;
32
33
# /** Constructor
#  *
#  *  @param  $class       The name of the class to bless, as a string
#  *  @param  $collection  The name of the collection whose info database we
#  *                       will be accessing, as a string
#  *  @param  $infodbtype  The info database type; stored unchanged on the
#  *                       object (this file hands it on to the dbutil calls)
#  *  @param  $root        The oid of the root node of the classifier, as a
#  *                       string
#  *  @return A reference to the new ClassifyTreeModel object
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub new()
{
    my ($class, $collection, $infodbtype, $root) = @_;

    # Tracing is hard-wired off; flip to 1 to log every call on STDERR.
    my $debug = 0;
    if ($debug)
    {
        print STDERR "ClassifyTreeModel.new(\"$collection\", $infodbtype, \"$root\")\n";
    }

    # All state lives in one hash; the other methods read these four keys.
    my $self = {
        'collection' => $collection,
        'infodbtype' => $infodbtype,
        'debug'      => $debug,
        'root'       => $root,
    };
    return bless $self, $class;
}
# /** new() **/
61
# /** Add a document to the classifier tree at the given path, first creating
#  *  the target node (and any missing ancestors) if it is not already there.
#  *
#  *  @param  $value The path to store this document in; it is appended as a
#  *                 path component to the root node's path
#  *  @param  $oid   Unique identifier of a document
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub addDocument()
{
    my ($self, $value, $oid) = @_;
    if ($self->{'debug'})
    {
        print STDERR "ClassifyTreeModel.addDocument(\"$value\", \"$oid\")\n";
    }

    # The target path is the root node's path extended by the supplied value.
    my $target_path = $self->getRootNode()->getPath();
    $target_path->addPathComponent($value);

    # Reuse the classifier node when it exists; otherwise have addNode()
    # create it together with whatever ancestors are missing.
    my $target_node = $self->getNodeByPath($target_path)
                   || $self->addNode($target_path);

    # Finally hang the document off that node.
    $target_node->addDocument($oid);
}
# /** addDocument() **/
92
# /** Add a node into the tree, first ensuring all its parent nodes are
#  *  inserted too.
#  *
#  *  @param  $path_obj The path to insert the new node at
#  *  @return Whatever the parent node's insertNode() returns for the new path
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub addNode()
{
    my ($self, $path_obj) = @_;
    if ($self->{'debug'})
    {
        print STDERR "ClassifyTreeModel.addNode(\"" . $path_obj->toString() . "\")\n";
    }

    # Find the node that should own the new one.
    my $parent_path = $path_obj->getParentPath();
    my $parent_node = $self->getNodeByPath($parent_path);

    # A missing parent is created recursively, except when the parent path is
    # the root path, where the recursion stops.
    unless ($parent_node || $parent_path->isRootPath())
    {
        $parent_node = $self->addNode($parent_path);
    }

    # Insert this node into its parent.
    return $parent_node->insertNode($path_obj);
}
# /** addNode() **/
119
# /** Accessor for the name of the collection this model draws from, as it
#  *  was given to the constructor.
#  *
#  *  @return The collection name as a string
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub getCollection()
{
    my ($self) = @_;
    if ($self->{'debug'})
    {
        print STDERR "ClassifyTreeModel.getCollection()\n";
    }
    return $self->{'collection'};
}
# /** getCollection() **/
133
# /** Accessor for the info database type this model was constructed with.
#  *
#  *  @return The value stored under 'infodbtype', unchanged
#  */
sub getInfoDBType()
{
    my ($self) = @_;
    return $self->{'infodbtype'};
}
# /** getInfoDBType() **/
139
# /** Retrieve a node from this tree based upon its CLID (OID).
#  *
#  *  A CLID belongs to this tree when it is the root OID itself or the root
#  *  OID followed by a "." and further components. The root OID is matched
#  *  literally, so "CL10" is not mistaken for a member of tree "CL1" and a
#  *  "." inside the root OID does not act as a regex wildcard.
#  *
#  *  @param  $clid The CLID as a string
#  *  @return The indicated ClassifyTreeNode, or 0 when the CLID is outside
#  *          this tree or has no text in the info database
#  */
sub getNodeByCLID()
{
    my ($self, $clid) = @_;
    print STDERR "ClassifyTreeModel.getNodeByCLID(\"$clid\")\n" if $self->{'debug'};

    # Test if this clid is even in our tree. \Q..\E quotes the root OID and
    # the (?:\.|\z) boundary stops a mere string prefix from matching.
    my $root = $self->{'root'};
    if ($clid !~ /\A\Q$root\E(?:\.|\z)/)
    {
        print STDERR "Requested node $clid, which isn't part of " . $root . "\n";
        return 0;
    }

    # Unfortunately we have to check that there is text to retrieve before
    # creating a new node.
    my $index_text_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $self->getCollection(), "index", "text");
    my $infodb_file_path = &dbutil::get_infodb_file_path($self->{'infodbtype'}, $self->getCollection(), $index_text_directory_path);
    my $raw_entry = &dbutil::read_infodb_rawentry($self->{'infodbtype'}, $infodb_file_path, $clid);
    if (defined($raw_entry) && $raw_entry =~ /\w+/)
    {
        # Since the CLID can directly reference the correct entry in the info
        # database we just create the node and return it. The explicit
        # Class->new form is used because this package defines its own new().
        return ClassifyTreeNode->new($self, $clid);
    }
    return 0;
}
# /** getNodeByCLID() **/
167
# /** Retrieve a node from this tree based upon a path.
#  *
#  *  The search starts at the root node and descends one level per
#  *  iteration, at each level looking among the current node's children for
#  *  the one whose path equals the path built so far.
#  *
#  *  @param  $path_obj The path to the node as a ClassifyTreePath
#  *  @return The indicated ClassifyTreeNode, or 0 if no such node exists
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub getNodeByPath()
{
    my ($self, $path_obj) = @_;
    if ($self->{'debug'})
    {
        print STDERR "ClassifyTreeModel.getNodeByPath(\"" . $path_obj->toString() . "\")\n";
    }

    # Begin at the root; we can assume the root node exists (otherwise we
    # would not be in a tree). $probe_path grows by one component per level.
    my $node = $self->getRootNode();
    my $probe_path = $node->getPath();
    my $level = 1;

    # Continue until we either reach the node we want or run out of nodes.
    until ($node->getPath()->equals($path_obj))
    {
        # Extend the probe with the requested path's component at this level.
        $probe_path->addPathComponent($path_obj->getPathComponent($level));
        $level++;

        # Pick the child of the current node that sits at the probe path.
        my $next_node;
        foreach my $candidate ($node->getChildren())
        {
            next unless $candidate->getPath()->equals($probe_path);
            $next_node = $candidate;
            last;
        }

        # Couldn't find any node with this path.
        return 0 unless defined($next_node);
        $node = $next_node;
    }
    return $node;
}
# /** getNodeByPath() **/
216
# /** Retrieve the parent node of the given node. This simply delegates to
#  *  the node's own getParentNode() method.
#  *
#  *  @param  $child_node The node whose parent we want to retrieve
#  *  @return The parent node, or 0 if this is the root
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub getParentNode()
{
    my ($self, $child_node) = @_;
    if ($self->{'debug'})
    {
        print STDERR "ClassifyTreeModel.getParentNode()\n";
    }
    # Returned directly so the caller's context reaches the node's method.
    return $child_node->getParentNode();
}
# /** getParentNode() **/
231
# /** Retrieve the root node of this classifier tree. A new ClassifyTreeNode
#  *  is constructed for the stored root oid on every call.
#  *
#  *  @return A ClassifyTreeNode for the oid passed to the constructor as $root
#  */
sub getRootNode()
{
    my ($self) = @_;
    print STDERR "ClassifyTreeModel.getRootNode()\n" if $self->{'debug'};
    # Explicit Class->new call: this package defines its own new(), so the
    # indirect-object form "new ClassifyTreeNode(...)" would depend on the
    # parser's heuristics to be read as a method call.
    return ClassifyTreeNode->new($self, $self->{'root'});
}
# /** getRootNode() **/
238
# /** Remove the given document from the classifier tree, and then remove any
#  *  empty nodes if required.
#  *
#  *  When $remove_empty is set, the tree is climbed from the affected node
#  *  while nodes report zero leaf documents, and the highest such node that
#  *  still has a parent is removed from that parent. The root node itself is
#  *  never removed, and the climb stops at the root rather than trying to
#  *  inspect a non-existent parent.
#  *
#  *  @param  $path         The path of the node to remove the document from;
#  *                        it is appended to the root node's path
#  *  @param  $oid          The unique identifier of the document to remove
#  *  @param  $remove_empty Sets whether empty nodes are removed
#  *
#  *  @author John Thompson, DL Consulting Ltd.
#  */
sub removeDocument()
{
    my ($self, $path, $oid, $remove_empty) = @_;
    print STDERR "ClassifyTreeModel.removeDocument(\"$path\",\"$oid\",$remove_empty)\n" if $self->{'debug'};

    # Append to root path
    my $path_obj = $self->getRootNode()->getPath();
    $path_obj->addPathComponent($path);

    # Retrieve the node in question. If the node doesn't exist in this tree,
    # then we can't very well remove anything from it!
    my $node_obj = $self->getNodeByPath($path_obj);
    return unless $node_obj;

    # Remove the document
    $node_obj->removeDocument($oid);

    # Only prune when we have been asked to remove empty nodes.
    return unless $remove_empty;

    # Climb while the current node is empty. $empty_node_obj ends up as the
    # highest empty node that has a parent, $cur_node_obj as that parent.
    # getParentNode() is expected to give a false value at the root, so that
    # is where the climb must stop.
    my $empty_node_obj = 0;
    my $cur_node_obj = $node_obj;
    while ($cur_node_obj->getNumLeafDocs() == 0)
    {
        my $parent_node_obj = $cur_node_obj->getParentNode();
        last unless $parent_node_obj;
        $empty_node_obj = $cur_node_obj;
        $cur_node_obj = $parent_node_obj;
    }

    # Delete the whole empty subtree from its (existing) parent.
    if ($empty_node_obj)
    {
        $cur_node_obj->removeNode($empty_node_obj);
    }
}
# /** removeDocument() **/
291
2921;
Note: See TracBrowser for help on using the repository browser.