root/main/trunk/greenstone2/perllib/ClassifyTreeModel.pm @ 21563

Revision 21563, 9.0 KB (checked in by mdewsnip, 9 years ago)

Deleted GDBMUtils.pm. What was the point of me spending all that time creating dbutil if people are just going to ignore it and continue writing GDBM-only code?!? Not impressed...

  • Property svn:keywords set to Author Date Id Revision
Line 
1package ClassifyTreeModel;
2
3use ClassifyTreeNode;
4use strict;
5
6
7# /** Constructor
8#  *
9#  *  @param  $class       The name of the class to bless as a string
10#  *  @param  $collection  The name of the collection whose GDBM database we
11#  *                       will be accessing as a string
12#  *  @param  $root        The oid of the root node of the classifier as a
13#  *                       string
14#  *  @return A reference to the ClassifyTreeModel object
15#  *
16#  *  @author John Thompson, DL Consulting Ltd.
17#  */
18sub new()
19  {
20    my ($class, $collection, $root) = @_;
21    my $debug = 0;
22    print STDERR "ClassifyTreeModel.new(\"$collection\", \"$root\")\n" unless !$debug;
23    # Store the variables
24    my $self = {};
25    $self->{'collection'} = $collection;
26    $self->{'debug'} = $debug;
27    $self->{'root'} = $root;
28    # Bless me father for I have sinned
29    bless $self, $class;
30    return $self;
31  }
32# /** new() **/
33
34# /** Given a path and a document id, add this document to the classifier tree
35#  *  creating any necessary tree nodes first.
36#  *
37#  *  @param  $value The path to store this document in
38#  *  @param  $oid Unique identifier of a document
39#  *
40#  *  @author John Thompson, DL Consulting Ltd.
41#  */
42sub addDocument()
43  {
44    my ($self, $value, $oid) = @_;
45    print STDERR "ClassifyTreeModel.addDocument(\"$value\", \"$oid\")\n" unless !$self->{'debug'};
46    # Generate a treepath object from the metadata value, remembering to prefix
47    # with the root nodes path
48    my $root_node_obj = $self->getRootNode();
49    my $path_obj = $root_node_obj->getPath();
50    $path_obj->addPathComponent($value);
51    # Ensure that this classifier node, and if necessary its ancestor nodes,
52    # exist in our tree.
53    my $node_obj = $self->getNodeByPath($path_obj);
54    if (!$node_obj)
55      {
56        # The node doesn't exist, so we need to add it
57        $node_obj = $self->addNode($path_obj);
58      }
59    # Add the document to the node.
60    $node_obj->addDocument($oid);
61    # Done.
62  }
63# /** addDocument() **/
64
65# /** Add a node into the tree first ensuring all its parent nodes are inserted
66#  *  to.
67#  *
68#  *  @param  $path_obj The path to insert the new node at
69#  *
70#  *  @author John Thompson, DL Consulting Ltd.
71#  */
72sub addNode()
73  {
74    my ($self, $path_obj) = @_;
75    print STDERR "ClassifyTreeModel.addNode(\"" . $path_obj->toString() . "\")\n" unless !$self->{'debug'};
76    # Ensure the parent exists, assuming we aren't at the root
77    my $parent_path_obj = $path_obj->getParentPath();
78    #rint STDERR "* parent path: " . $parent_path_obj->toString() . "\n";
79    my $parent_node_obj = $self->getNodeByPath($parent_path_obj);
80    #rint STDERR "* does parent node already exist? " . $parent_node_obj . "\n";
81    #rint STDERR "* are we at the root node yet? " . $parent_path_obj->isRootPath() . "\n";
82    if (!$parent_node_obj && !$parent_path_obj->isRootPath())
83      {
84        #rint STDERR "* recursive call!\n";
85        $parent_node_obj = $self->addNode($parent_path_obj);
86      }
87    # Insert this node into it's parent.
88    return $parent_node_obj->insertNode($path_obj);
89  }
90# /** addNode() **/
91
92# /** Retrieve the name of the collection this model is drawing from.
93#  *
94#  *  @return The collection name as a string
95#  *
96#  *  @author John Thompson, DL Consulting Ltd.
97#  */
98sub getCollection()
99  {
100    my ($self) = @_;
101    print STDERR "ClassifyTreeModel.getCollection()\n" unless !$self->{'debug'};
102    return $self->{'collection'};
103  }
104# /** getCollection() **/
105
106# /** Retrieve a node from this tree based upon its CLID (OID).
107#  *  @param  $clid  The CLID as a string
108#  *  @return The indicated ClassifyTreeNode or null
109sub getNodeByCLID()
110  {
111    my ($self, $clid) = @_;
112    print STDERR "ClassifyTreeModel.getNodeByCLID(\"$clid\")\n" unless !$self->{'debug'};
113    my $result = 0;
114    # Test if this clid is even in our tree
115    if($clid !~ /^$self->{'root'}/)
116      {
117        print STDERR "Requested node $clid, which isn't part of " . $self->{'root'} . "\n";
118        return 0;
119      }
120    # Unfortunately I have to check that there is text to retrieve before I
121    # create a new node.
122
123    my $index_text_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $self->getCollection(), "index", "text");
124    my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $self->getCollection(), $index_text_directory_path);
125    if (&dbutil::read_infodb_entry("gdbm", $infodb_file_path, $clid) =~ /\w+/)
126      {
127        # Since the CLID can directly reference the correct entry in the GDBM we
128        # just create the node and return it
129        $result = new ClassifyTreeNode($self, $clid);
130      }
131    return $result;
132  }
133
134# /** Retrieve a node from this tree based upon a path
135#  *
136#  *  @param  $path  The path to the node as a ClassifyTreePath
137#  *  @return The indicated ClassifyTreeNode or null
138#  *
139#  *  @author John Thompson, DL Consulting Ltd.
140#  */
141sub getNodeByPath()
142  {
143    my ($self, $path_obj) = @_;
144    print STDERR "ClassifyTreeModel.getNodeByPath(\"" . $path_obj->toString() . "\")\n" unless !$self->{'debug'};
145    # Starting at the ROOT of the tree, and with the first path component,
146    # recursively descend through the tree looking for the node - we can assume
147    # that we've found the root node (otherwise we won't be in a tree)
148    my $cur_node_obj = $self->getRootNode();
149    my $cur_path_obj = $cur_node_obj->getPath();
150    my $depth = 1;
151    # Continue till we either find the node we want, or run out a nodes
152    while(!$cur_node_obj->getPath()->equals($path_obj))
153      {
154        # Append the path component at this depth to the current path we
155        # are searching for
156        $cur_path_obj->addPathComponent($path_obj->getPathComponent($depth));
157        $depth++;
158        #rint STDERR "Searching " . $cur_node_obj->getPath()->toString() . "'s children for: " . $cur_path_obj->toString() . "\n";
159        # Search through the current nodes children, looking for one that
160        # matches the current path
161        my $found = 0;
162        foreach my $child_node_obj ($cur_node_obj->getChildren())
163          {
164            #rint STDERR "* testing " . $child_node_obj->getPath()->toString() . "\n";
165            if($child_node_obj->getPath()->equals($cur_path_obj))
166              {
167                $cur_node_obj = $child_node_obj;
168                $found = 1;
169                last;
170              }
171          }
172        # Couldn't find any node with this path
173        if(!$found)
174          {
175            #rint STDERR "* no such node exists!\n";
176            return 0;
177          }
178      }
179    return $cur_node_obj;
180  }
181# /** getChild() **/
182
183# /** Retrieve the parent node of the given node.
184#  *
185#  *  @param  $child_node The node whose parent we want to retrieve
186#  *  @return The parent node, or 0 if this is the root
187#  *
188#  *  @author John Thompson, DL Consulting Ltd.
189#  */
190sub getParentNode()
191  {
192    my ($self, $child_node) = @_;
193    print STDERR "ClassifyTreeModel.getParentNode()\n" unless !$self->{'debug'};
194    return $child_node->getParentNode();
195  }
196# /** getParentNode() **/
197
198sub getRootNode()
199{
200  my ($self) = @_;
201  print STDERR "ClassifyTreeModel.getRootNode()\n" unless !$self->{'debug'};
202  return new ClassifyTreeNode($self, $self->{'root'});
203}
204
205# /** Remove the given document from the classifier tree, and then remove any
206#  *  empty nodes if required.
207#  *
208#  *  @param  $value The value which contains the path of the node to remove
209#  *                 the document from
210#  *  @param  $oid The unique identifier of the document to remove
211#  *  @param  $remove_empty Sets whether empty nodes are removed
212#  *
213#  *  @author John Thompson, DL Consulting Ltd.
214#  */
215sub removeDocument()
216  {
217    my ($self, $path, $oid, $remove_empty) = @_;
218    print STDERR "ClassifyTreeModel.removeDocument(\"$path\",\"$oid\",$remove_empty)\n" unless !$self->{'debug'};
219    # Append to root path
220    my $root_node_obj = $self->getRootNode();
221    my $path_obj = $root_node_obj->getPath();
222    $path_obj->addPathComponent($path);
223    # Retrieve the node in question
224    my $node_obj = $self->getNodeByPath($path_obj);
225    # Check we retrieved a node
226    if ($node_obj)
227      {
228        # Remove the document
229        $node_obj->removeDocument($oid);
230        # If we have been asked to remove empty nodes, do so now.
231        if ($remove_empty)
232          {
233            my $cur_node_obj = $node_obj;
234            my $empty_node_obj = 0;
235            while ($cur_node_obj->getNumLeafDocs() == 0)
236              {
237                $empty_node_obj = $cur_node_obj;
238                $cur_node_obj = $cur_node_obj->getParentNode();
239              }
240            if ($empty_node_obj)
241              {
242                # Try to retrieve the parent of this node
243                my $parent_node_obj = $empty_node_obj->getParentNode();
244                # As long as we have a parent (i.e. we aren't the root node) go
245                # ahead and delete this subtree starting at empty node
246                if ($parent_node_obj)
247                  {
248                    $parent_node_obj->removeNode($empty_node_obj);
249                  }
250              }
251          }
252      }
253    # If the node doesn't exist in this tree, then we can't very well remove
254    # anything from it!
255  }
256# /** removeDocument() **/
257
2581;
Note: See TracBrowser for help on using the browser.