root/main/trunk/greenstone2/perllib/ClassifyTreeModel.pm @ 21563

Revision 21563, 9.0 KB (checked in by mdewsnip, 11 years ago)

Deleted GDBMUtils.pm. What was the point of me spending all that time creating dbutil if people are just going to ignore it and continue writing GDBM-only code?!? Not impressed...

  • Property svn:keywords set to Author Date Id Revision
Line 
1package ClassifyTreeModel;
2
3use ClassifyTreeNode;
4use strict;
5
6
7# /** Constructor
8#  *
9#  *  @param  $class       The name of the class to bless as a string
10#  *  @param  $collection  The name of the collection whose GDBM database we
11#  *                       will be accessing as a string
12#  *  @param  $root        The oid of the root node of the classifier as a
13#  *                       string
14#  *  @return A reference to the ClassifyTreeModel object
15#  *
16#  *  @author John Thompson, DL Consulting Ltd.
17#  */
18sub new()
19  {
20    my ($class, $collection, $root) = @_;
21    my $debug = 0;
22    print STDERR "ClassifyTreeModel.new(\"$collection\", \"$root\")\n" unless !$debug;
23    # Store the variables
24    my $self = {};
25    $self->{'collection'} = $collection;
26    $self->{'debug'} = $debug;
27    $self->{'root'} = $root;
28    # Bless me father for I have sinned
29    bless $self, $class;
30    return $self;
31  }
32# /** new() **/
33
34# /** Given a path and a document id, add this document to the classifier tree
35#  *  creating any necessary tree nodes first.
36#  *
37#  *  @param  $value The path to store this document in
38#  *  @param  $oid Unique identifier of a document
39#  *
40#  *  @author John Thompson, DL Consulting Ltd.
41#  */
42sub addDocument()
43  {
44    my ($self, $value, $oid) = @_;
45    print STDERR "ClassifyTreeModel.addDocument(\"$value\", \"$oid\")\n" unless !$self->{'debug'};
46    # Generate a treepath object from the metadata value, remembering to prefix
47    # with the root nodes path
48    my $root_node_obj = $self->getRootNode();
49    my $path_obj = $root_node_obj->getPath();
50    $path_obj->addPathComponent($value);
51    # Ensure that this classifier node, and if necessary its ancestor nodes,
52    # exist in our tree.
53    my $node_obj = $self->getNodeByPath($path_obj);
54    if (!$node_obj)
55      {
56        # The node doesn't exist, so we need to add it
57        $node_obj = $self->addNode($path_obj);
58      }
59    # Add the document to the node.
60    $node_obj->addDocument($oid);
61    # Done.
62  }
63# /** addDocument() **/
64
65# /** Add a node into the tree first ensuring all its parent nodes are inserted
66#  *  to.
67#  *
68#  *  @param  $path_obj The path to insert the new node at
69#  *
70#  *  @author John Thompson, DL Consulting Ltd.
71#  */
72sub addNode()
73  {
74    my ($self, $path_obj) = @_;
75    print STDERR "ClassifyTreeModel.addNode(\"" . $path_obj->toString() . "\")\n" unless !$self->{'debug'};
76    # Ensure the parent exists, assuming we aren't at the root
77    my $parent_path_obj = $path_obj->getParentPath();
78    #rint STDERR "* parent path: " . $parent_path_obj->toString() . "\n";
79    my $parent_node_obj = $self->getNodeByPath($parent_path_obj);
80    #rint STDERR "* does parent node already exist? " . $parent_node_obj . "\n";
81    #rint STDERR "* are we at the root node yet? " . $parent_path_obj->isRootPath() . "\n";
82    if (!$parent_node_obj && !$parent_path_obj->isRootPath())
83      {
84        #rint STDERR "* recursive call!\n";
85        $parent_node_obj = $self->addNode($parent_path_obj);
86      }
87    # Insert this node into it's parent.
88    return $parent_node_obj->insertNode($path_obj);
89  }
90# /** addNode() **/
91
92# /** Retrieve the name of the collection this model is drawing from.
93#  *
94#  *  @return The collection name as a string
95#  *
96#  *  @author John Thompson, DL Consulting Ltd.
97#  */
98sub getCollection()
99  {
100    my ($self) = @_;
101    print STDERR "ClassifyTreeModel.getCollection()\n" unless !$self->{'debug'};
102    return $self->{'collection'};
103  }
104# /** getCollection() **/
105
106# /** Retrieve a node from this tree based upon its CLID (OID).
107#  *  @param  $clid  The CLID as a string
108#  *  @return The indicated ClassifyTreeNode or null
109sub getNodeByCLID()
110  {
111    my ($self, $clid) = @_;
112    print STDERR "ClassifyTreeModel.getNodeByCLID(\"$clid\")\n" unless !$self->{'debug'};
113    my $result = 0;
114    # Test if this clid is even in our tree
115    if($clid !~ /^$self->{'root'}/)
116      {
117        print STDERR "Requested node $clid, which isn't part of " . $self->{'root'} . "\n";
118        return 0;
119      }
120    # Unfortunately I have to check that there is text to retrieve before I
121    # create a new node.
122
123    my $index_text_directory_path = &util::filename_cat($ENV{'GSDLHOME'}, "collect", $self->getCollection(), "index", "text");
124    my $infodb_file_path = &dbutil::get_infodb_file_path("gdbm", $self->getCollection(), $index_text_directory_path);
125    if (&dbutil::read_infodb_entry("gdbm", $infodb_file_path, $clid) =~ /\w+/)
126      {
127        # Since the CLID can directly reference the correct entry in the GDBM we
128        # just create the node and return it
129        $result = new ClassifyTreeNode($self, $clid);
130      }
131    return $result;
132  }
133
134# /** Retrieve a node from this tree based upon a path
135#  *
136#  *  @param  $path  The path to the node as a ClassifyTreePath
137#  *  @return The indicated ClassifyTreeNode or null
138#  *
139#  *  @author John Thompson, DL Consulting Ltd.
140#  */
141sub getNodeByPath()
142  {
143    my ($self, $path_obj) = @_;
144    print STDERR "ClassifyTreeModel.getNodeByPath(\"" . $path_obj->toString() . "\")\n" unless !$self->{'debug'};
145    # Starting at the ROOT of the tree, and with the first path component,
146    # recursively descend through the tree looking for the node - we can assume
147    # that we've found the root node (otherwise we won't be in a tree)
148    my $cur_node_obj = $self->getRootNode();
149    my $cur_path_obj = $cur_node_obj->getPath();
150    my $depth = 1;
151    # Continue till we either find the node we want, or run out a nodes
152    while(!$cur_node_obj->getPath()->equals($path_obj))
153      {
154        # Append the path component at this depth to the current path we
155        # are searching for
156        $cur_path_obj->addPathComponent($path_obj->getPathComponent($depth));
157        $depth++;
158        #rint STDERR "Searching " . $cur_node_obj->getPath()->toString() . "'s children for: " . $cur_path_obj->toString() . "\n";
159        # Search through the current nodes children, looking for one that
160        # matches the current path
161        my $found = 0;
162        foreach my $child_node_obj ($cur_node_obj->getChildren())
163          {
164            #rint STDERR "* testing " . $child_node_obj->getPath()->toString() . "\n";
165            if($child_node_obj->getPath()->equals($cur_path_obj))
166              {
167                $cur_node_obj = $child_node_obj;
168                $found = 1;
169                last;
170              }
171          }
172        # Couldn't find any node with this path
173        if(!$found)
174          {
175            #rint STDERR "* no such node exists!\n";
176            return 0;
177          }
178      }
179    return $cur_node_obj;
180  }
181# /** getChild() **/
182
183# /** Retrieve the parent node of the given node.
184#  *
185#  *  @param  $child_node The node whose parent we want to retrieve
186#  *  @return The parent node, or 0 if this is the root
187#  *
188#  *  @author John Thompson, DL Consulting Ltd.
189#  */
190sub getParentNode()
191  {
192    my ($self, $child_node) = @_;
193    print STDERR "ClassifyTreeModel.getParentNode()\n" unless !$self->{'debug'};
194    return $child_node->getParentNode();
195  }
196# /** getParentNode() **/
197
198sub getRootNode()
199{
200  my ($self) = @_;
201  print STDERR "ClassifyTreeModel.getRootNode()\n" unless !$self->{'debug'};
202  return new ClassifyTreeNode($self, $self->{'root'});
203}
204
205# /** Remove the given document from the classifier tree, and then remove any
206#  *  empty nodes if required.
207#  *
208#  *  @param  $value The value which contains the path of the node to remove
209#  *                 the document from
210#  *  @param  $oid The unique identifier of the document to remove
211#  *  @param  $remove_empty Sets whether empty nodes are removed
212#  *
213#  *  @author John Thompson, DL Consulting Ltd.
214#  */
215sub removeDocument()
216  {
217    my ($self, $path, $oid, $remove_empty) = @_;
218    print STDERR "ClassifyTreeModel.removeDocument(\"$path\",\"$oid\",$remove_empty)\n" unless !$self->{'debug'};
219    # Append to root path
220    my $root_node_obj = $self->getRootNode();
221    my $path_obj = $root_node_obj->getPath();
222    $path_obj->addPathComponent($path);
223    # Retrieve the node in question
224    my $node_obj = $self->getNodeByPath($path_obj);
225    # Check we retrieved a node
226    if ($node_obj)
227      {
228        # Remove the document
229        $node_obj->removeDocument($oid);
230        # If we have been asked to remove empty nodes, do so now.
231        if ($remove_empty)
232          {
233            my $cur_node_obj = $node_obj;
234            my $empty_node_obj = 0;
235            while ($cur_node_obj->getNumLeafDocs() == 0)
236              {
237                $empty_node_obj = $cur_node_obj;
238                $cur_node_obj = $cur_node_obj->getParentNode();
239              }
240            if ($empty_node_obj)
241              {
242                # Try to retrieve the parent of this node
243                my $parent_node_obj = $empty_node_obj->getParentNode();
244                # As long as we have a parent (i.e. we aren't the root node) go
245                # ahead and delete this subtree starting at empty node
246                if ($parent_node_obj)
247                  {
248                    $parent_node_obj->removeNode($empty_node_obj);
249                  }
250              }
251          }
252      }
253    # If the node doesn't exist in this tree, then we can't very well remove
254    # anything from it!
255  }
256# /** removeDocument() **/
257
2581;
Note: See TracBrowser for help on using the browser.