root/gsdl/trunk/perllib/ClassifyTreeModel.pm @ 17110

Revision 17087, 8.8 KB (checked in by davidb, 11 years ago)

Introduction of new GDBM alternative for archives.inf as step towards full incremental building. Information traditionally stored in archives.inf PLUS additional information that will help with working out what files have changed since last build, and what doc-id they hashed to is stored in two GDBM databases. For now these databases aren't read, but in the future ArchivesInfPlugin? will be upgraded to use these to support these.

  • Property svn:keywords set to Author Date Id Revision
Line 
1package ClassifyTreeModel;
2
3use ClassifyTreeNode;
4use GDBMUtils;
5use strict;
6
7# /** Constructor
8#  *
9#  *  @param  $class       The name of the class to bless as a string
10#  *  @param  $collection  The name of the collection whose GDBM database we
11#  *                       will be accessing as a string
12#  *  @param  $root        The oid of the root node of the classifier as a
13#  *                       string
14#  *  @return A reference to the ClassifyTreeModel object
15#  *
16#  *  @author John Thompson, DL Consulting Ltd.
17#  */
18sub new()
19  {
20    my ($class, $collection, $root) = @_;
21    my $debug = 0;
22    print STDERR "ClassifyTreeModel.new(\"$collection\", \"$root\")\n" unless !$debug;
23    # Store the variables
24    my $self = {};
25    $self->{'collection'} = $collection;
26    $self->{'debug'} = $debug;
27    $self->{'root'} = $root;
28    # Bless me father for I have sinned
29    bless $self, $class;
30    return $self;
31  }
32# /** new() **/
33
34# /** Given a path and a document id, add this document to the classifier tree
35#  *  creating any necessary tree nodes first.
36#  *
37#  *  @param  $value The path to store this document in
38#  *  @param  $oid Unique identifier of a document
39#  *
40#  *  @author John Thompson, DL Consulting Ltd.
41#  */
42sub addDocument()
43  {
44    my ($self, $value, $oid) = @_;
45    print STDERR "ClassifyTreeModel.addDocument(\"$value\", \"$oid\")\n" unless !$self->{'debug'};
46    # Generate a treepath object from the metadata value, remembering to prefix
47    # with the root nodes path
48    my $root_node_obj = $self->getRootNode();
49    my $path_obj = $root_node_obj->getPath();
50    $path_obj->addPathComponent($value);
51    # Ensure that this classifier node, and if necessary its ancestor nodes,
52    # exist in our tree.
53    my $node_obj = $self->getNodeByPath($path_obj);
54    if (!$node_obj)
55      {
56        # The node doesn't exist, so we need to add it
57        $node_obj = $self->addNode($path_obj);
58      }
59    # Add the document to the node.
60    $node_obj->addDocument($oid);
61    # Done.
62  }
63# /** addDocument() **/
64
65# /** Add a node into the tree first ensuring all its parent nodes are inserted
66#  *  to.
67#  *
68#  *  @param  $path_obj The path to insert the new node at
69#  *
70#  *  @author John Thompson, DL Consulting Ltd.
71#  */
72sub addNode()
73  {
74    my ($self, $path_obj) = @_;
75    print STDERR "ClassifyTreeModel.addNode(\"" . $path_obj->toString() . "\")\n" unless !$self->{'debug'};
76    # Ensure the parent exists, assuming we aren't at the root
77    my $parent_path_obj = $path_obj->getParentPath();
78    #rint STDERR "* parent path: " . $parent_path_obj->toString() . "\n";
79    my $parent_node_obj = $self->getNodeByPath($parent_path_obj);
80    #rint STDERR "* does parent node already exist? " . $parent_node_obj . "\n";
81    #rint STDERR "* are we at the root node yet? " . $parent_path_obj->isRootPath() . "\n";
82    if (!$parent_node_obj && !$parent_path_obj->isRootPath())
83      {
84        #rint STDERR "* recursive call!\n";
85        $parent_node_obj = $self->addNode($parent_path_obj);
86      }
87    # Insert this node into it's parent.
88    return $parent_node_obj->insertNode($path_obj);
89  }
90# /** addNode() **/
91
92# /** Retrieve the name of the collection this model is drawing from.
93#  *
94#  *  @return The collection name as a string
95#  *
96#  *  @author John Thompson, DL Consulting Ltd.
97#  */
98sub getCollection()
99  {
100    my ($self) = @_;
101    print STDERR "ClassifyTreeModel.getCollection()\n" unless !$self->{'debug'};
102    return $self->{'collection'};
103  }
104# /** getCollection() **/
105
106# /** Retrieve a node from this tree based upon its CLID (OID).
107#  *  @param  $clid  The CLID as a string
108#  *  @return The indicated ClassifyTreeNode or null
109sub getNodeByCLID()
110  {
111    my ($self, $clid) = @_;
112    print STDERR "ClassifyTreeModel.getNodeByCLID(\"$clid\")\n" unless !$self->{'debug'};
113    my $result = 0;
114    # Test if this clid is even in our tree
115    if($clid !~ /^$self->{'root'}/)
116      {
117        print STDERR "Requested node $clid, which isn't part of " . $self->{'root'} . "\n";
118        return 0;
119      }
120    # Unfortunately I have to check that there is text to retrieve before I
121    # create a new node.
122    if(&GDBMUtils::gdbmCachedCollectionGet($self->getCollection(), $clid) =~ /\w+/)
123      {
124        # Since the CLID can directly reference the correct entry in the GDBM we
125        # just create the node and return it
126        $result = new ClassifyTreeNode($self, $clid);
127      }
128    return $result;
129  }
130
131# /** Retrieve a node from this tree based upon a path
132#  *
133#  *  @param  $path  The path to the node as a ClassifyTreePath
134#  *  @return The indicated ClassifyTreeNode or null
135#  *
136#  *  @author John Thompson, DL Consulting Ltd.
137#  */
138sub getNodeByPath()
139  {
140    my ($self, $path_obj) = @_;
141    print STDERR "ClassifyTreeModel.getNodeByPath(\"" . $path_obj->toString() . "\")\n" unless !$self->{'debug'};
142    # Starting at the ROOT of the tree, and with the first path component,
143    # recursively descend through the tree looking for the node - we can assume
144    # that we've found the root node (otherwise we won't be in a tree)
145    my $cur_node_obj = $self->getRootNode();
146    my $cur_path_obj = $cur_node_obj->getPath();
147    my $depth = 1;
148    # Continue till we either find the node we want, or run out a nodes
149    while(!$cur_node_obj->getPath()->equals($path_obj))
150      {
151        # Append the path component at this depth to the current path we
152        # are searching for
153        $cur_path_obj->addPathComponent($path_obj->getPathComponent($depth));
154        $depth++;
155        #rint STDERR "Searching " . $cur_node_obj->getPath()->toString() . "'s children for: " . $cur_path_obj->toString() . "\n";
156        # Search through the current nodes children, looking for one that
157        # matches the current path
158        my $found = 0;
159        foreach my $child_node_obj ($cur_node_obj->getChildren())
160          {
161            #rint STDERR "* testing " . $child_node_obj->getPath()->toString() . "\n";
162            if($child_node_obj->getPath()->equals($cur_path_obj))
163              {
164                $cur_node_obj = $child_node_obj;
165                $found = 1;
166                last;
167              }
168          }
169        # Couldn't find any node with this path
170        if(!$found)
171          {
172            #rint STDERR "* no such node exists!\n";
173            return 0;
174          }
175      }
176    return $cur_node_obj;
177  }
178# /** getChild() **/
179
180# /** Retrieve the parent node of the given node.
181#  *
182#  *  @param  $child_node The node whose parent we want to retrieve
183#  *  @return The parent node, or 0 if this is the root
184#  *
185#  *  @author John Thompson, DL Consulting Ltd.
186#  */
187sub getParentNode()
188  {
189    my ($self, $child_node) = @_;
190    print STDERR "ClassifyTreeModel.getParentNode()\n" unless !$self->{'debug'};
191    return $child_node->getParentNode();
192  }
193# /** getParentNode() **/
194
195sub getRootNode()
196{
197  my ($self) = @_;
198  print STDERR "ClassifyTreeModel.getRootNode()\n" unless !$self->{'debug'};
199  return new ClassifyTreeNode($self, $self->{'root'});
200}
201
202# /** Remove the given document from the classifier tree, and then remove any
203#  *  empty nodes if required.
204#  *
205#  *  @param  $value The value which contains the path of the node to remove
206#  *                 the document from
207#  *  @param  $oid The unique identifier of the document to remove
208#  *  @param  $remove_empty Sets whether empty nodes are removed
209#  *
210#  *  @author John Thompson, DL Consulting Ltd.
211#  */
212sub removeDocument()
213  {
214    my ($self, $path, $oid, $remove_empty) = @_;
215    print STDERR "ClassifyTreeModel.removeDocument(\"$path\",\"$oid\",$remove_empty)\n" unless !$self->{'debug'};
216    # Append to root path
217    my $root_node_obj = $self->getRootNode();
218    my $path_obj = $root_node_obj->getPath();
219    $path_obj->addPathComponent($path);
220    # Retrieve the node in question
221    my $node_obj = $self->getNodeByPath($path_obj);
222    # Check we retrieved a node
223    if ($node_obj)
224      {
225        # Remove the document
226        $node_obj->removeDocument($oid);
227        # If we have been asked to remove empty nodes, do so now.
228        if ($remove_empty)
229          {
230            my $cur_node_obj = $node_obj;
231            my $empty_node_obj = 0;
232            while ($cur_node_obj->getNumLeafDocs() == 0)
233              {
234                $empty_node_obj = $cur_node_obj;
235                $cur_node_obj = $cur_node_obj->getParentNode();
236              }
237            if ($empty_node_obj)
238              {
239                # Try to retrieve the parent of this node
240                my $parent_node_obj = $empty_node_obj->getParentNode();
241                # As long as we have a parent (i.e. we aren't the root node) go
242                # ahead and delete this subtree starting at empty node
243                if ($parent_node_obj)
244                  {
245                    $parent_node_obj->removeNode($empty_node_obj);
246                  }
247              }
248          }
249      }
250    # If the node doesn't exist in this tree, then we can't very well remove
251    # anything from it!
252  }
253# /** removeDocument() **/
254
2551;
Note: See TracBrowser for help on using the browser.