source: gsdl/trunk/perllib/ClassifyTreeNode.pm@ 17056

Last change on this file since 17056 was 15890, checked in by mdewsnip, 16 years ago

Adding "use strict", and fixing problems identified.

  • Property svn:keywords set to Author Date Id Revision
File size: 21.9 KB
Line 
1package ClassifyTreeNode;
2
3use ClassifyTreeNode;
4use ClassifyTreePath;
5use GDBMUtils;
6use strict;
7
8# /** Constructor
9# *
10# *
11# * @author John Thompson, DL Consulting Ltd.
12# */
13sub new()
14 {
15 my ($class, $model, $clid, $force_new) = @_;
16 my $debug = 0;
17 $force_new = 0 unless defined($force_new);
18 print STDERR "ClassifyTreeNode.new(model, \"$clid\", $force_new)\n" unless !$debug;
19 $force_new = 0 unless defined($force_new);
20 # Test the parameters
21 die("Can't create a tree node that doesn't belong to a tree model!") unless $model;
22 die("Can't create a tree node that doesn't have a unique id (OID)!") unless $clid;
23 # Store the variables
24 my $self = {};
25 $self->{'debug'} = $debug;
26 $self->{'model'} = $model;
27 $self->{'clid'} = $clid;
28 # Check if this node already exists in the database, and if not insert it
29 # now
30 my $text = &GDBMUtils::gdbmGet($model->getCollection(), $clid);
31 if($text !~ /\w+/ && $force_new)
32 {
33 &GDBMUtils::gdbmSet($model->getCollection(), $clid,
34 "<doctype>classify\n<hastxt>0\n<childtype>VList\n<Title>\n<numleafdocs>0\n<contains>\n");
35 }
36 # Bless me father for I have sinned
37 bless $self, $class;
38 return $self;
39 }
40# /** new() **/
41
42# /** Add a document to this tree node.
43# *
44# * @param $oid The unique identifier of the document to add
45# *
46# * @author John Thompson, DL Consulting Ltd
47# */
48sub addDocument()
49 {
50 my ($self, $oid) = @_;
51 print STDERR "ClassifyTreeNode.addDocument(\"$oid\")\n" unless !$self->{'debug'};
52 # Get the current contains list
53 my $contains = $self->getContains();
54 # See whether this document already exists in the contains
55 if ($contains !~ /(^$oid$|^$oid;$|;$oid;|;$oid$)/)
56 {
57 # If not, append to the contains list
58 if ($contains ne "")
59 {
60 $contains .= ";$oid";
61 }
62 else
63 {
64 $contains = $oid;
65 }
66 # Store the changed contains
67 $self->setContains($contains);
68 # We now have to update the numleafdocs count for this node and its
69 # ancestor nodes
70 my $cur_node_obj = $self;
71 while ($cur_node_obj)
72 {
73 my $numleafdocs = $cur_node_obj->getNumLeafDocs();
74 if ($numleafdocs =~ /^\d+$/)
75 {
76 $numleafdocs ++;
77 }
78 else
79 {
80 $numleafdocs = 1;
81 }
82 $cur_node_obj->setNumLeafDocs($numleafdocs);
83 $cur_node_obj = $cur_node_obj->getParentNode();
84 }
85 }
86 else
87 {
88 print STDERR "Document already exists!\n" unless !$self->{'debug'};
89 }
90 }
91# /** addDocument() **/
92
93# /** Changes the CLID of a particular node. Note that this is significantly
94# * tricky step, as we have to remove the old node from the database, and
95# * then readd with the corrected CLID.
96# *
97# * @param $clid The new CLID as an integer
98# *
99# * @author John Thompson, DL Consulting Ltd.
100# */
101sub changeCLID()
102 {
103 my($self, $new_clid) = @_;
104 print STDERR "ClassifyTreeNode.changeCLID(\"$new_clid\")\n" unless !$self->{'debug'};
105 # Store the current clid for later use
106 my $old_clid = $self->{'clid'};
107 # And record the children now, as they'll change after we shift the parent
108 # CLID
109 my @child_nodes = $self->getChildren();
110 # Retrieve the current document
111 my $text = $self->toString();
112 # Create a new document with the correct CLID
113 &GDBMUtils::gdbmSet($self->{'model'}->getCollection(), $new_clid, $text);
114 # Remove the old document
115 &GDBMUtils::gdbmSet($self->{'model'}->getCollection(), $self->{'clid'});
116 # Finally, change the clid stored in this document
117 $self->{'clid'} = $new_clid;
118 # Now go through this nodes children, and shift them too
119 foreach my $child_node (@child_nodes)
120 {
121 # We determine the new clid by retrieving the childs current clid,
122 # and then replacing any occurance to the parents old clid with the
123 # parents new clid
124 my $old_child_clid = $child_node->getCLID();
125 #rint STDERR "* considering: " . $old_child_clid . "\n";
126 if($old_child_clid =~ /^CL/)
127 {
128 my $new_child_clid = $new_clid . substr($old_child_clid, length($old_clid));
129 #rint STDERR "* shifting child $old_child_clid to $new_child_clid\n";
130 $child_node->changeCLID($new_child_clid);
131 }
132 }
133 }
134# /** changeCLID() **/
135
136
137# /** Retrieve the unique id for this classifier.
138# *
139# * @return The CLID as a string
140# *
141# * @author John Thompson, DL Consulting Ltd.
142# */
143sub getCLID()
144 {
145 my ($self) = @_;
146 print STDERR "ClassifyTreeNode.getCLID()\n" unless !$self->{'debug'};
147 return $self->{'clid'};
148 }
149
150# /** Return the child objects of this node an as array.
151# *
152# * @return An array of node objects
153# *
154# * @author John Thompson, DL Consulting Ltd.
155# */
156sub getChildren()
157 {
158 my ($self) = @_;
159 print STDERR "ClassifyTreeNode.getChildren()\n" unless !$self->{'debug'};
160 my $text = $self->toString();
161 my @children = ();
162 # Retrieve the contains metadata item
163 if($text =~ /<contains>(.*?)\r?\n/)
164 {
165 #rint STDERR "* children formed from contains: $1\n";
166 my $contains_raw = $1;
167 my @contains = split(/;/, $contains_raw);
168 foreach my $child_clid (@contains)
169 {
170 # Replace the " with the parent clid
171 $child_clid =~ s/\"/$self->{'clid'}/;
172 # Create the node obj
173 my $child_node_obj = new ClassifyTreeNode($self->{'model'}, $child_clid);
174 # And insert into ever growing array of child nodes
175 push(@children, $child_node_obj);
176 }
177 }
178 return @children;
179 }
180# /** getChildren() **/
181
182# /** Retrieve the contains metadata which is used to determine this nodes
183# * children.
184# *
185# * @return The contains metadata as a string
186# *
187# * @author John Thompson, DL Consulting Ltd.
188# */
189sub getContains()
190 {
191 my ($self) = @_;
192 print STDERR "ClassifyTreeNode.getContains()\n" unless !$self->{'debug'};
193 my $result = 0;
194 my $text = $self->toString();
195 if($text =~ /<contains>(.*?)\r?\n/)
196 {
197 $result = $1;
198 # Replace " with this nodes CLID
199 $result =~ s/\"/$self->{'clid'}/g;
200 }
201 return $result;
202 }
203# /** getContains() **/
204
205# /** Retrieve this nodes next sibling.
206# *
207# * @return The next sibling node object or 0 if no such node
208# *
209# * @author John Thompson, DL Consulting Ltd.
210# */
211sub getNextSibling()
212 {
213 my ($self) = @_;
214 print STDERR "ClassifyTreeNode.getNextSibling()\n" unless !$self->{'debug'};
215 my $sibling_node = 0;
216 # The next sibling would be the node identified by the CLID with its
217 # suffix number one greater than this nodes CLID.
218 my @clid_parts = split(/\./, $self->{'clid'});
219 my $suffix = pop(@clid_parts);
220 $suffix++;
221 push(@clid_parts, $suffix);
222 my $next_clid = join(".", @clid_parts);
223 # Now determine if this node exists.
224 if(&GDBMUtils::gdbmGet($self->{'model'}->getCollection(), $next_clid) =~ /\w+/)
225 {
226 # And if so, create it.
227 $sibling_node = new ClassifyTreeNode($self->{'model'}, $next_clid);
228 }
229 # Done
230 return $sibling_node;
231 }
232# /** getNextSibling() **/
233
234# /** Retrieve the numleafdocs metadata which if affected by any changes to
235# * child nodes.
236# *
237# * @return The numleafdocs as an integer
238# *
239# * @author John Thompson, DL Consulting Ltd.
240# */
241sub getNumLeafDocs()
242 {
243 my ($self) = @_;
244 print STDERR "ClassifyTreeNode.getNumLeafDocs()\n" unless !$self->{'debug'};
245 my $result = 0;
246 my $text = $self->toString();
247 if($text =~ /<numleafdocs>(\d*?)\r?\n/)
248 {
249 $result = $1;
250 }
251 return $result;
252 }
253# /** getNumLeafDocs() **/
254
255# /** Retrieve the parent node of the given node.
256# *
257# * @param $child_node The node whose parent we want to retrieve
258# * @return The parent node, or 0 if this is the root
259# *
260# * @author John Thompson, DL Consulting Ltd.
261# */
262sub getParentNode()
263 {
264 my ($self) = @_;
265 print STDERR "ClassifyTreeNode.getParentNode()\n" unless !$self->{'debug'};
266 my $parent_node = 0;
267 my $child_clid = $self->getCLID();
268 my @clid_parts = split(/\./, $child_clid);
269 if(scalar(@clid_parts) > 1)
270 {
271 pop(@clid_parts);
272 my $parent_clid = join(".", @clid_parts);
273 $parent_node = $self->{'model'}->getNodeByCLID($parent_clid);
274 }
275 # Otherwise we are already at the root node
276 return $parent_node;
277 }
278# /** getParentNode() **/
279
280# /** Retrieve the path to this node.
281# *
282# * @return The path obj which represents the path to this node or 0 if no
283# * path information exists
284# *
285# * @author John Thompson, DL Consulting Ltd.
286# */
287sub getPath()
288 {
289 my ($self) = @_;
290 print STDERR "ClassifyTreeNode.getPath()\n" unless !$self->{'debug'};
291 my $result = 0;
292 my $text = $self->toString();
293 if($text =~ /<Title>(.*?)\r?\n/ )
294 {
295 my $this_component = $1;
296 # If this node has a parent, then retrieve its path
297 my $parent_node = $self->getParentNode();
298 if ($parent_node)
299 {
300 # Get the path...
301 $result = $parent_node->getPath();
302 # ... and add our component
303 $result->addPathComponent($this_component);
304 }
305 else
306 {
307 $result = new ClassifyTreePath($this_component);
308 }
309 }
310 return $result;
311 }
312# /** getPath() **/
313
314# /** Retrieve the title of this node. This returns essentially the same
315# * information as getPath, but without the encapsulating object.
316# *
317# * @return The title as a string
318# *
319# * @author John Thompson, DL Consulting Ltd.
320# */
321sub getTitle()
322 {
323 my ($self) = @_;
324 print STDERR "ClassifyTreeNode.getTitle()\n" unless !$self->{'debug'};
325 my $result = 0;
326 my $text = $self->toString();
327 if($text =~ /<Title>(.*?)\r?\n/)
328 {
329 $result = $1;
330 }
331 return $result;
332 }
333# /** getTitle() **/
334
335# /** Using the given value locate the correct position to insert a new node,
336# * create it, and then establish it in the database.
337# *
338# * @param $path The path used to determine where to insert node as a string
339# * @return The newly inserted node object
340# *
341# * @author John Thompson, DL Consulting Ltd.
342# */
343sub insertNode()
344 {
345 my ($self, $path) = @_;
346 print STDERR "ClassifyTreeNode.insertNode(\"$path\")\n" unless !$self->{'debug'};
347 my $child_clid = "";
348 my $child_node = 0;
349 my $new_contains = "";
350 # Get the children of this node
351 my @children = $self->getChildren();
352 # If there are no current children, then this will be the first
353 if (scalar(@children) == 0)
354 {
355 #rint STDERR "ClassifyTreeNode.insertNode: first child!\n";
356 $child_clid = $self->{'clid'} . ".1"; # First child
357 # Contains needs to have this new clid added
358 $new_contains = $child_clid;
359 }
360 # Otherwise search through the current children, looking at their values
361 # to locate where to insert this node.
362 else
363 {
364 #rint STDERR "ClassifyTreeNode.insertNode: searching for position...\n";
365 my $found = 0;
366 my $offset = 1;
367 foreach my $sibling_node (@children)
368 {
369 my $sibling_path = $sibling_node->getPath();
370 # If we are still searching for the insertion point
371 if(!$found)
372 {
373 if($sibling_path->toString() eq $path->toString())
374 {
375 # What?!? This node already exists! why are we adding it again!
376 print STDERR "ClassifyTreeNode.insertNode: what?!? node already exists... how did we get here?\n";
377 return $sibling_node;
378 }
379 elsif($sibling_path->toString() gt $path->toString())
380 {
381 # Found our location!
382 $found = 1;
383 $child_clid = $self->{'clid'} . "." . $offset;
384 # You may notice we haven't added this node to contains.
385 # This is because the parent node already contains this
386 # clid - instead we need to record the new highest clid
387 # created when we move the sibling nodes for here onwards
388 # up one space.
389 #rint STDERR "ClassifyTreeNode.insertNode: found our location: $child_clid \n";
390 last;
391 }
392 }
393 $offset++;
394 }
395 # If we haven't found the node, we insert at the end.
396 if(!$found)
397 {
398 #rint STDERR "ClassifyTreeNode.insertNode not found... insert at end \n";
399 $child_clid = $self->{'clid'} . "." . $offset;
400 # Contains needs to have this new clid added
401 $new_contains = $child_clid;
402 }
403 # If we did find the node, we now have to go through the sibling nodes
404 # shifting them up one CLID to ensure there's space.
405 else
406 {
407 # We need another copy of children, but this time with the last
408 # children first!
409 @children = reverse $self->getChildren();
410 my $offset2 = scalar(@children) + 1;
411 foreach my $sibling_node (@children)
412 {
413 $sibling_node->changeCLID($self->{'clid'} . "." . $offset2);
414 # If this if the highest sibling node we are going to rename,
415 # then use it to set the contains metadata.
416 if($new_contains !~ /\w+/)
417 {
418 $new_contains = $self->{'clid'} . "." . $offset2;
419 }
420 # Once we've processed the node exactly in the space the new
421 # node will occupy, we're done.
422 $offset2--;
423 if($offset2 == $offset)
424 {
425 last;
426 }
427 }
428 }
429 }
430 $child_node = new ClassifyTreeNode($self->{'model'}, $child_clid, 1);
431 # Set the value, as this is the only piece of metadata we know and care
432 # about at this stage
433 $child_node->setTitle($path->getLastPathComponent());
434 # Update the contains metadata for this node
435 my $contains = $self->getContains();
436 if($contains =~ /\w/)
437 {
438 $contains .= ";" . $new_contains;
439 }
440 else
441 {
442 $contains = $new_contains;
443 }
444 $self->setContains($contains);
445 # And return the node
446 return $child_node;
447 }
448# /** insertNode() **/
449
450# /** Remove all the children of this node and return the number of document
451# * references (leaf nodes) removed by this process.
452# *
453# * @return The count of document references removed as an integer
454# *
455# * @author John Thompson, DL Consulting Ltd
456# */
457sub removeAllNodes()
458 {
459 my ($self) = @_;
460 print STDERR "ClassifyTreeNode.removeAllNodes()\n" unless !$self->{'debug'};
461 my $num_leaf_docs = 0;
462 # Recursively remove this nodes children
463 my @children = $self->getChildren();
464 foreach my $child_node (@children)
465 {
466 $child_node->removeAllNodes();
467 }
468 # Retrieve the document count (leaf docs)
469 my $text = $self->toString();
470 if ($text =~ /<numleafdocs>(\d+)/)
471 {
472 $num_leaf_docs += $1;
473 }
474 # Now remove the node from the database. We do this calling set gdbm with
475 # no value argument.
476 &GDBMUtils::gdbmSet($self->{'model'}->getCollection(), $self->{'clid'});
477 # Return the leaf count (so we can adjust the numleafdocs at the root node
478 # of this deletion.
479 return $num_leaf_docs;
480 }
481# /** removeAllNodes() **/
482
483# /** Remove the given document this node, and then update the numleafdocs
484# * metadata for all the ancestor nodes.
485# *
486# * @param $oid The unique identifier of a greenstone document
487# *
488# * @author John Thompson, DL Consulting Ltd.
489# */
490sub removeDocument()
491 {
492 my ($self, $oid) = @_;
493 print STDERR "ClassifyTreeNode::removeDocument(\"$oid\")\n" unless !$self->{'debug'};
494 # Retrieve the contains metadata
495 my $contains = $self->getContains();
496 # Remove this oid
497 my @contains_parts = split(/;/, $contains);
498 my @new_contains_parts = ();
499 foreach my $oid_or_clid (@contains_parts)
500 {
501 if ($oid ne $oid_or_clid && $oid_or_clid =~ /[\w\d]+/)
502 {
503 push(@new_contains_parts, $oid_or_clid);
504 }
505 }
506 $contains = join(";", @new_contains_parts);
507 $self->setContains($contains);
508 # We now have to update the numleafdocs count for this node and its
509 # ancestor nodes
510 my $cur_node_obj = $self;
511 while ($cur_node_obj)
512 {
513 my $numleafdocs = $cur_node_obj->getNumLeafDocs();
514 if ($numleafdocs =~ /^\d+$/)
515 {
516 $numleafdocs--;
517 }
518 else
519 {
520 $numleafdocs = 0;
521 }
522 $cur_node_obj->setNumLeafDocs($numleafdocs);
523 $cur_node_obj = $cur_node_obj->getParentNode();
524 }
525 # Done
526 }
527# /** removeDocument() **/
528
529# /** Remove the node denoted by the path.
530# *
531# * @param $child_node The node to be removed
532# *
533# * @author John Thompson, DL Consulting Ltd
534# */
535sub removeNode()
536 {
537 my ($self, $child_node) = @_;
538 # Not as easy as it first sounds as we have to do a recursive remove,
539 # keeping track of any documents removed so we can update document count.
540 # We then remove this node, adjusting the sibling's clid's as necessary
541 # before altering the contains.
542 print STDERR "ClassifyTreeNode::removeNode(child_node)\n" unless !$self->{'debug'};
543 my $remove_clid = $child_node->getCLID();
544 my $sibling_node = $child_node->getNextSibling();
545 # Recursively remove this nodes and its children, taking note of decrease
546 # in document count.
547 my $removed_numleafdocs = $child_node->removeAllNodes();
548 # Determine if removing this node requires other nodes to be moved, and if
549 # so, do so. We do this in a repeating loop until there are no further
550 # siblings, overwriting the $remove_clid variable with the clid of the node
551 # just changed (you'll see why in a moment).
552 while ($sibling_node != 0)
553 {
554 my $current_node = $sibling_node;
555 # Get this nodes sibling
556 $sibling_node = $current_node->getNextSibling();
557 # Record the CLID to change to
558 my $new_clid = $remove_clid;
559 # Record the old clid
560 $remove_clid = $current_node->getCLID();
561 # Modify the clid of the current node
562 $current_node->changeCLID($new_clid);
563 # Continue until there are no further sibling nodes
564 }
565 # By now the $remove_clid will contain the CLID that has to be removed from
566 # the contains metadata for this node
567 my $contains = $self->getContains();
568 my @contains_parts = split(/;/, $contains);
569 my @new_contains_parts = ();
570 foreach my $oid_or_clid (@contains_parts)
571 {
572 if ($remove_clid ne $oid_or_clid && $oid_or_clid =~ /[\w\d]+/)
573 {
574 push(@new_contains_parts, $oid_or_clid);
575 }
576 }
577 $contains = join(";", @new_contains_parts);
578 $self->setContains($contains);
579 # We also alter the numleafdocs metadata to reflect the removal of these
580 # nodes.
581 my $numleafdocs = $self->getNumLeafDocs();
582 if ($numleafdocs =~ /^\d+$/)
583 {
584 $numleafdocs -= $removed_numleafdocs;
585 }
586 else
587 {
588 $numleafdocs = 0;
589 }
590 $self->setNumLeafDocs($numleafdocs);
591 # Done
592 }
593# /** removeNode() **/
594
595# /** Set the contains metadata in the database.
596# *
597# * @param $contains The new contains string
598# *
599# * @author John Thompson, DL Consulting Ltd.
600# */
601sub setContains()
602 {
603 my ($self, $contains) = @_;
604 print STDERR "ClassifyTreeNode::setContains(\"$contains\")\n" unless !$self->{'debug'};
605 # Replace any occurance of this nodes CLID with "
606 $contains =~ s/$self->{'clid'}/\"/g;
607 # Load the text of this node
608 my $text = &GDBMUtils::gdbmGet($self->{'model'}->getCollection(), $self->{'clid'});
609 # Replace the contains
610 #rint STDERR "Before: $text\n";
611 $text =~ s/<contains>.*?\n+/<contains>$contains\n/;
612 #rint STDERR "After: $text\n";
613 # Store the changed text
614 &GDBMUtils::gdbmSet($self->{'model'}->getCollection(), $self->{'clid'}, $text);
615 }
616# /** setContains() **/
617
618# /** Set the numleafdocs metadata in the database.
619# *
620# * @param $numleafdocs The new count of leaf documents
621# *
622# * @author John Thompson, DL Consulting Ltd.
623# */
624sub setNumLeafDocs()
625 {
626 my ($self, $numleafdocs) = @_;
627 print STDERR "ClassifyTreeNode::setNumLeafDocs(numleafdocs)\n" unless !$self->{'debug'};
628 # Load the text of this node
629 my $text = &GDBMUtils::gdbmGet($self->{'model'}->getCollection(), $self->{'clid'});
630 # Replace the numleafdocs
631 $text =~ s/<numleafdocs>\d*?\n+/<numleafdocs>$numleafdocs\n/;
632 # Store the changed text
633 &GDBMUtils::gdbmSet($self->{'model'}->getCollection(), $self->{'clid'}, $text);
634 }
635# /** setNumLeafDocs() **/
636
637# /** Set the title metadata in the database.
638# * Note: Previously this was value and we extracted the title, but the new
639# * autohierarchies don't set values.
640# *
641# * @param $title The new title string
642# *
643# * @author John Thompson, DL Consulting Ltd.
644# */
645sub setTitle()
646 {
647 my ($self, $title) = @_;
648 print STDERR "ClassifyTreeNode::setTitle(\"$title\")\n" unless !$self->{'debug'};
649 # Load the text of this node
650 my $text = &GDBMUtils::gdbmGet($self->{'model'}->getCollection(), $self->{'clid'});
651 # Replace the title
652 $text =~ s/<Title>.*?\n+/<Title>$title\n/;
653 # Store the changed text
654 &GDBMUtils::gdbmSet($self->{'model'}->getCollection(), $self->{'clid'}, $text);
655 }
656# /** setValue() **/
657
658# /** Represent this node as a string.
659# *
660# * @return The string representation of this node
661# *
662# * @author John Thompson, DL Consulting Ltd.
663# */
664sub toString()
665 {
666 my ($self) = @_;
667 print STDERR "ClassifyTreeNode::toString()\n" unless !$self->{'debug'};
668 my $text = &GDBMUtils::gdbmGet($self->{'model'}->getCollection(), $self->{'clid'});
669 return $text;
670 }
671# /** toString() **/
672
6731;
Note: See TracBrowser for help on using the repository browser.