source: trunk/gsdl/src/mgpp/text/bool_optimiser.cpp@ 711

Last change on this file since 711 was 711, checked in by cs025, 25 years ago

Changes to eradicate Xmalloc

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 28.4 KB
Line 
1/**************************************************************************
2 *
3 * bool_optimiser -- optimise boolean parse tree
4 * Copyright (C) 1994,95 Tim Shimmin
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: bool_optimiser.cpp 711 1999-10-17 23:43:31Z cs025 $
21 *
22 **************************************************************************/
23
24/*
25 $Log$
26 Revision 1.2 1999/10/17 23:43:22 cs025
27 Changes to eradicate Xmalloc
28
29 Revision 1.1 1999/10/11 02:57:01 cs025
30 Base install of MG-PP
31
32 Revision 1.1 1999/08/10 21:17:42 sjboddie
33 renamed mg-1.3d directory mg
34
35 Revision 1.1 1998/11/17 09:34:20 rjmcnab
36 *** empty log message ***
37
38 * Revision 1.4 1995/07/27 04:54:55 tes
39 * Committing optimiser before I add the code
40 * to find diff-nodes in any kind of tree.
41 *
42 * Revision 1.3 1994/10/20 00:02:55 tes
43 * Fixed bug with bool_optimiser:
44 * - it couldn't handle "false | false"
45 * - it couldn't handle "true & true"
46 * These would collapse into an empty list and cause an error.
47 *
48 * Revision 1.2 1994/10/18 06:11:02 tes
49 * The boolean optimiser seems to be modifying the parse tree
50 * like it is supposed to.
51 * Paragraph ranking now works without any text files if required to.
52 *
53 * Revision 1.1 1994/10/12 01:15:28 tes
54 * Found bugs in the existing boolean query optimiser.
55 * So decided to rewrite it.
56 * I accidentally deleted query.bool.y, but I have replaced it
57 * with bool_parser.y (which I have forgotten to add here ! ;-(
58 *
59 */
60
61static char *RCSID = "$Id: bool_optimiser.cpp 711 1999-10-17 23:43:31Z cs025 $";
62
63#include "sysfuncs.h"
64
65#include "memlib.h"
66#include "bool_tree.h"
67#include "term_lists.h"
68#include "messages.h"
69
70typedef int (*CompFunc) (const void *, const void *);
71typedef void (*TreeFunc) (bool_tree_node *);
72
73/* --- routines --- */
74static void PermeateNots (bool_tree_node * tree);
75static void DoubleNeg (bool_tree_node * not_tree);
76static void AndDeMorgan (bool_tree_node * not_tree);
77static void OrDeMorgan (bool_tree_node * not_tree);
78static int AndDistribute (bool_tree_node * tree);
79static void DNF_Distribute (bool_tree_node * tree);
80static void ReplaceWithChildren (bool_tree_node * tree);
81static void CollapseTree (bool_tree_node * tree);
82static void AndTrueFalse (bool_tree_node * and_tree);
83static void OrTrueFalse (bool_tree_node * or_tree);
84static void TF_Idempotent (bool_tree_node * tree);
85static void AndNotToDiff (bool_tree_node * and_tree, TreeFunc recurs_func);
86static void DiffDNF (bool_tree_node * or_node);
87static void DiffTree (bool_tree_node * tree);
88static void AndSort (bool_tree_node * and_tree, TermList * tl, CompFunc comp);
89static void AndOrSort (bool_tree_node * tree, TermList * tl);
90static void AndSortDNF (bool_tree_node * or_node, TermList * tl);
91static void MarkOrTerms (bool_tree_node * tree, TermList * tl);
92static int calc_sum_ft (bool_tree_node * term_tree, TermList * tl);
93static int AndOrSort_comp (const void *A, const void *B);
94static int AndSort_comp (const void *A, const void *B);
95
96/* =========================================================================
97 * Function: OptimiseBoolTree
98 * Description:
99 * For case 2:
100 * Do three major steps:
101 * (i) put into standard form
102 * -> put into DNF (disjunctive normal form - or of ands)
103 * Use DeMorgan's, Double-negative, Distributive rules
104 * (ii) simplify
105 * apply idempotency rules
106 * (iii) ameliorate
107 * convert &! to diff nodes, order terms by frequency,...
108 * Could also do the matching idempotency laws i.e. ...
109 * (A | A), (A | !A), (A & !A), (A & A), (A & (A | B)), (A | (A & B))
110 * Job for future.... ;-)
111 * Input:
112 * Output:
113 * ========================================================================= */
114
115void
116OptimiseBoolTree (bool_tree_node * parse_tree, TermList * tl, int type)
117{
118 switch (type)
119 {
120 case 1:
121 /* --- sorts for and of or-terms --- */
122 PermeateNots (parse_tree);
123 TF_Idempotent (parse_tree);
124 CollapseTree (parse_tree);
125 DiffTree (parse_tree);
126 MarkOrTerms (parse_tree, tl);
127 AndOrSort (parse_tree, tl);
128 break;
129 case 2:
130 /* --- put into DNF --- */
131 PermeateNots (parse_tree);
132 TF_Idempotent (parse_tree);
133 DNF_Distribute (parse_tree);
134 CollapseTree (parse_tree);
135 AndSortDNF (parse_tree, tl);
136 DiffDNF (parse_tree);
137 break;
138 default:
139 /* --- do nothing --- */
140 break;
141 } /*switch */
142} /*OptimiseBoolTree */
143
144/* =========================================================================
145 * Function: DoubleNeg
146 * Description:
147 * !(!(a) = a
148 * Assumes binary tree.
149 * Input:
150 * Output:
151 * ========================================================================= */
152static void
153DoubleNeg (bool_tree_node * not_tree)
154{
155 bool_tree_node *child = BOOL_CHILD (not_tree);
156 bool_tree_node *a = BOOL_CHILD (child);
157 bool_tree_node *sibling = BOOL_SIBLING (not_tree);
158
159 /* copy a into not tree */
160 bcopy ((void *) a, (void *) not_tree, sizeof (bool_tree_node));
161
162 BOOL_SIBLING (not_tree) = sibling;
163
164 delete (child);
165 delete (a);
166
167}
168
169/* =========================================================================
170 * Function: AndDeMorgan
171 * Description:
172 * DeMorgan's rule for 'not' of an 'and' i.e. !(a & b) <=> (!a) | (!b)
173 * Assumes Binary Tree
174 * Input:
175 * not of and tree
176 * Output:
177 * or of not trees
178 * ========================================================================= */
179
180static void
181AndDeMorgan (bool_tree_node * not_tree)
182{
183 bool_tree_node *child = BOOL_CHILD (not_tree);
184 bool_tree_node *a = BOOL_CHILD (child);
185 bool_tree_node *b = BOOL_SIBLING (a);
186 bool_tree_node *new_not = NULL;
187
188 /* change 'not' to 'or' */
189 BOOL_TAG (not_tree) = N_or;
190
191 /* change 'and' to 'not' */
192 BOOL_TAG (child) = N_not;
193
194 /* 'a' no longer has a sibling */
195 BOOL_SIBLING (a) = NULL;
196
197 /* 'a's old sibling is now a child of a new not */
198 new_not = CreateBoolTreeNode (N_not, b, NULL);
199 BOOL_SIBLING (child) = new_not;
200}
201
202/* =========================================================================
203 * Function: OrDeMorgan
204 * Description:
205 * DeMorgan's rule for 'not' of an 'or' i.e. !(a | b) <=> (!a) & (!b)
206 * Assumes Binary Tree
207 * Input:
208 * not of and tree
209 * Output:
210 * or of not trees
211 * ========================================================================= */
212
213static void
214OrDeMorgan (bool_tree_node * not_tree)
215{
216 bool_tree_node *child = BOOL_CHILD (not_tree);
217 bool_tree_node *a = BOOL_CHILD (child);
218 bool_tree_node *b = BOOL_SIBLING (a);
219 bool_tree_node *new_not = NULL;
220
221 /* change 'not' to 'and' */
222 BOOL_TAG (not_tree) = N_and;
223
224 /* change 'or' to 'not' */
225 BOOL_TAG (child) = N_not;
226
227 /* 'a' no longer has a sibling */
228 BOOL_SIBLING (a) = NULL;
229
230 /* 'a's old sibling is now a child of a new not */
231 new_not = CreateBoolTreeNode (N_not, b, NULL);
232 BOOL_SIBLING (child) = new_not;
233}
234
235/* =========================================================================
236 * Function: PermeateNots
237 * Description:
238 * Use DeMorgan's and Double-negative
239 * Assumes tree in binary form (i.e. No ands/ors collapsed)
240 * Input:
241 * Output:
242 * ========================================================================= */
243
244static void
245PermeateNots (bool_tree_node * tree)
246{
247 if (!tree)
248 return;
249
250 if (BOOL_TAG (tree) == N_not)
251 {
252 switch (BOOL_TAG (BOOL_CHILD (tree)))
253 {
254 case N_not:
255 DoubleNeg (tree);
256/** NOTE: I do not go on to do the children **/
257 /* I permeate the nots on the tree here & leave it at that */
258 /* In this case I do not know what type 'tree' will end up */
259 PermeateNots (tree);
260 return;
261 case N_and:
262 AndDeMorgan (tree);
263 break;
264 case N_or:
265 OrDeMorgan (tree);
266 break;
267 default:
268 /* leave alone */
269 break;
270 }
271 }
272
273 /* Permeate Nots for children */
274 if (BOOL_HAS_CHILD (tree))
275 {
276 PermeateNots (BOOL_CHILD (tree));
277 PermeateNots (BOOL_SIBLING (BOOL_CHILD (tree)));
278 }
279
280}
281
282/* =========================================================================
283 * Function: DNF_Distribute
284 * Description:
285 * Input:
286 * Output:
287 * ========================================================================= */
288
289
290#define ITER_LIMIT 500 /* if reach limit then probably in trouble */
291
292static void
293DNF_Distribute (bool_tree_node * tree)
294{
295 int i = 0;
296
297 /* multiple passes until no change */
298 /* not very smart :-( */
299 while (1)
300 {
301 if (AndDistribute (tree) == 0)
302 return;
303 if (i == ITER_LIMIT)
304 FatalError (1, "DNF infinite loop");
305 i++;
306 } /*while */
307
308}
309
310/* =========================================================================
311 * Function: AndDistribute
312 * Description:
313 * (a | b) & A <=> (a & A) | (b & A)
314 * Input:
315 * binary tree of "AND" , "OR"s.
316 * Output:
317 * return 1 if changed the tree
318 * return 0 if there was NO change (no distributive rule to apply)
319 * ========================================================================= */
320
321static int
322AndDistribute (bool_tree_node * tree)
323{
324 if (!tree)
325 return 0;
326
327 if (BOOL_TAG (tree) != N_and)
328 {
329 /* try the children if got some */
330 if (BOOL_TAG (tree) == N_or)
331 {
332 bool_tree_node *left_child = BOOL_CHILD (tree);
333 int left = AndDistribute (left_child);
334 int right = AndDistribute (BOOL_SIBLING (left_child));
335 return (left || right);
336 }
337
338 /* no children, then no change */
339 return 0;
340 }
341
342 /* AND case */
343 {
344 /* we have to check the children to see if can distribute */
345 bool_tree_node *left_child = BOOL_CHILD (tree);
346 bool_tree_node *right_child = BOOL_SIBLING (left_child);
347 int swap = 0;
348
349 /* If "or" on the right side then do a swap */
350 /* This is done for code simplicity */
351 if (BOOL_TAG (right_child) == N_or)
352 {
353 /* do a swap */
354 bool_tree_node *temp;
355 temp = right_child;
356 right_child = left_child;
357 left_child = temp;
358
359 BOOL_SIBLING (left_child) = right_child;
360 BOOL_SIBLING (right_child) = NULL;
361 BOOL_CHILD (tree) = left_child;
362
363 swap = 1;
364 }
365
366 /* do the actual distribution */
367 if (swap || BOOL_TAG (left_child) == N_or)
368 {
369 bool_tree_node *A = right_child;
370 bool_tree_node *A_copy = CopyBoolTree (A);
371 bool_tree_node *a = BOOL_CHILD (left_child);
372 bool_tree_node *b = BOOL_SIBLING (a);
373
374 /* then can distribute away ... */
375 BOOL_TAG (tree) = N_or;
376 BOOL_TAG (left_child) = N_and;
377 BOOL_SIBLING (left_child) = CreateBoolTreeNode (N_and, b, A);
378 BOOL_SIBLING (a) = A_copy;
379
380 AndDistribute (tree);
381 return 1;
382 }
383
384 /* if "and" of something other than "or" s then try the children */
385 {
386 int left = AndDistribute (left_child);
387 int right = AndDistribute (right_child);
388 return (left || right);
389 }
390 } /* AND case */
391
392} /*AndDistribute */
393
394/* =========================================================================
395 * Function: CollapseTree
396 * Description:
397 * Collapse "and" and "or"s.
398 * Input:
399 * Output:
400 * ========================================================================= */
401
402static void
403CollapseTree (bool_tree_node * tree)
404{
405 if (!tree)
406 return;
407
408 if ((BOOL_TAG (tree) == N_and) || (BOOL_TAG (tree) == N_or))
409 {
410 bool_tree_node *child = BOOL_CHILD (tree);
411 while (child)
412 {
413 while (BOOL_TAG (child) == BOOL_TAG (tree))
414 {
415 ReplaceWithChildren (child);
416 }
417 CollapseTree (child);
418 child = BOOL_SIBLING (child);
419 }
420 }
421
422 /* assume "not"s permeated */
423 return;
424}
425
426/* =========================================================================
427 * Function: ReplaceWithChildren
428 * Description:
429 * Input:
430 * Output:
431 * ========================================================================= */
432
433static void
434ReplaceWithChildren (bool_tree_node * tree)
435{
436 bool_tree_node *next = BOOL_SIBLING (tree);
437 bool_tree_node *child = BOOL_CHILD (tree);
438 bool_tree_node *last = LastBoolSibling (child);
439
440 bcopy ((void *) child, (void *) tree, sizeof (bool_tree_node));
441 delete (child);
442 BOOL_SIBLING (last) = next;
443
444}
445
446/* =========================================================================
447 * Function: TF_Idempotent
448 * Description:
449 * Applies Idempotency laws which refer to True or False (hence "TF")
450 * uses the true or false idempotent rules
451 * Input:
452 * operates on n-ary trees (not restricted to binary)
453 * Output:
454 * ========================================================================= */
455
456static void
457TF_Idempotent (bool_tree_node * tree)
458{
459 bool_tree_node *child = NULL;
460
461 while (tree)
462 {
463 if (BOOL_HAS_CHILD (tree))
464 {
465 child = BOOL_CHILD (tree);
466 TF_Idempotent (child);
467 }
468
469 /* --- process node --- */
470 switch (BOOL_TAG (tree))
471 {
472 case N_not:
473 child = BOOL_CHILD (tree);
474
475 /* if true then falsify */
476 if (BOOL_TAG (child) == N_all)
477 {
478 BOOL_TAG (tree) = N_none;
479 BOOL_CHILD (tree) = NULL;
480 delete (child);
481 break;
482 }
483
484 /* if false then trueify */
485 if (BOOL_TAG (child) == N_none)
486 {
487 BOOL_TAG (tree) = N_all;
488 BOOL_CHILD (tree) = NULL;
489 delete (child);
490 break;
491 }
492
493 break;
494 case N_and:
495 AndTrueFalse (tree);
496 break;
497 case N_or:
498 OrTrueFalse (tree);
499 break;
500
501 default:
502 /* do nothing */
503 break;
504 } /*switch */
505
506 tree = BOOL_SIBLING (tree);
507 } /*while */
508} /*TF_Idempotent */
509
510/* =========================================================================
511 * Function: AndTrueFalse
512 * Description:
513 * If any "false" then falsify whole tree
514 * If any "true" then delete them
515 * Input:
516 * Output:
517 * ========================================================================= */
518
519static void
520AndTrueFalse (bool_tree_node * and_tree)
521{
522 bool_tree_node *child = BOOL_CHILD (and_tree);
523
524 bool_tree_node *prev = NULL;
525
526 while (child)
527 {
528 if (BOOL_TAG (child) == N_none)
529 {
530 FreeBoolTree (&(BOOL_CHILD (and_tree)));
531 BOOL_TAG (and_tree) = N_none;
532 return;
533 }
534
535 if (BOOL_TAG (child) == N_all)
536 {
537 bool_tree_node *sibling = BOOL_SIBLING (child);
538
539 /* --- link over child --- */
540 if (!prev)
541 {
542 BOOL_CHILD (and_tree) = sibling;
543 }
544 else
545 {
546 BOOL_SIBLING (prev) = sibling;
547 }
548
549 delete (child);
550 child = sibling;
551 }
552 else
553 {
554 prev = child;
555 child = BOOL_SIBLING (child);
556 }
557 } /*while */
558
559 child = BOOL_CHILD (and_tree);
560
561 /* if no children ... */
562 /* => all have been true */
563 if (!child)
564 {
565 BOOL_TAG (and_tree) = N_all;
566 return;
567 }
568
569 /* If only one child ... */
570 if (!BOOL_SIBLING (child))
571 {
572 /* replace the parent with child - move it up */
573 bool_tree_node *save_sibling = BOOL_SIBLING (and_tree);
574 bcopy ((void *) child, (void *) and_tree, sizeof (bool_tree_node));
575 BOOL_SIBLING (and_tree) = save_sibling;
576 delete (child);
577 }
578
579} /*AndTrueFalse */
580
581/* =========================================================================
582 * Function: OrTrueFalse
583 * Description:
584 * If any "true" then trueify whole tree
585 * If any "false" then delete them
586 * ***** NOTE: This is a reflection of AndTrueFalse *****
587 * Input:
588 * Output:
589 * ========================================================================= */
590
591static void
592OrTrueFalse (bool_tree_node * or_tree)
593{
594 bool_tree_node *child = BOOL_CHILD (or_tree);
595
596 bool_tree_node *prev = NULL;
597
598 while (child)
599 {
600 if (BOOL_TAG (child) == N_all)
601 {
602 FreeBoolTree (&(BOOL_CHILD (or_tree)));
603 BOOL_TAG (or_tree) = N_all;
604 return;
605 }
606
607 if (BOOL_TAG (child) == N_none)
608 {
609 bool_tree_node *sibling = BOOL_SIBLING (child);
610 if (!prev)
611 {
612 BOOL_CHILD (or_tree) = sibling;
613 }
614 else
615 {
616 BOOL_SIBLING (prev) = sibling;
617 }
618 delete (child);
619 child = sibling;
620
621 }
622 else
623 {
624 prev = child;
625 child = BOOL_SIBLING (child);
626 }
627 } /*while */
628
629 child = BOOL_CHILD (or_tree);
630
631 /* if no children ... */
632 /* => all have been false */
633 if (!child)
634 {
635 BOOL_TAG (or_tree) = N_none;
636 return;
637 }
638
639 /* If only one child ... */
640 if (!BOOL_SIBLING (child))
641 {
642 /* replace the parent with child - move it up */
643 bool_tree_node *save_sibling = BOOL_SIBLING (or_tree);
644 bcopy ((void *) child, (void *) or_tree, sizeof (bool_tree_node));
645 BOOL_SIBLING (or_tree) = save_sibling;
646 delete (child);
647 }
648
649} /*AndTrueFalse */
650
651/* =========================================================================
652 * Function: AndNotToDiff
653 * Description:
654 * Convert an "And" tree with possibly some negated terms into
655 * a "And - neg-terms".
656 * Input:
657 * Expects "And" of only literals - as tree should be in DNF form.
658 * This means that no recursion is necessary.
659 * Output:
660 * ========================================================================= */
661
662static void
663AndNotToDiff (bool_tree_node * and_tree, TreeFunc recurs_func)
664{
665
666 /* -------------------------------------------------- */
667 /* --- Divide the children up into positive and negative literals --- */
668 /* Preserves ordering of children */
669 bool_tree_node *pos = NULL;
670 bool_tree_node *neg = NULL;
671 bool_tree_node *last_pos = NULL;
672 bool_tree_node *last_neg = NULL;
673
674 bool_tree_node *child = BOOL_CHILD (and_tree);
675 while (child)
676 {
677 bool_tree_node *next = BOOL_SIBLING (child);
678 if (BOOL_TAG (child) == N_not)
679 {
680 /* add to end of neg list */
681 if (!neg)
682 neg = child;
683 else
684 BOOL_SIBLING (last_neg) = child;
685 last_neg = child;
686 }
687 else
688 {
689 /* add to end of pos list */
690 if (!pos)
691 pos = child;
692 else
693 BOOL_SIBLING (last_pos) = child;
694 last_pos = child;
695 }
696 child = next;
697 } /*while */
698 if (last_pos)
699 BOOL_SIBLING (last_pos) = NULL;
700 if (last_neg)
701 BOOL_SIBLING (last_neg) = NULL;
702 /* -------------------------------------------------- */
703
704 if (!neg) /* no negtives => no subtraction */
705 {
706 /* fix up parent's child link */
707 BOOL_CHILD (and_tree) = pos;
708 return;
709 }
710
711 if (!pos) /* no positives = all negatives */
712 {
713 /* Choose one of the negs for the left-hand-side of the subtraction */
714 /* In particular, choose the first */
715 pos = neg;
716 neg = BOOL_SIBLING (neg);
717 BOOL_SIBLING (pos) = NULL;
718 }
719
720 /* Get rid of all '!' from not nodes */
721 /* Make a list out of the children */
722 {
723 bool_tree_node *n = neg;
724 bool_tree_node *next = NULL;
725
726 neg = BOOL_CHILD (neg);
727 while (n)
728 {
729 bool_tree_node *c = BOOL_CHILD (n);
730 next = BOOL_SIBLING (n);
731 delete (n);
732 if (next)
733 BOOL_SIBLING (c) = BOOL_CHILD (next);
734 n = next;
735 }
736 }
737
738 /* --- restructure the tree with "and" and "diff" --- */
739 BOOL_TAG (and_tree) = N_diff;
740 if (!BOOL_SIBLING (pos)) /* i.e. only one pos */
741 {
742 /* (A & !B & !C & !D & ...) ==> A - (B,C,D,...) */
743
744 BOOL_CHILD (and_tree) = pos;
745 BOOL_SIBLING (pos) = neg;
746 }
747 else
748 {
749 /* (A & !C & B & !D & ...) ==> (A & B & ....) - (C, D, ...) */
750
751 bool_tree_node *n = CreateBoolNode (N_and);
752 BOOL_CHILD (and_tree) = n;
753 BOOL_CHILD (n) = pos;
754 BOOL_SIBLING (n) = neg;
755 } /*if */
756
757 if (recurs_func)
758 {
759 recurs_func (BOOL_CHILD (and_tree));
760 }
761
762} /*AndNotToDiff */
763
764/* =========================================================================
765 * Function: DiffTree
766 * Description:
767 * Applies "AndNotToNeg" to each conjunct.
768 * Unlike DiffDNF, this routine will search throughout the tree for
769 * "and"-nodes. It will call AndNotToNeg with this as its recursive
770 * routine.
771 * Date: 27/July/95
772 * Input:
773 * Output:
774 * ========================================================================= */
775
776static void
777DiffTree (bool_tree_node * tree)
778{
779 bool_tree_node *t = tree;
780
781 while (t)
782 {
783 if (BOOL_TAG (t) == N_and)
784 {
785 AndNotToDiff (t, DiffTree);
786 }
787 t = BOOL_SIBLING (t);
788 } /*while */
789
790} /*DiffTree */
791
792/* =========================================================================
793 * Function: DiffDNF
794 * Description:
795 * Applies "AndNotToNeg" to each conjunct.
796 * Input:
797 * A DNF tree
798 * Output:
799 * A tree possibly with some diffs in it.
800 * ========================================================================= */
801
802static void
803DiffDNF (bool_tree_node * DNF_tree)
804{
805
806 if (BOOL_TAG (DNF_tree) == N_or)
807 {
808 bool_tree_node *child = BOOL_CHILD (DNF_tree);
809
810 while (child)
811 {
812 if (BOOL_TAG (child) == N_and)
813 {
814 AndNotToDiff (child, NULL);
815 }
816 child = BOOL_SIBLING (child);
817 }
818 }
819 else if (BOOL_TAG (DNF_tree) == N_and)
820 {
821 AndNotToDiff (DNF_tree, NULL);
822 }
823}
824
825/* =========================================================================
826 * Function: AndSort_comp
827 * Description:
828 * Input:
829 * Output:
830 * ========================================================================= */
831
832/* Module variable, "term_list", used as parameter for comparison function */
833/* The comparison function only allows certain fixed parameters */
834static TermList *term_list;
835
836static int
837AndSort_comp (const void *A, const void *B)
838{
839 const bool_tree_node *const *a = (bool_tree_node **) A;
840 const bool_tree_node *const *b = (bool_tree_node **) B;
841
842 if (BOOL_TAG (*a) == N_term && BOOL_TAG (*b) == N_term)
843 {
844 /* look up in term list */
845 int a_doc_count = GetNthWE (term_list, BOOL_TERM (*a))->doc_count;
846 int b_doc_count = GetNthWE (term_list, BOOL_TERM (*b))->doc_count;
847
848 return a_doc_count - b_doc_count;
849 }
850
851 if (BOOL_TAG (*a) == N_term && BOOL_TAG (*b) != N_term)
852 return -1;
853
854 if (BOOL_TAG (*a) != N_term && BOOL_TAG (*b) == N_term)
855 return 1;
856
857 return 0;
858} /*AndSort_comp */
859
860/* =========================================================================
861 * Function: AndSort
862 * Description:
863 * Sort the list of nodes by increasing doc_count
864 * Using some Neil Sharman code - pretty straight forward.
865 * Note: not-terms are sent to the end of the list
866 * Input:
867 * Output:
868 * ========================================================================= */
869
870static void
871AndSort (bool_tree_node * and_tree, TermList * tl, CompFunc comp)
872{
873 bool_tree_node **list = NULL;
874 bool_tree_node *child = NULL;
875 int count = 0;
876 int i = 0;
877
878 if (BOOL_TAG (and_tree) != N_and)
879 return;
880
881 /* --- Convert linked list into an array for sorting --- */
882 child = BOOL_CHILD (and_tree);
883 count = 0;
884 while (child)
885 {
886 count++;
887 child = BOOL_SIBLING (child);
888 }
889
890 if (!(list = new (bool_tree_node *)[count]))
891 return;
892
893 child = BOOL_CHILD (and_tree);
894 for (i = 0; i < count; i++)
895 {
896 list[i] = child;
897 child = BOOL_SIBLING (child);
898 }
899 /* ----------------------------------------------------- */
900
901 term_list = tl;
902 qsort (list, count, sizeof (bool_tree_node *), comp);
903
904 /* --- Convert array back into linked list --- */
905 for (i = 0; i < count - 1; i++)
906 BOOL_SIBLING (list[i]) = list[i + 1];
907 BOOL_SIBLING (list[count - 1]) = NULL;
908 BOOL_CHILD (and_tree) = list[0];
909 delete list;
910 /* ----------------------------------------------------- */
911
912} /*AndSort */
913
914/* =========================================================================
915 * Function: AndSortDNF
916 * Description:
917 * Input:
918 * Output:
919 * ========================================================================= */
920
921
922static void
923AndSortDNF (bool_tree_node * DNF_tree, TermList * tl)
924{
925
926 if (BOOL_TAG (DNF_tree) == N_or)
927 {
928 bool_tree_node *child = BOOL_CHILD (DNF_tree);
929
930 while (child)
931 {
932 if (BOOL_TAG (child) == N_and)
933 {
934 AndSort (child, tl, AndSort_comp);
935 }
936 child = BOOL_SIBLING (child);
937 }
938 }
939 else if (BOOL_TAG (DNF_tree) == N_and)
940 {
941 AndSort (DNF_tree, tl, AndSort_comp);
942 }
943}
944
945/* =========================================================================
946 * Function: MarkOrTerms
947 * Description:
948 * Mark or-nodes whose children are all terms.
949 * Also store the sum of the ft's of these nodes.
950 * Input:
951 * Output:
952 * ========================================================================= */
953
954static void
955MarkOrTerms (bool_tree_node * tree, TermList * tl)
956{
957 if (BOOL_TAG (tree) == N_or)
958 {
959 int found_non_term = 0;
960 bool_tree_node *child = BOOL_CHILD (tree);
961
962 /* --- check to see if got all terms --- */
963 while (child)
964 {
965 if (BOOL_TAG (child) != N_term)
966 {
967 found_non_term = 1;
968 }
969 if (BOOL_HAS_CHILD (tree))
970 {
971 MarkOrTerms (child, tl);
972 }
973 child = BOOL_SIBLING (child);
974 } /*while */
975
976 if (!found_non_term)
977 {
978 /* Must have all terms for the OR node */
979 /* So change its tag and calculate its sum */
980 BOOL_TAG (tree) = N_or_terms;
981 BOOL_SUM_FT (tree) = calc_sum_ft (tree, tl);
982 } /*if */
983 /* ------------------------------------- */
984
985 } /*if OR node */
986
987 else if (BOOL_HAS_CHILD (tree))
988 {
989 bool_tree_node *child = BOOL_CHILD (tree);
990 while (child)
991 {
992 MarkOrTerms (child, tl);
993 child = BOOL_SIBLING (child);
994 }
995 }
996
997} /*MarkOrTerms */
998
999/* =========================================================================
1000 * Function: calc_sum_ft
1001 * Description:
1002 * Sums up the ft's of all the terms
1003 * Input:
1004 * Assumes that the tree only contains terms in it
1005 * Output:
1006 * the ft sum
1007 * ========================================================================= */
1008static int
1009calc_sum_ft (bool_tree_node * term_tree, TermList * tl)
1010{
1011 int sum_ft = 0;
1012 bool_tree_node *child = BOOL_CHILD (term_tree);
1013 while (child)
1014 {
1015 sum_ft += GetNthWE (tl, BOOL_TERM (child))->doc_count;
1016 child = BOOL_SIBLING (child);
1017 }
1018 return sum_ft;
1019}
1020
1021/* =========================================================================
1022 * Function: AndOrSort
1023 * Description:
1024 * This is like an AndSort of just terms.
1025 * The difference is that we are now also
1026 * considering sorting with <or of terms> by using the sum of f_t's.
1027 * Input:
1028 * Output:
1029 * ========================================================================= */
1030static void
1031AndOrSort (bool_tree_node * tree, TermList * tl)
1032{
1033 if (BOOL_HAS_CHILD (tree))
1034 {
1035
1036 if (BOOL_TAG (tree) == N_and)
1037 {
1038 AndSort (tree, tl, AndOrSort_comp);
1039 }
1040
1041 /* Check out any children down the tree */
1042 /* Probably a waste of time */
1043 {
1044 bool_tree_node *child = BOOL_CHILD (tree);
1045 while (child)
1046 {
1047 AndOrSort (child, tl);
1048 child = BOOL_SIBLING (child);
1049 }
1050 } /*do the tree */
1051
1052 } /*if has children */
1053} /*AndOrSort */
1054
1055/* =========================================================================
1056 * Function: AndOrSort_comp
1057 * Description:
1058 * Comparison function used to order "term"s and "or_terms"s.
1059 * It's purpose is to be used by a sort routine.
1060 * Input:
1061 * Output:
1062 * ========================================================================= */
1063static int
1064AndOrSort_comp (const void *A, const void *B)
1065{
1066 const bool_tree_node *const *a = (bool_tree_node **) A;
1067 const bool_tree_node *const *b = (bool_tree_node **) B;
1068
1069 if (BOOL_TAG (*a) == N_term && BOOL_TAG (*b) == N_term)
1070 {
1071 /* look up in term list */
1072 int a_doc_count = GetNthWE (term_list, BOOL_TERM (*a))->doc_count;
1073 int b_doc_count = GetNthWE (term_list, BOOL_TERM (*b))->doc_count;
1074
1075 return a_doc_count - b_doc_count;
1076 }
1077
1078 if (BOOL_TAG (*a) == N_term && BOOL_TAG (*b) == N_or_terms)
1079 {
1080 /* look up in term list */
1081 int a_doc_count = GetNthWE (term_list, BOOL_TERM (*a))->doc_count;
1082 int b_doc_count = BOOL_SUM_FT (*b);
1083
1084 return a_doc_count - b_doc_count;
1085 }
1086
1087 if (BOOL_TAG (*a) == N_or_terms && BOOL_TAG (*b) == N_term)
1088 {
1089 /* look up in term list */
1090 int a_doc_count = BOOL_SUM_FT (*a);
1091 int b_doc_count = GetNthWE (term_list, BOOL_TERM (*b))->doc_count;
1092
1093 return a_doc_count - b_doc_count;
1094 }
1095
1096 if (BOOL_TAG (*a) == N_or_terms && BOOL_TAG (*b) == N_or_terms)
1097 {
1098 /* look up in term list */
1099 int a_doc_count = BOOL_SUM_FT (*a);
1100 int b_doc_count = BOOL_SUM_FT (*b);
1101
1102 return a_doc_count - b_doc_count;
1103 }
1104
1105 if ((BOOL_TAG (*a) == N_term || BOOL_TAG (*a) == N_or_terms) &&
1106 (BOOL_TAG (*b) != N_term && BOOL_TAG (*b) != N_or_terms))
1107 return -1;
1108
1109 if ((BOOL_TAG (*a) != N_term && BOOL_TAG (*a) != N_or_terms) &&
1110 (BOOL_TAG (*b) == N_term || BOOL_TAG (*b) == N_or_terms))
1111 return 1;
1112
1113 return 0;
1114} /*AndOrSort_comp */
Note: See TracBrowser for help on using the repository browser.