source: trunk/gsdl/packages/mg/src/text/bool_optimiser.c@ 1014

Last change on this file since 1014 was 439, checked in by sjboddie, 25 years ago

renamed mg-1.3d directory mg

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 28.2 KB
Line 
1/**************************************************************************
2 *
3 * bool_optimiser -- optimise boolean parse tree
4 * Copyright (C) 1994,95 Tim Shimmin
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: bool_optimiser.c 439 1999-08-10 21:23:37Z sjboddie $
21 *
22 **************************************************************************/
23
24/*
25 $Log$
26 Revision 1.1 1999/08/10 21:17:42 sjboddie
27 renamed mg-1.3d directory mg
28
29 Revision 1.1 1998/11/17 09:34:20 rjmcnab
30 *** empty log message ***
31
32 * Revision 1.4 1995/07/27 04:54:55 tes
33 * Committing optimiser before I add the code
34 * to find diff-nodes in any kind of tree.
35 *
36 * Revision 1.3 1994/10/20 00:02:55 tes
37 * Fixed bug with bool_optimiser:
38 * - it couldn't handle "false | false"
39 * - it couldn't handle "true & true"
40 * These would collapse into an empty list and cause an error.
41 *
42 * Revision 1.2 1994/10/18 06:11:02 tes
43 * The boolean optimiser seems to be modifying the parse tree
44 * like it is supposed to.
45 * Paragraph ranking now works without any text files if required to.
46 *
47 * Revision 1.1 1994/10/12 01:15:28 tes
48 * Found bugs in the existing boolean query optimiser.
49 * So decided to rewrite it.
50 * I accidentally deleted query.bool.y, but I have replaced it
51 * with bool_parser.y (which I have forgotten to add here ! ;-(
52 *
53 */
54
55static char *RCSID = "$Id: bool_optimiser.c 439 1999-08-10 21:23:37Z sjboddie $";
56
57#include "sysfuncs.h"
58
59#include "memlib.h"
60#include "bool_tree.h"
61#include "term_lists.h"
62#include "messages.h"
63
64typedef int (*CompFunc) (const void *, const void *);
65typedef void (*TreeFunc) (bool_tree_node *);
66
67/* --- routines --- */
68static void PermeateNots (bool_tree_node * tree);
69static void DoubleNeg (bool_tree_node * not_tree);
70static void AndDeMorgan (bool_tree_node * not_tree);
71static void OrDeMorgan (bool_tree_node * not_tree);
72static int AndDistribute (bool_tree_node * tree);
73static void DNF_Distribute (bool_tree_node * tree);
74static void ReplaceWithChildren (bool_tree_node * tree);
75static void CollapseTree (bool_tree_node * tree);
76static void AndTrueFalse (bool_tree_node * and_tree);
77static void OrTrueFalse (bool_tree_node * or_tree);
78static void TF_Idempotent (bool_tree_node * tree);
79static void AndNotToDiff (bool_tree_node * and_tree, TreeFunc recurs_func);
80static void DiffDNF (bool_tree_node * or_node);
81static void DiffTree (bool_tree_node * tree);
82static void AndSort (bool_tree_node * and_tree, TermList * tl, CompFunc comp);
83static void AndOrSort (bool_tree_node * tree, TermList * tl);
84static void AndSortDNF (bool_tree_node * or_node, TermList * tl);
85static void MarkOrTerms (bool_tree_node * tree, TermList * tl);
86static int calc_sum_ft (bool_tree_node * term_tree, TermList * tl);
87static int AndOrSort_comp (const void *A, const void *B);
88static int AndSort_comp (const void *A, const void *B);
89
90/* =========================================================================
91 * Function: OptimiseBoolTree
92 * Description:
93 * For case 2:
94 * Do three major steps:
95 * (i) put into standard form
96 * -> put into DNF (disjunctive normal form - or of ands)
97 * Use DeMorgan's, Double-negative, Distributive rules
98 * (ii) simplify
99 * apply idempotency rules
100 * (iii) ameliorate
101 * convert &! to diff nodes, order terms by frequency,...
102 * Could also do the matching idempotency laws i.e. ...
103 * (A | A), (A | !A), (A & !A), (A & A), (A & (A | B)), (A | (A & B))
104 * Job for future.... ;-)
105 * Input:
106 * Output:
107 * ========================================================================= */
108
109void
110OptimiseBoolTree (bool_tree_node * parse_tree, TermList * tl, int type)
111{
112 switch (type)
113 {
114 case 1:
115 /* --- sorts for and of or-terms --- */
116 PermeateNots (parse_tree);
117 TF_Idempotent (parse_tree);
118 CollapseTree (parse_tree);
119 DiffTree (parse_tree);
120 MarkOrTerms (parse_tree, tl);
121 AndOrSort (parse_tree, tl);
122 break;
123 case 2:
124 /* --- put into DNF --- */
125 PermeateNots (parse_tree);
126 TF_Idempotent (parse_tree);
127 DNF_Distribute (parse_tree);
128 CollapseTree (parse_tree);
129 AndSortDNF (parse_tree, tl);
130 DiffDNF (parse_tree);
131 break;
132 default:
133 /* --- do nothing --- */
134 break;
135 } /*switch */
136} /*OptimiseBoolTree */
137
138/* =========================================================================
139 * Function: DoubleNeg
140 * Description:
141 * !(!(a) = a
142 * Assumes binary tree.
143 * Input:
144 * Output:
145 * ========================================================================= */
146static void
147DoubleNeg (bool_tree_node * not_tree)
148{
149 bool_tree_node *child = BOOL_CHILD (not_tree);
150 bool_tree_node *a = BOOL_CHILD (child);
151 bool_tree_node *sibling = BOOL_SIBLING (not_tree);
152
153 /* copy a into not tree */
154 bcopy (a, not_tree, sizeof (bool_tree_node));
155
156 BOOL_SIBLING (not_tree) = sibling;
157
158 Xfree (child);
159 Xfree (a);
160
161}
162
163/* =========================================================================
164 * Function: AndDeMorgan
165 * Description:
166 * DeMorgan's rule for 'not' of an 'and' i.e. !(a & b) <=> (!a) | (!b)
167 * Assumes Binary Tree
168 * Input:
169 * not of and tree
170 * Output:
171 * or of not trees
172 * ========================================================================= */
173
174static void
175AndDeMorgan (bool_tree_node * not_tree)
176{
177 bool_tree_node *child = BOOL_CHILD (not_tree);
178 bool_tree_node *a = BOOL_CHILD (child);
179 bool_tree_node *b = BOOL_SIBLING (a);
180 bool_tree_node *new_not = NULL;
181
182 /* change 'not' to 'or' */
183 BOOL_TAG (not_tree) = N_or;
184
185 /* change 'and' to 'not' */
186 BOOL_TAG (child) = N_not;
187
188 /* 'a' no longer has a sibling */
189 BOOL_SIBLING (a) = NULL;
190
191 /* 'a's old sibling is now a child of a new not */
192 new_not = CreateBoolTreeNode (N_not, b, NULL);
193 BOOL_SIBLING (child) = new_not;
194}
195
196/* =========================================================================
197 * Function: OrDeMorgan
198 * Description:
199 * DeMorgan's rule for 'not' of an 'or' i.e. !(a | b) <=> (!a) & (!b)
200 * Assumes Binary Tree
201 * Input:
202 * not of and tree
203 * Output:
204 * or of not trees
205 * ========================================================================= */
206
207static void
208OrDeMorgan (bool_tree_node * not_tree)
209{
210 bool_tree_node *child = BOOL_CHILD (not_tree);
211 bool_tree_node *a = BOOL_CHILD (child);
212 bool_tree_node *b = BOOL_SIBLING (a);
213 bool_tree_node *new_not = NULL;
214
215 /* change 'not' to 'and' */
216 BOOL_TAG (not_tree) = N_and;
217
218 /* change 'or' to 'not' */
219 BOOL_TAG (child) = N_not;
220
221 /* 'a' no longer has a sibling */
222 BOOL_SIBLING (a) = NULL;
223
224 /* 'a's old sibling is now a child of a new not */
225 new_not = CreateBoolTreeNode (N_not, b, NULL);
226 BOOL_SIBLING (child) = new_not;
227}
228
229/* =========================================================================
230 * Function: PermeateNots
231 * Description:
232 * Use DeMorgan's and Double-negative
233 * Assumes tree in binary form (i.e. No ands/ors collapsed)
234 * Input:
235 * Output:
236 * ========================================================================= */
237
238static void
239PermeateNots (bool_tree_node * tree)
240{
241 if (!tree)
242 return;
243
244 if (BOOL_TAG (tree) == N_not)
245 {
246 switch (BOOL_TAG (BOOL_CHILD (tree)))
247 {
248 case N_not:
249 DoubleNeg (tree);
250/** NOTE: I do not go on to do the children **/
251 /* I permeate the nots on the tree here & leave it at that */
252 /* In this case I do not know what type 'tree' will end up */
253 PermeateNots (tree);
254 return;
255 case N_and:
256 AndDeMorgan (tree);
257 break;
258 case N_or:
259 OrDeMorgan (tree);
260 break;
261 default:
262 /* leave alone */
263 break;
264 }
265 }
266
267 /* Permeate Nots for children */
268 if (BOOL_HAS_CHILD (tree))
269 {
270 PermeateNots (BOOL_CHILD (tree));
271 PermeateNots (BOOL_SIBLING (BOOL_CHILD (tree)));
272 }
273
274}
275
276/* =========================================================================
277 * Function: DNF_Distribute
278 * Description:
279 * Input:
280 * Output:
281 * ========================================================================= */
282
283
284#define ITER_LIMIT 500 /* if reach limit then probably in trouble */
285
286static void
287DNF_Distribute (bool_tree_node * tree)
288{
289 int i = 0;
290
291 /* multiple passes until no change */
292 /* not very smart :-( */
293 while (1)
294 {
295 if (AndDistribute (tree) == 0)
296 return;
297 if (i == ITER_LIMIT)
298 FatalError (1, "DNF infinite loop");
299 i++;
300 } /*while */
301
302}
303
304/* =========================================================================
305 * Function: AndDistribute
306 * Description:
307 * (a | b) & A <=> (a & A) | (b & A)
308 * Input:
309 * binary tree of "AND" , "OR"s.
310 * Output:
311 * return 1 if changed the tree
312 * return 0 if there was NO change (no distributive rule to apply)
313 * ========================================================================= */
314
315static int
316AndDistribute (bool_tree_node * tree)
317{
318 if (!tree)
319 return 0;
320
321 if (BOOL_TAG (tree) != N_and)
322 {
323 /* try the children if got some */
324 if (BOOL_TAG (tree) == N_or)
325 {
326 bool_tree_node *left_child = BOOL_CHILD (tree);
327 int left = AndDistribute (left_child);
328 int right = AndDistribute (BOOL_SIBLING (left_child));
329 return (left || right);
330 }
331
332 /* no children, then no change */
333 return 0;
334 }
335
336 /* AND case */
337 {
338 /* we have to check the children to see if can distribute */
339 bool_tree_node *left_child = BOOL_CHILD (tree);
340 bool_tree_node *right_child = BOOL_SIBLING (left_child);
341 int swap = 0;
342
343 /* If "or" on the right side then do a swap */
344 /* This is done for code simplicity */
345 if (BOOL_TAG (right_child) == N_or)
346 {
347 /* do a swap */
348 bool_tree_node *temp;
349 temp = right_child;
350 right_child = left_child;
351 left_child = temp;
352
353 BOOL_SIBLING (left_child) = right_child;
354 BOOL_SIBLING (right_child) = NULL;
355 BOOL_CHILD (tree) = left_child;
356
357 swap = 1;
358 }
359
360 /* do the actual distribution */
361 if (swap || BOOL_TAG (left_child) == N_or)
362 {
363 bool_tree_node *A = right_child;
364 bool_tree_node *A_copy = CopyBoolTree (A);
365 bool_tree_node *a = BOOL_CHILD (left_child);
366 bool_tree_node *b = BOOL_SIBLING (a);
367
368 /* then can distribute away ... */
369 BOOL_TAG (tree) = N_or;
370 BOOL_TAG (left_child) = N_and;
371 BOOL_SIBLING (left_child) = CreateBoolTreeNode (N_and, b, A);
372 BOOL_SIBLING (a) = A_copy;
373
374 AndDistribute (tree);
375 return 1;
376 }
377
378 /* if "and" of something other than "or" s then try the children */
379 {
380 int left = AndDistribute (left_child);
381 int right = AndDistribute (right_child);
382 return (left || right);
383 }
384 } /* AND case */
385
386} /*AndDistribute */
387
388/* =========================================================================
389 * Function: CollapseTree
390 * Description:
391 * Collapse "and" and "or"s.
392 * Input:
393 * Output:
394 * ========================================================================= */
395
396static void
397CollapseTree (bool_tree_node * tree)
398{
399 if (!tree)
400 return;
401
402 if ((BOOL_TAG (tree) == N_and) || (BOOL_TAG (tree) == N_or))
403 {
404 bool_tree_node *child = BOOL_CHILD (tree);
405 while (child)
406 {
407 while (BOOL_TAG (child) == BOOL_TAG (tree))
408 {
409 ReplaceWithChildren (child);
410 }
411 CollapseTree (child);
412 child = BOOL_SIBLING (child);
413 }
414 }
415
416 /* assume "not"s permeated */
417 return;
418}
419
420/* =========================================================================
421 * Function: ReplaceWithChildren
422 * Description:
423 * Input:
424 * Output:
425 * ========================================================================= */
426
427static void
428ReplaceWithChildren (bool_tree_node * tree)
429{
430 bool_tree_node *next = BOOL_SIBLING (tree);
431 bool_tree_node *child = BOOL_CHILD (tree);
432 bool_tree_node *last = LastBoolSibling (child);
433
434 bcopy ((char *) child, (char *) tree, sizeof (bool_tree_node));
435 Xfree (child);
436 BOOL_SIBLING (last) = next;
437
438}
439
440/* =========================================================================
441 * Function: TF_Idempotent
442 * Description:
443 * Applies Idempotency laws which refer to True or False (hence "TF")
444 * uses the true or false idempotent rules
445 * Input:
446 * operates on n-ary trees (not restricted to binary)
447 * Output:
448 * ========================================================================= */
449
450static void
451TF_Idempotent (bool_tree_node * tree)
452{
453 bool_tree_node *child = NULL;
454
455 while (tree)
456 {
457 if (BOOL_HAS_CHILD (tree))
458 {
459 child = BOOL_CHILD (tree);
460 TF_Idempotent (child);
461 }
462
463 /* --- process node --- */
464 switch (BOOL_TAG (tree))
465 {
466 case N_not:
467 child = BOOL_CHILD (tree);
468
469 /* if true then falsify */
470 if (BOOL_TAG (child) == N_all)
471 {
472 BOOL_TAG (tree) = N_none;
473 BOOL_CHILD (tree) = NULL;
474 Xfree (child);
475 break;
476 }
477
478 /* if false then trueify */
479 if (BOOL_TAG (child) == N_none)
480 {
481 BOOL_TAG (tree) = N_all;
482 BOOL_CHILD (tree) = NULL;
483 Xfree (child);
484 break;
485 }
486
487 break;
488 case N_and:
489 AndTrueFalse (tree);
490 break;
491 case N_or:
492 OrTrueFalse (tree);
493 break;
494
495 default:
496 /* do nothing */
497 break;
498 } /*switch */
499
500 tree = BOOL_SIBLING (tree);
501 } /*while */
502} /*TF_Idempotent */
503
504/* =========================================================================
505 * Function: AndTrueFalse
506 * Description:
507 * If any "false" then falsify whole tree
508 * If any "true" then delete them
509 * Input:
510 * Output:
511 * ========================================================================= */
512
513static void
514AndTrueFalse (bool_tree_node * and_tree)
515{
516 bool_tree_node *child = BOOL_CHILD (and_tree);
517
518 bool_tree_node *prev = NULL;
519
520 while (child)
521 {
522 if (BOOL_TAG (child) == N_none)
523 {
524 FreeBoolTree (&(BOOL_CHILD (and_tree)));
525 BOOL_TAG (and_tree) = N_none;
526 return;
527 }
528
529 if (BOOL_TAG (child) == N_all)
530 {
531 bool_tree_node *sibling = BOOL_SIBLING (child);
532
533 /* --- link over child --- */
534 if (!prev)
535 {
536 BOOL_CHILD (and_tree) = sibling;
537 }
538 else
539 {
540 BOOL_SIBLING (prev) = sibling;
541 }
542
543 Xfree (child);
544 child = sibling;
545 }
546 else
547 {
548 prev = child;
549 child = BOOL_SIBLING (child);
550 }
551 } /*while */
552
553 child = BOOL_CHILD (and_tree);
554
555 /* if no children ... */
556 /* => all have been true */
557 if (!child)
558 {
559 BOOL_TAG (and_tree) = N_all;
560 return;
561 }
562
563 /* If only one child ... */
564 if (!BOOL_SIBLING (child))
565 {
566 /* replace the parent with child - move it up */
567 bool_tree_node *save_sibling = BOOL_SIBLING (and_tree);
568 bcopy ((char *) child, (char *) and_tree, sizeof (bool_tree_node));
569 BOOL_SIBLING (and_tree) = save_sibling;
570 Xfree (child);
571 }
572
573} /*AndTrueFalse */
574
575/* =========================================================================
576 * Function: OrTrueFalse
577 * Description:
578 * If any "true" then trueify whole tree
579 * If any "false" then delete them
580 * ***** NOTE: This is a reflection of AndTrueFalse *****
581 * Input:
582 * Output:
583 * ========================================================================= */
584
585static void
586OrTrueFalse (bool_tree_node * or_tree)
587{
588 bool_tree_node *child = BOOL_CHILD (or_tree);
589
590 bool_tree_node *prev = NULL;
591
592 while (child)
593 {
594 if (BOOL_TAG (child) == N_all)
595 {
596 FreeBoolTree (&(BOOL_CHILD (or_tree)));
597 BOOL_TAG (or_tree) = N_all;
598 return;
599 }
600
601 if (BOOL_TAG (child) == N_none)
602 {
603 bool_tree_node *sibling = BOOL_SIBLING (child);
604 if (!prev)
605 {
606 BOOL_CHILD (or_tree) = sibling;
607 }
608 else
609 {
610 BOOL_SIBLING (prev) = sibling;
611 }
612 Xfree (child);
613 child = sibling;
614
615 }
616 else
617 {
618 prev = child;
619 child = BOOL_SIBLING (child);
620 }
621 } /*while */
622
623 child = BOOL_CHILD (or_tree);
624
625 /* if no children ... */
626 /* => all have been false */
627 if (!child)
628 {
629 BOOL_TAG (or_tree) = N_none;
630 return;
631 }
632
633 /* If only one child ... */
634 if (!BOOL_SIBLING (child))
635 {
636 /* replace the parent with child - move it up */
637 bool_tree_node *save_sibling = BOOL_SIBLING (or_tree);
638 bcopy ((char *) child, (char *) or_tree, sizeof (bool_tree_node));
639 BOOL_SIBLING (or_tree) = save_sibling;
640 Xfree (child);
641 }
642
643} /*AndTrueFalse */
644
645/* =========================================================================
646 * Function: AndNotToDiff
647 * Description:
648 * Convert an "And" tree with possibly some negated terms into
649 * a "And - neg-terms".
650 * Input:
651 * Expects "And" of only literals - as tree should be in DNF form.
652 * This means that no recursion is necessary.
653 * Output:
654 * ========================================================================= */
655
656static void
657AndNotToDiff (bool_tree_node * and_tree, TreeFunc recurs_func)
658{
659
660 /* -------------------------------------------------- */
661 /* --- Divide the children up into positive and negative literals --- */
662 /* Preserves ordering of children */
663 bool_tree_node *pos = NULL;
664 bool_tree_node *neg = NULL;
665 bool_tree_node *last_pos = NULL;
666 bool_tree_node *last_neg = NULL;
667
668 bool_tree_node *child = BOOL_CHILD (and_tree);
669 while (child)
670 {
671 bool_tree_node *next = BOOL_SIBLING (child);
672 if (BOOL_TAG (child) == N_not)
673 {
674 /* add to end of neg list */
675 if (!neg)
676 neg = child;
677 else
678 BOOL_SIBLING (last_neg) = child;
679 last_neg = child;
680 }
681 else
682 {
683 /* add to end of pos list */
684 if (!pos)
685 pos = child;
686 else
687 BOOL_SIBLING (last_pos) = child;
688 last_pos = child;
689 }
690 child = next;
691 } /*while */
692 if (last_pos)
693 BOOL_SIBLING (last_pos) = NULL;
694 if (last_neg)
695 BOOL_SIBLING (last_neg) = NULL;
696 /* -------------------------------------------------- */
697
698 if (!neg) /* no negtives => no subtraction */
699 {
700 /* fix up parent's child link */
701 BOOL_CHILD (and_tree) = pos;
702 return;
703 }
704
705 if (!pos) /* no positives = all negatives */
706 {
707 /* Choose one of the negs for the left-hand-side of the subtraction */
708 /* In particular, choose the first */
709 pos = neg;
710 neg = BOOL_SIBLING (neg);
711 BOOL_SIBLING (pos) = NULL;
712 }
713
714 /* Get rid of all '!' from not nodes */
715 /* Make a list out of the children */
716 {
717 bool_tree_node *n = neg;
718 bool_tree_node *next = NULL;
719
720 neg = BOOL_CHILD (neg);
721 while (n)
722 {
723 bool_tree_node *c = BOOL_CHILD (n);
724 next = BOOL_SIBLING (n);
725 Xfree (n);
726 if (next)
727 BOOL_SIBLING (c) = BOOL_CHILD (next);
728 n = next;
729 }
730 }
731
732 /* --- restructure the tree with "and" and "diff" --- */
733 BOOL_TAG (and_tree) = N_diff;
734 if (!BOOL_SIBLING (pos)) /* i.e. only one pos */
735 {
736 /* (A & !B & !C & !D & ...) ==> A - (B,C,D,...) */
737
738 BOOL_CHILD (and_tree) = pos;
739 BOOL_SIBLING (pos) = neg;
740 }
741 else
742 {
743 /* (A & !C & B & !D & ...) ==> (A & B & ....) - (C, D, ...) */
744
745 bool_tree_node *n = CreateBoolNode (N_and);
746 BOOL_CHILD (and_tree) = n;
747 BOOL_CHILD (n) = pos;
748 BOOL_SIBLING (n) = neg;
749 } /*if */
750
751 if (recurs_func)
752 {
753 recurs_func (BOOL_CHILD (and_tree));
754 }
755
756} /*AndNotToDiff */
757
758/* =========================================================================
759 * Function: DiffTree
760 * Description:
761 * Applies "AndNotToNeg" to each conjunct.
762 * Unlike DiffDNF, this routine will search throughout the tree for
763 * "and"-nodes. It will call AndNotToNeg with this as its recursive
764 * routine.
765 * Date: 27/July/95
766 * Input:
767 * Output:
768 * ========================================================================= */
769
770static void
771DiffTree (bool_tree_node * tree)
772{
773 bool_tree_node *t = tree;
774
775 while (t)
776 {
777 if (BOOL_TAG (t) == N_and)
778 {
779 AndNotToDiff (t, DiffTree);
780 }
781 t = BOOL_SIBLING (t);
782 } /*while */
783
784} /*DiffTree */
785
786/* =========================================================================
787 * Function: DiffDNF
788 * Description:
789 * Applies "AndNotToNeg" to each conjunct.
790 * Input:
791 * A DNF tree
792 * Output:
793 * A tree possibly with some diffs in it.
794 * ========================================================================= */
795
796static void
797DiffDNF (bool_tree_node * DNF_tree)
798{
799
800 if (BOOL_TAG (DNF_tree) == N_or)
801 {
802 bool_tree_node *child = BOOL_CHILD (DNF_tree);
803
804 while (child)
805 {
806 if (BOOL_TAG (child) == N_and)
807 {
808 AndNotToDiff (child, NULL);
809 }
810 child = BOOL_SIBLING (child);
811 }
812 }
813 else if (BOOL_TAG (DNF_tree) == N_and)
814 {
815 AndNotToDiff (DNF_tree, NULL);
816 }
817}
818
819/* =========================================================================
820 * Function: AndSort_comp
821 * Description:
822 * Input:
823 * Output:
824 * ========================================================================= */
825
826/* Module variable, "term_list", used as parameter for comparison function */
827/* The comparison function only allows certain fixed parameters */
828static TermList *term_list;
829
830static int
831AndSort_comp (const void *A, const void *B)
832{
833 const bool_tree_node *const *a = A;
834 const bool_tree_node *const *b = B;
835
836 if (BOOL_TAG (*a) == N_term && BOOL_TAG (*b) == N_term)
837 {
838 /* look up in term list */
839 int a_doc_count = GetNthWE (term_list, BOOL_TERM (*a))->doc_count;
840 int b_doc_count = GetNthWE (term_list, BOOL_TERM (*b))->doc_count;
841
842 return a_doc_count - b_doc_count;
843 }
844
845 if (BOOL_TAG (*a) == N_term && BOOL_TAG (*b) != N_term)
846 return -1;
847
848 if (BOOL_TAG (*a) != N_term && BOOL_TAG (*b) == N_term)
849 return 1;
850
851 return 0;
852} /*AndSort_comp */
853
854/* =========================================================================
855 * Function: AndSort
856 * Description:
857 * Sort the list of nodes by increasing doc_count
858 * Using some Neil Sharman code - pretty straight forward.
859 * Note: not-terms are sent to the end of the list
860 * Input:
861 * Output:
862 * ========================================================================= */
863
864static void
865AndSort (bool_tree_node * and_tree, TermList * tl, CompFunc comp)
866{
867 bool_tree_node **list = NULL;
868 bool_tree_node *child = NULL;
869 int count = 0;
870 int i = 0;
871
872 if (BOOL_TAG (and_tree) != N_and)
873 return;
874
875 /* --- Convert linked list into an array for sorting --- */
876 child = BOOL_CHILD (and_tree);
877 count = 0;
878 while (child)
879 {
880 count++;
881 child = BOOL_SIBLING (child);
882 }
883
884 if (!(list = Xmalloc (sizeof (bool_tree_node *) * count)))
885 return;
886
887 child = BOOL_CHILD (and_tree);
888 for (i = 0; i < count; i++)
889 {
890 list[i] = child;
891 child = BOOL_SIBLING (child);
892 }
893 /* ----------------------------------------------------- */
894
895 term_list = tl;
896 qsort (list, count, sizeof (bool_tree_node *), comp);
897
898 /* --- Convert array back into linked list --- */
899 for (i = 0; i < count - 1; i++)
900 BOOL_SIBLING (list[i]) = list[i + 1];
901 BOOL_SIBLING (list[count - 1]) = NULL;
902 BOOL_CHILD (and_tree) = list[0];
903 Xfree (list);
904 /* ----------------------------------------------------- */
905
906} /*AndSort */
907
908/* =========================================================================
909 * Function: AndSortDNF
910 * Description:
911 * Input:
912 * Output:
913 * ========================================================================= */
914
915
916static void
917AndSortDNF (bool_tree_node * DNF_tree, TermList * tl)
918{
919
920 if (BOOL_TAG (DNF_tree) == N_or)
921 {
922 bool_tree_node *child = BOOL_CHILD (DNF_tree);
923
924 while (child)
925 {
926 if (BOOL_TAG (child) == N_and)
927 {
928 AndSort (child, tl, AndSort_comp);
929 }
930 child = BOOL_SIBLING (child);
931 }
932 }
933 else if (BOOL_TAG (DNF_tree) == N_and)
934 {
935 AndSort (DNF_tree, tl, AndSort_comp);
936 }
937}
938
939/* =========================================================================
940 * Function: MarkOrTerms
941 * Description:
942 * Mark or-nodes whose children are all terms.
943 * Also store the sum of the ft's of these nodes.
944 * Input:
945 * Output:
946 * ========================================================================= */
947
948static void
949MarkOrTerms (bool_tree_node * tree, TermList * tl)
950{
951 if (BOOL_TAG (tree) == N_or)
952 {
953 int found_non_term = 0;
954 bool_tree_node *child = BOOL_CHILD (tree);
955
956 /* --- check to see if got all terms --- */
957 while (child)
958 {
959 if (BOOL_TAG (child) != N_term)
960 {
961 found_non_term = 1;
962 }
963 if (BOOL_HAS_CHILD (tree))
964 {
965 MarkOrTerms (child, tl);
966 }
967 child = BOOL_SIBLING (child);
968 } /*while */
969
970 if (!found_non_term)
971 {
972 /* Must have all terms for the OR node */
973 /* So change its tag and calculate its sum */
974 BOOL_TAG (tree) = N_or_terms;
975 BOOL_SUM_FT (tree) = calc_sum_ft (tree, tl);
976 } /*if */
977 /* ------------------------------------- */
978
979 } /*if OR node */
980
981 else if (BOOL_HAS_CHILD (tree))
982 {
983 bool_tree_node *child = BOOL_CHILD (tree);
984 while (child)
985 {
986 MarkOrTerms (child, tl);
987 child = BOOL_SIBLING (child);
988 }
989 }
990
991} /*MarkOrTerms */
992
993/* =========================================================================
994 * Function: calc_sum_ft
995 * Description:
996 * Sums up the ft's of all the terms
997 * Input:
998 * Assumes that the tree only contains terms in it
999 * Output:
1000 * the ft sum
1001 * ========================================================================= */
1002static int
1003calc_sum_ft (bool_tree_node * term_tree, TermList * tl)
1004{
1005 int sum_ft = 0;
1006 bool_tree_node *child = BOOL_CHILD (term_tree);
1007 while (child)
1008 {
1009 sum_ft += GetNthWE (tl, BOOL_TERM (child))->doc_count;
1010 child = BOOL_SIBLING (child);
1011 }
1012 return sum_ft;
1013}
1014
1015/* =========================================================================
1016 * Function: AndOrSort
1017 * Description:
1018 * This is like an AndSort of just terms.
1019 * The difference is that we are now also
1020 * considering sorting with <or of terms> by using the sum of f_t's.
1021 * Input:
1022 * Output:
1023 * ========================================================================= */
1024static void
1025AndOrSort (bool_tree_node * tree, TermList * tl)
1026{
1027 if (BOOL_HAS_CHILD (tree))
1028 {
1029
1030 if (BOOL_TAG (tree) == N_and)
1031 {
1032 AndSort (tree, tl, AndOrSort_comp);
1033 }
1034
1035 /* Check out any children down the tree */
1036 /* Probably a waste of time */
1037 {
1038 bool_tree_node *child = BOOL_CHILD (tree);
1039 while (child)
1040 {
1041 AndOrSort (child, tl);
1042 child = BOOL_SIBLING (child);
1043 }
1044 } /*do the tree */
1045
1046 } /*if has children */
1047} /*AndOrSort */
1048
1049/* =========================================================================
1050 * Function: AndOrSort_comp
1051 * Description:
1052 * Comparison function used to order "term"s and "or_terms"s.
1053 * It's purpose is to be used by a sort routine.
1054 * Input:
1055 * Output:
1056 * ========================================================================= */
1057static int
1058AndOrSort_comp (const void *A, const void *B)
1059{
1060 const bool_tree_node *const *a = A;
1061 const bool_tree_node *const *b = B;
1062
1063 if (BOOL_TAG (*a) == N_term && BOOL_TAG (*b) == N_term)
1064 {
1065 /* look up in term list */
1066 int a_doc_count = GetNthWE (term_list, BOOL_TERM (*a))->doc_count;
1067 int b_doc_count = GetNthWE (term_list, BOOL_TERM (*b))->doc_count;
1068
1069 return a_doc_count - b_doc_count;
1070 }
1071
1072 if (BOOL_TAG (*a) == N_term && BOOL_TAG (*b) == N_or_terms)
1073 {
1074 /* look up in term list */
1075 int a_doc_count = GetNthWE (term_list, BOOL_TERM (*a))->doc_count;
1076 int b_doc_count = BOOL_SUM_FT (*b);
1077
1078 return a_doc_count - b_doc_count;
1079 }
1080
1081 if (BOOL_TAG (*a) == N_or_terms && BOOL_TAG (*b) == N_term)
1082 {
1083 /* look up in term list */
1084 int a_doc_count = BOOL_SUM_FT (*a);
1085 int b_doc_count = GetNthWE (term_list, BOOL_TERM (*b))->doc_count;
1086
1087 return a_doc_count - b_doc_count;
1088 }
1089
1090 if (BOOL_TAG (*a) == N_or_terms && BOOL_TAG (*b) == N_or_terms)
1091 {
1092 /* look up in term list */
1093 int a_doc_count = BOOL_SUM_FT (*a);
1094 int b_doc_count = BOOL_SUM_FT (*b);
1095
1096 return a_doc_count - b_doc_count;
1097 }
1098
1099 if ((BOOL_TAG (*a) == N_term || BOOL_TAG (*a) == N_or_terms) &&
1100 (BOOL_TAG (*b) != N_term && BOOL_TAG (*b) != N_or_terms))
1101 return -1;
1102
1103 if ((BOOL_TAG (*a) != N_term && BOOL_TAG (*a) != N_or_terms) &&
1104 (BOOL_TAG (*b) == N_term || BOOL_TAG (*b) == N_or_terms))
1105 return 1;
1106
1107 return 0;
1108} /*AndOrSort_comp */
Note: See TracBrowser for help on using the repository browser.