source: main/tags/2.80/indexers/mg/src/text/bool_optimiser.c@ 24541

Last change on this file since 24541 was 3745, checked in by mdewsnip, 21 years ago

Addition of MG package for search and retrieval

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 28.3 KB
Line 
1/**************************************************************************
2 *
3 * bool_optimiser -- optimise boolean parse tree
4 * Copyright (C) 1994,95 Tim Shimmin
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU General Public License as published by
8 * the Free Software Foundation; either version 2 of the License, or
9 * (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU General Public License for more details.
15 *
16 * You should have received a copy of the GNU General Public License
17 * along with this program; if not, write to the Free Software
18 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19 *
20 * $Id: bool_optimiser.c 3745 2003-02-20 21:20:24Z mdewsnip $
21 *
22 **************************************************************************/
23
24/*
25 $Log$
26 Revision 1.1 2003/02/20 21:18:23 mdewsnip
27 Addition of MG package for search and retrieval
28
29 Revision 1.1 1999/08/10 21:17:42 sjboddie
30 renamed mg-1.3d directory mg
31
32 Revision 1.1 1998/11/17 09:34:20 rjmcnab
33 *** empty log message ***
34
35 * Revision 1.4 1995/07/27 04:54:55 tes
36 * Committing optimiser before I add the code
37 * to find diff-nodes in any kind of tree.
38 *
39 * Revision 1.3 1994/10/20 00:02:55 tes
40 * Fixed bug with bool_optimiser:
41 * - it couldn't handle "false | false"
42 * - it couldn't handle "true & true"
43 * These would collapse into an empty list and cause an error.
44 *
45 * Revision 1.2 1994/10/18 06:11:02 tes
46 * The boolean optimiser seems to be modifying the parse tree
47 * like it is supposed to.
48 * Paragraph ranking now works without any text files if required to.
49 *
50 * Revision 1.1 1994/10/12 01:15:28 tes
51 * Found bugs in the existing boolean query optimiser.
52 * So decided to rewrite it.
53 * I accidentally deleted query.bool.y, but I have replaced it
54 * with bool_parser.y (which I have forgotten to add here ! ;-(
55 *
56 */
57
58static char *RCSID = "$Id: bool_optimiser.c 3745 2003-02-20 21:20:24Z mdewsnip $";
59
60#include "sysfuncs.h"
61
62#include "memlib.h"
63#include "bool_tree.h"
64#include "term_lists.h"
65#include "messages.h"
66
67typedef int (*CompFunc) (const void *, const void *);
68typedef void (*TreeFunc) (bool_tree_node *);
69
70/* --- routines --- */
71static void PermeateNots (bool_tree_node * tree);
72static void DoubleNeg (bool_tree_node * not_tree);
73static void AndDeMorgan (bool_tree_node * not_tree);
74static void OrDeMorgan (bool_tree_node * not_tree);
75static int AndDistribute (bool_tree_node * tree);
76static void DNF_Distribute (bool_tree_node * tree);
77static void ReplaceWithChildren (bool_tree_node * tree);
78static void CollapseTree (bool_tree_node * tree);
79static void AndTrueFalse (bool_tree_node * and_tree);
80static void OrTrueFalse (bool_tree_node * or_tree);
81static void TF_Idempotent (bool_tree_node * tree);
82static void AndNotToDiff (bool_tree_node * and_tree, TreeFunc recurs_func);
83static void DiffDNF (bool_tree_node * or_node);
84static void DiffTree (bool_tree_node * tree);
85static void AndSort (bool_tree_node * and_tree, TermList * tl, CompFunc comp);
86static void AndOrSort (bool_tree_node * tree, TermList * tl);
87static void AndSortDNF (bool_tree_node * or_node, TermList * tl);
88static void MarkOrTerms (bool_tree_node * tree, TermList * tl);
89static int calc_sum_ft (bool_tree_node * term_tree, TermList * tl);
90static int AndOrSort_comp (const void *A, const void *B);
91static int AndSort_comp (const void *A, const void *B);
92
93/* =========================================================================
94 * Function: OptimiseBoolTree
95 * Description:
96 * For case 2:
97 * Do three major steps:
98 * (i) put into standard form
99 * -> put into DNF (disjunctive normal form - or of ands)
100 * Use DeMorgan's, Double-negative, Distributive rules
101 * (ii) simplify
102 * apply idempotency rules
103 * (iii) ameliorate
104 * convert &! to diff nodes, order terms by frequency,...
105 * Could also do the matching idempotency laws i.e. ...
106 * (A | A), (A | !A), (A & !A), (A & A), (A & (A | B)), (A | (A & B))
107 * Job for future.... ;-)
108 * Input:
109 * Output:
110 * ========================================================================= */
111
112void
113OptimiseBoolTree (bool_tree_node * parse_tree, TermList * tl, int type)
114{
115 switch (type)
116 {
117 case 1:
118 /* --- sorts for and of or-terms --- */
119 PermeateNots (parse_tree);
120 TF_Idempotent (parse_tree);
121 CollapseTree (parse_tree);
122 DiffTree (parse_tree);
123 MarkOrTerms (parse_tree, tl);
124 AndOrSort (parse_tree, tl);
125 break;
126 case 2:
127 /* --- put into DNF --- */
128 PermeateNots (parse_tree);
129 TF_Idempotent (parse_tree);
130 DNF_Distribute (parse_tree);
131 CollapseTree (parse_tree);
132 AndSortDNF (parse_tree, tl);
133 DiffDNF (parse_tree);
134 break;
135 default:
136 /* --- do nothing --- */
137 break;
138 } /*switch */
139} /*OptimiseBoolTree */
140
141/* =========================================================================
142 * Function: DoubleNeg
143 * Description:
144 * !(!(a) = a
145 * Assumes binary tree.
146 * Input:
147 * Output:
148 * ========================================================================= */
149static void
150DoubleNeg (bool_tree_node * not_tree)
151{
152 bool_tree_node *child = BOOL_CHILD (not_tree);
153 bool_tree_node *a = BOOL_CHILD (child);
154 bool_tree_node *sibling = BOOL_SIBLING (not_tree);
155
156 /* copy a into not tree */
157 bcopy (a, not_tree, sizeof (bool_tree_node));
158
159 BOOL_SIBLING (not_tree) = sibling;
160
161 Xfree (child);
162 Xfree (a);
163
164}
165
166/* =========================================================================
167 * Function: AndDeMorgan
168 * Description:
169 * DeMorgan's rule for 'not' of an 'and' i.e. !(a & b) <=> (!a) | (!b)
170 * Assumes Binary Tree
171 * Input:
172 * not of and tree
173 * Output:
174 * or of not trees
175 * ========================================================================= */
176
177static void
178AndDeMorgan (bool_tree_node * not_tree)
179{
180 bool_tree_node *child = BOOL_CHILD (not_tree);
181 bool_tree_node *a = BOOL_CHILD (child);
182 bool_tree_node *b = BOOL_SIBLING (a);
183 bool_tree_node *new_not = NULL;
184
185 /* change 'not' to 'or' */
186 BOOL_TAG (not_tree) = N_or;
187
188 /* change 'and' to 'not' */
189 BOOL_TAG (child) = N_not;
190
191 /* 'a' no longer has a sibling */
192 BOOL_SIBLING (a) = NULL;
193
194 /* 'a's old sibling is now a child of a new not */
195 new_not = CreateBoolTreeNode (N_not, b, NULL);
196 BOOL_SIBLING (child) = new_not;
197}
198
199/* =========================================================================
200 * Function: OrDeMorgan
201 * Description:
202 * DeMorgan's rule for 'not' of an 'or' i.e. !(a | b) <=> (!a) & (!b)
203 * Assumes Binary Tree
204 * Input:
205 * not of and tree
206 * Output:
207 * or of not trees
208 * ========================================================================= */
209
210static void
211OrDeMorgan (bool_tree_node * not_tree)
212{
213 bool_tree_node *child = BOOL_CHILD (not_tree);
214 bool_tree_node *a = BOOL_CHILD (child);
215 bool_tree_node *b = BOOL_SIBLING (a);
216 bool_tree_node *new_not = NULL;
217
218 /* change 'not' to 'and' */
219 BOOL_TAG (not_tree) = N_and;
220
221 /* change 'or' to 'not' */
222 BOOL_TAG (child) = N_not;
223
224 /* 'a' no longer has a sibling */
225 BOOL_SIBLING (a) = NULL;
226
227 /* 'a's old sibling is now a child of a new not */
228 new_not = CreateBoolTreeNode (N_not, b, NULL);
229 BOOL_SIBLING (child) = new_not;
230}
231
232/* =========================================================================
233 * Function: PermeateNots
234 * Description:
235 * Use DeMorgan's and Double-negative
236 * Assumes tree in binary form (i.e. No ands/ors collapsed)
237 * Input:
238 * Output:
239 * ========================================================================= */
240
241static void
242PermeateNots (bool_tree_node * tree)
243{
244 if (!tree)
245 return;
246
247 if (BOOL_TAG (tree) == N_not)
248 {
249 switch (BOOL_TAG (BOOL_CHILD (tree)))
250 {
251 case N_not:
252 DoubleNeg (tree);
253/** NOTE: I do not go on to do the children **/
254 /* I permeate the nots on the tree here & leave it at that */
255 /* In this case I do not know what type 'tree' will end up */
256 PermeateNots (tree);
257 return;
258 case N_and:
259 AndDeMorgan (tree);
260 break;
261 case N_or:
262 OrDeMorgan (tree);
263 break;
264 default:
265 /* leave alone */
266 break;
267 }
268 }
269
270 /* Permeate Nots for children */
271 if (BOOL_HAS_CHILD (tree))
272 {
273 PermeateNots (BOOL_CHILD (tree));
274 PermeateNots (BOOL_SIBLING (BOOL_CHILD (tree)));
275 }
276
277}
278
279/* =========================================================================
280 * Function: DNF_Distribute
281 * Description:
282 * Input:
283 * Output:
284 * ========================================================================= */
285
286
287#define ITER_LIMIT 500 /* if reach limit then probably in trouble */
288
289static void
290DNF_Distribute (bool_tree_node * tree)
291{
292 int i = 0;
293
294 /* multiple passes until no change */
295 /* not very smart :-( */
296 while (1)
297 {
298 if (AndDistribute (tree) == 0)
299 return;
300 if (i == ITER_LIMIT)
301 FatalError (1, "DNF infinite loop");
302 i++;
303 } /*while */
304
305}
306
307/* =========================================================================
308 * Function: AndDistribute
309 * Description:
310 * (a | b) & A <=> (a & A) | (b & A)
311 * Input:
312 * binary tree of "AND" , "OR"s.
313 * Output:
314 * return 1 if changed the tree
315 * return 0 if there was NO change (no distributive rule to apply)
316 * ========================================================================= */
317
318static int
319AndDistribute (bool_tree_node * tree)
320{
321 if (!tree)
322 return 0;
323
324 if (BOOL_TAG (tree) != N_and)
325 {
326 /* try the children if got some */
327 if (BOOL_TAG (tree) == N_or)
328 {
329 bool_tree_node *left_child = BOOL_CHILD (tree);
330 int left = AndDistribute (left_child);
331 int right = AndDistribute (BOOL_SIBLING (left_child));
332 return (left || right);
333 }
334
335 /* no children, then no change */
336 return 0;
337 }
338
339 /* AND case */
340 {
341 /* we have to check the children to see if can distribute */
342 bool_tree_node *left_child = BOOL_CHILD (tree);
343 bool_tree_node *right_child = BOOL_SIBLING (left_child);
344 int swap = 0;
345
346 /* If "or" on the right side then do a swap */
347 /* This is done for code simplicity */
348 if (BOOL_TAG (right_child) == N_or)
349 {
350 /* do a swap */
351 bool_tree_node *temp;
352 temp = right_child;
353 right_child = left_child;
354 left_child = temp;
355
356 BOOL_SIBLING (left_child) = right_child;
357 BOOL_SIBLING (right_child) = NULL;
358 BOOL_CHILD (tree) = left_child;
359
360 swap = 1;
361 }
362
363 /* do the actual distribution */
364 if (swap || BOOL_TAG (left_child) == N_or)
365 {
366 bool_tree_node *A = right_child;
367 bool_tree_node *A_copy = CopyBoolTree (A);
368 bool_tree_node *a = BOOL_CHILD (left_child);
369 bool_tree_node *b = BOOL_SIBLING (a);
370
371 /* then can distribute away ... */
372 BOOL_TAG (tree) = N_or;
373 BOOL_TAG (left_child) = N_and;
374 BOOL_SIBLING (left_child) = CreateBoolTreeNode (N_and, b, A);
375 BOOL_SIBLING (a) = A_copy;
376
377 AndDistribute (tree);
378 return 1;
379 }
380
381 /* if "and" of something other than "or" s then try the children */
382 {
383 int left = AndDistribute (left_child);
384 int right = AndDistribute (right_child);
385 return (left || right);
386 }
387 } /* AND case */
388
389} /*AndDistribute */
390
391/* =========================================================================
392 * Function: CollapseTree
393 * Description:
394 * Collapse "and" and "or"s.
395 * Input:
396 * Output:
397 * ========================================================================= */
398
399static void
400CollapseTree (bool_tree_node * tree)
401{
402 if (!tree)
403 return;
404
405 if ((BOOL_TAG (tree) == N_and) || (BOOL_TAG (tree) == N_or))
406 {
407 bool_tree_node *child = BOOL_CHILD (tree);
408 while (child)
409 {
410 while (BOOL_TAG (child) == BOOL_TAG (tree))
411 {
412 ReplaceWithChildren (child);
413 }
414 CollapseTree (child);
415 child = BOOL_SIBLING (child);
416 }
417 }
418
419 /* assume "not"s permeated */
420 return;
421}
422
423/* =========================================================================
424 * Function: ReplaceWithChildren
425 * Description:
426 * Input:
427 * Output:
428 * ========================================================================= */
429
430static void
431ReplaceWithChildren (bool_tree_node * tree)
432{
433 bool_tree_node *next = BOOL_SIBLING (tree);
434 bool_tree_node *child = BOOL_CHILD (tree);
435 bool_tree_node *last = LastBoolSibling (child);
436
437 bcopy ((char *) child, (char *) tree, sizeof (bool_tree_node));
438 Xfree (child);
439 BOOL_SIBLING (last) = next;
440
441}
442
443/* =========================================================================
444 * Function: TF_Idempotent
445 * Description:
446 * Applies Idempotency laws which refer to True or False (hence "TF")
447 * uses the true or false idempotent rules
448 * Input:
449 * operates on n-ary trees (not restricted to binary)
450 * Output:
451 * ========================================================================= */
452
453static void
454TF_Idempotent (bool_tree_node * tree)
455{
456 bool_tree_node *child = NULL;
457
458 while (tree)
459 {
460 if (BOOL_HAS_CHILD (tree))
461 {
462 child = BOOL_CHILD (tree);
463 TF_Idempotent (child);
464 }
465
466 /* --- process node --- */
467 switch (BOOL_TAG (tree))
468 {
469 case N_not:
470 child = BOOL_CHILD (tree);
471
472 /* if true then falsify */
473 if (BOOL_TAG (child) == N_all)
474 {
475 BOOL_TAG (tree) = N_none;
476 BOOL_CHILD (tree) = NULL;
477 Xfree (child);
478 break;
479 }
480
481 /* if false then trueify */
482 if (BOOL_TAG (child) == N_none)
483 {
484 BOOL_TAG (tree) = N_all;
485 BOOL_CHILD (tree) = NULL;
486 Xfree (child);
487 break;
488 }
489
490 break;
491 case N_and:
492 AndTrueFalse (tree);
493 break;
494 case N_or:
495 OrTrueFalse (tree);
496 break;
497
498 default:
499 /* do nothing */
500 break;
501 } /*switch */
502
503 tree = BOOL_SIBLING (tree);
504 } /*while */
505} /*TF_Idempotent */
506
507/* =========================================================================
508 * Function: AndTrueFalse
509 * Description:
510 * If any "false" then falsify whole tree
511 * If any "true" then delete them
512 * Input:
513 * Output:
514 * ========================================================================= */
515
516static void
517AndTrueFalse (bool_tree_node * and_tree)
518{
519 bool_tree_node *child = BOOL_CHILD (and_tree);
520
521 bool_tree_node *prev = NULL;
522
523 while (child)
524 {
525 if (BOOL_TAG (child) == N_none)
526 {
527 FreeBoolTree (&(BOOL_CHILD (and_tree)));
528 BOOL_TAG (and_tree) = N_none;
529 return;
530 }
531
532 if (BOOL_TAG (child) == N_all)
533 {
534 bool_tree_node *sibling = BOOL_SIBLING (child);
535
536 /* --- link over child --- */
537 if (!prev)
538 {
539 BOOL_CHILD (and_tree) = sibling;
540 }
541 else
542 {
543 BOOL_SIBLING (prev) = sibling;
544 }
545
546 Xfree (child);
547 child = sibling;
548 }
549 else
550 {
551 prev = child;
552 child = BOOL_SIBLING (child);
553 }
554 } /*while */
555
556 child = BOOL_CHILD (and_tree);
557
558 /* if no children ... */
559 /* => all have been true */
560 if (!child)
561 {
562 BOOL_TAG (and_tree) = N_all;
563 return;
564 }
565
566 /* If only one child ... */
567 if (!BOOL_SIBLING (child))
568 {
569 /* replace the parent with child - move it up */
570 bool_tree_node *save_sibling = BOOL_SIBLING (and_tree);
571 bcopy ((char *) child, (char *) and_tree, sizeof (bool_tree_node));
572 BOOL_SIBLING (and_tree) = save_sibling;
573 Xfree (child);
574 }
575
576} /*AndTrueFalse */
577
578/* =========================================================================
579 * Function: OrTrueFalse
580 * Description:
581 * If any "true" then trueify whole tree
582 * If any "false" then delete them
583 * ***** NOTE: This is a reflection of AndTrueFalse *****
584 * Input:
585 * Output:
586 * ========================================================================= */
587
588static void
589OrTrueFalse (bool_tree_node * or_tree)
590{
591 bool_tree_node *child = BOOL_CHILD (or_tree);
592
593 bool_tree_node *prev = NULL;
594
595 while (child)
596 {
597 if (BOOL_TAG (child) == N_all)
598 {
599 FreeBoolTree (&(BOOL_CHILD (or_tree)));
600 BOOL_TAG (or_tree) = N_all;
601 return;
602 }
603
604 if (BOOL_TAG (child) == N_none)
605 {
606 bool_tree_node *sibling = BOOL_SIBLING (child);
607 if (!prev)
608 {
609 BOOL_CHILD (or_tree) = sibling;
610 }
611 else
612 {
613 BOOL_SIBLING (prev) = sibling;
614 }
615 Xfree (child);
616 child = sibling;
617
618 }
619 else
620 {
621 prev = child;
622 child = BOOL_SIBLING (child);
623 }
624 } /*while */
625
626 child = BOOL_CHILD (or_tree);
627
628 /* if no children ... */
629 /* => all have been false */
630 if (!child)
631 {
632 BOOL_TAG (or_tree) = N_none;
633 return;
634 }
635
636 /* If only one child ... */
637 if (!BOOL_SIBLING (child))
638 {
639 /* replace the parent with child - move it up */
640 bool_tree_node *save_sibling = BOOL_SIBLING (or_tree);
641 bcopy ((char *) child, (char *) or_tree, sizeof (bool_tree_node));
642 BOOL_SIBLING (or_tree) = save_sibling;
643 Xfree (child);
644 }
645
646} /*AndTrueFalse */
647
648/* =========================================================================
649 * Function: AndNotToDiff
650 * Description:
651 * Convert an "And" tree with possibly some negated terms into
652 * a "And - neg-terms".
653 * Input:
654 * Expects "And" of only literals - as tree should be in DNF form.
655 * This means that no recursion is necessary.
656 * Output:
657 * ========================================================================= */
658
659static void
660AndNotToDiff (bool_tree_node * and_tree, TreeFunc recurs_func)
661{
662
663 /* -------------------------------------------------- */
664 /* --- Divide the children up into positive and negative literals --- */
665 /* Preserves ordering of children */
666 bool_tree_node *pos = NULL;
667 bool_tree_node *neg = NULL;
668 bool_tree_node *last_pos = NULL;
669 bool_tree_node *last_neg = NULL;
670
671 bool_tree_node *child = BOOL_CHILD (and_tree);
672 while (child)
673 {
674 bool_tree_node *next = BOOL_SIBLING (child);
675 if (BOOL_TAG (child) == N_not)
676 {
677 /* add to end of neg list */
678 if (!neg)
679 neg = child;
680 else
681 BOOL_SIBLING (last_neg) = child;
682 last_neg = child;
683 }
684 else
685 {
686 /* add to end of pos list */
687 if (!pos)
688 pos = child;
689 else
690 BOOL_SIBLING (last_pos) = child;
691 last_pos = child;
692 }
693 child = next;
694 } /*while */
695 if (last_pos)
696 BOOL_SIBLING (last_pos) = NULL;
697 if (last_neg)
698 BOOL_SIBLING (last_neg) = NULL;
699 /* -------------------------------------------------- */
700
701 if (!neg) /* no negtives => no subtraction */
702 {
703 /* fix up parent's child link */
704 BOOL_CHILD (and_tree) = pos;
705 return;
706 }
707
708 if (!pos) /* no positives = all negatives */
709 {
710 /* Choose one of the negs for the left-hand-side of the subtraction */
711 /* In particular, choose the first */
712 pos = neg;
713 neg = BOOL_SIBLING (neg);
714 BOOL_SIBLING (pos) = NULL;
715 }
716
717 /* Get rid of all '!' from not nodes */
718 /* Make a list out of the children */
719 {
720 bool_tree_node *n = neg;
721 bool_tree_node *next = NULL;
722
723 neg = BOOL_CHILD (neg);
724 while (n)
725 {
726 bool_tree_node *c = BOOL_CHILD (n);
727 next = BOOL_SIBLING (n);
728 Xfree (n);
729 if (next)
730 BOOL_SIBLING (c) = BOOL_CHILD (next);
731 n = next;
732 }
733 }
734
735 /* --- restructure the tree with "and" and "diff" --- */
736 BOOL_TAG (and_tree) = N_diff;
737 if (!BOOL_SIBLING (pos)) /* i.e. only one pos */
738 {
739 /* (A & !B & !C & !D & ...) ==> A - (B,C,D,...) */
740
741 BOOL_CHILD (and_tree) = pos;
742 BOOL_SIBLING (pos) = neg;
743 }
744 else
745 {
746 /* (A & !C & B & !D & ...) ==> (A & B & ....) - (C, D, ...) */
747
748 bool_tree_node *n = CreateBoolNode (N_and);
749 BOOL_CHILD (and_tree) = n;
750 BOOL_CHILD (n) = pos;
751 BOOL_SIBLING (n) = neg;
752 } /*if */
753
754 if (recurs_func)
755 {
756 recurs_func (BOOL_CHILD (and_tree));
757 }
758
759} /*AndNotToDiff */
760
761/* =========================================================================
762 * Function: DiffTree
763 * Description:
764 * Applies "AndNotToNeg" to each conjunct.
765 * Unlike DiffDNF, this routine will search throughout the tree for
766 * "and"-nodes. It will call AndNotToNeg with this as its recursive
767 * routine.
768 * Date: 27/July/95
769 * Input:
770 * Output:
771 * ========================================================================= */
772
773static void
774DiffTree (bool_tree_node * tree)
775{
776 bool_tree_node *t = tree;
777
778 while (t)
779 {
780 if (BOOL_TAG (t) == N_and)
781 {
782 AndNotToDiff (t, DiffTree);
783 }
784 t = BOOL_SIBLING (t);
785 } /*while */
786
787} /*DiffTree */
788
789/* =========================================================================
790 * Function: DiffDNF
791 * Description:
792 * Applies "AndNotToNeg" to each conjunct.
793 * Input:
794 * A DNF tree
795 * Output:
796 * A tree possibly with some diffs in it.
797 * ========================================================================= */
798
799static void
800DiffDNF (bool_tree_node * DNF_tree)
801{
802
803 if (BOOL_TAG (DNF_tree) == N_or)
804 {
805 bool_tree_node *child = BOOL_CHILD (DNF_tree);
806
807 while (child)
808 {
809 if (BOOL_TAG (child) == N_and)
810 {
811 AndNotToDiff (child, NULL);
812 }
813 child = BOOL_SIBLING (child);
814 }
815 }
816 else if (BOOL_TAG (DNF_tree) == N_and)
817 {
818 AndNotToDiff (DNF_tree, NULL);
819 }
820}
821
822/* =========================================================================
823 * Function: AndSort_comp
824 * Description:
825 * Input:
826 * Output:
827 * ========================================================================= */
828
829/* Module variable, "term_list", used as parameter for comparison function */
830/* The comparison function only allows certain fixed parameters */
831static TermList *term_list;
832
833static int
834AndSort_comp (const void *A, const void *B)
835{
836 const bool_tree_node *const *a = A;
837 const bool_tree_node *const *b = B;
838
839 if (BOOL_TAG (*a) == N_term && BOOL_TAG (*b) == N_term)
840 {
841 /* look up in term list */
842 int a_doc_count = GetNthWE (term_list, BOOL_TERM (*a))->doc_count;
843 int b_doc_count = GetNthWE (term_list, BOOL_TERM (*b))->doc_count;
844
845 return a_doc_count - b_doc_count;
846 }
847
848 if (BOOL_TAG (*a) == N_term && BOOL_TAG (*b) != N_term)
849 return -1;
850
851 if (BOOL_TAG (*a) != N_term && BOOL_TAG (*b) == N_term)
852 return 1;
853
854 return 0;
855} /*AndSort_comp */
856
857/* =========================================================================
858 * Function: AndSort
859 * Description:
860 * Sort the list of nodes by increasing doc_count
861 * Using some Neil Sharman code - pretty straight forward.
862 * Note: not-terms are sent to the end of the list
863 * Input:
864 * Output:
865 * ========================================================================= */
866
867static void
868AndSort (bool_tree_node * and_tree, TermList * tl, CompFunc comp)
869{
870 bool_tree_node **list = NULL;
871 bool_tree_node *child = NULL;
872 int count = 0;
873 int i = 0;
874
875 if (BOOL_TAG (and_tree) != N_and)
876 return;
877
878 /* --- Convert linked list into an array for sorting --- */
879 child = BOOL_CHILD (and_tree);
880 count = 0;
881 while (child)
882 {
883 count++;
884 child = BOOL_SIBLING (child);
885 }
886
887 if (!(list = Xmalloc (sizeof (bool_tree_node *) * count)))
888 return;
889
890 child = BOOL_CHILD (and_tree);
891 for (i = 0; i < count; i++)
892 {
893 list[i] = child;
894 child = BOOL_SIBLING (child);
895 }
896 /* ----------------------------------------------------- */
897
898 term_list = tl;
899 qsort (list, count, sizeof (bool_tree_node *), comp);
900
901 /* --- Convert array back into linked list --- */
902 for (i = 0; i < count - 1; i++)
903 BOOL_SIBLING (list[i]) = list[i + 1];
904 BOOL_SIBLING (list[count - 1]) = NULL;
905 BOOL_CHILD (and_tree) = list[0];
906 Xfree (list);
907 /* ----------------------------------------------------- */
908
909} /*AndSort */
910
911/* =========================================================================
912 * Function: AndSortDNF
913 * Description:
914 * Input:
915 * Output:
916 * ========================================================================= */
917
918
919static void
920AndSortDNF (bool_tree_node * DNF_tree, TermList * tl)
921{
922
923 if (BOOL_TAG (DNF_tree) == N_or)
924 {
925 bool_tree_node *child = BOOL_CHILD (DNF_tree);
926
927 while (child)
928 {
929 if (BOOL_TAG (child) == N_and)
930 {
931 AndSort (child, tl, AndSort_comp);
932 }
933 child = BOOL_SIBLING (child);
934 }
935 }
936 else if (BOOL_TAG (DNF_tree) == N_and)
937 {
938 AndSort (DNF_tree, tl, AndSort_comp);
939 }
940}
941
942/* =========================================================================
943 * Function: MarkOrTerms
944 * Description:
945 * Mark or-nodes whose children are all terms.
946 * Also store the sum of the ft's of these nodes.
947 * Input:
948 * Output:
949 * ========================================================================= */
950
951static void
952MarkOrTerms (bool_tree_node * tree, TermList * tl)
953{
954 if (BOOL_TAG (tree) == N_or)
955 {
956 int found_non_term = 0;
957 bool_tree_node *child = BOOL_CHILD (tree);
958
959 /* --- check to see if got all terms --- */
960 while (child)
961 {
962 if (BOOL_TAG (child) != N_term)
963 {
964 found_non_term = 1;
965 }
966 if (BOOL_HAS_CHILD (tree))
967 {
968 MarkOrTerms (child, tl);
969 }
970 child = BOOL_SIBLING (child);
971 } /*while */
972
973 if (!found_non_term)
974 {
975 /* Must have all terms for the OR node */
976 /* So change its tag and calculate its sum */
977 BOOL_TAG (tree) = N_or_terms;
978 BOOL_SUM_FT (tree) = calc_sum_ft (tree, tl);
979 } /*if */
980 /* ------------------------------------- */
981
982 } /*if OR node */
983
984 else if (BOOL_HAS_CHILD (tree))
985 {
986 bool_tree_node *child = BOOL_CHILD (tree);
987 while (child)
988 {
989 MarkOrTerms (child, tl);
990 child = BOOL_SIBLING (child);
991 }
992 }
993
994} /*MarkOrTerms */
995
996/* =========================================================================
997 * Function: calc_sum_ft
998 * Description:
999 * Sums up the ft's of all the terms
1000 * Input:
1001 * Assumes that the tree only contains terms in it
1002 * Output:
1003 * the ft sum
1004 * ========================================================================= */
1005static int
1006calc_sum_ft (bool_tree_node * term_tree, TermList * tl)
1007{
1008 int sum_ft = 0;
1009 bool_tree_node *child = BOOL_CHILD (term_tree);
1010 while (child)
1011 {
1012 sum_ft += GetNthWE (tl, BOOL_TERM (child))->doc_count;
1013 child = BOOL_SIBLING (child);
1014 }
1015 return sum_ft;
1016}
1017
1018/* =========================================================================
1019 * Function: AndOrSort
1020 * Description:
1021 * This is like an AndSort of just terms.
1022 * The difference is that we are now also
1023 * considering sorting with <or of terms> by using the sum of f_t's.
1024 * Input:
1025 * Output:
1026 * ========================================================================= */
1027static void
1028AndOrSort (bool_tree_node * tree, TermList * tl)
1029{
1030 if (BOOL_HAS_CHILD (tree))
1031 {
1032
1033 if (BOOL_TAG (tree) == N_and)
1034 {
1035 AndSort (tree, tl, AndOrSort_comp);
1036 }
1037
1038 /* Check out any children down the tree */
1039 /* Probably a waste of time */
1040 {
1041 bool_tree_node *child = BOOL_CHILD (tree);
1042 while (child)
1043 {
1044 AndOrSort (child, tl);
1045 child = BOOL_SIBLING (child);
1046 }
1047 } /*do the tree */
1048
1049 } /*if has children */
1050} /*AndOrSort */
1051
1052/* =========================================================================
1053 * Function: AndOrSort_comp
1054 * Description:
1055 * Comparison function used to order "term"s and "or_terms"s.
1056 * It's purpose is to be used by a sort routine.
1057 * Input:
1058 * Output:
1059 * ========================================================================= */
1060static int
1061AndOrSort_comp (const void *A, const void *B)
1062{
1063 const bool_tree_node *const *a = A;
1064 const bool_tree_node *const *b = B;
1065
1066 if (BOOL_TAG (*a) == N_term && BOOL_TAG (*b) == N_term)
1067 {
1068 /* look up in term list */
1069 int a_doc_count = GetNthWE (term_list, BOOL_TERM (*a))->doc_count;
1070 int b_doc_count = GetNthWE (term_list, BOOL_TERM (*b))->doc_count;
1071
1072 return a_doc_count - b_doc_count;
1073 }
1074
1075 if (BOOL_TAG (*a) == N_term && BOOL_TAG (*b) == N_or_terms)
1076 {
1077 /* look up in term list */
1078 int a_doc_count = GetNthWE (term_list, BOOL_TERM (*a))->doc_count;
1079 int b_doc_count = BOOL_SUM_FT (*b);
1080
1081 return a_doc_count - b_doc_count;
1082 }
1083
1084 if (BOOL_TAG (*a) == N_or_terms && BOOL_TAG (*b) == N_term)
1085 {
1086 /* look up in term list */
1087 int a_doc_count = BOOL_SUM_FT (*a);
1088 int b_doc_count = GetNthWE (term_list, BOOL_TERM (*b))->doc_count;
1089
1090 return a_doc_count - b_doc_count;
1091 }
1092
1093 if (BOOL_TAG (*a) == N_or_terms && BOOL_TAG (*b) == N_or_terms)
1094 {
1095 /* look up in term list */
1096 int a_doc_count = BOOL_SUM_FT (*a);
1097 int b_doc_count = BOOL_SUM_FT (*b);
1098
1099 return a_doc_count - b_doc_count;
1100 }
1101
1102 if ((BOOL_TAG (*a) == N_term || BOOL_TAG (*a) == N_or_terms) &&
1103 (BOOL_TAG (*b) != N_term && BOOL_TAG (*b) != N_or_terms))
1104 return -1;
1105
1106 if ((BOOL_TAG (*a) != N_term && BOOL_TAG (*a) != N_or_terms) &&
1107 (BOOL_TAG (*b) == N_term || BOOL_TAG (*b) == N_or_terms))
1108 return 1;
1109
1110 return 0;
1111} /*AndOrSort_comp */
Note: See TracBrowser for help on using the repository browser.