source: trunk/gsdl3/src/packages/mg/lib/rx.c@ 13656

Last change on this file since 13656 was 13656, checked in by kjdon, 17 years ago

Changed a variable named errcode to rx_errcode, so this compiles with Visual

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 174.9 KB
Line 
1/* Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
2
3This file is part of the librx library.
4
5Librx is free software; you can redistribute it and/or modify it under
6the terms of the GNU Library General Public License as published by
7the Free Software Foundation; either version 2, or (at your option)
8any later version.
9
10Librx is distributed in the hope that it will be useful, but WITHOUT
11ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
12FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
13for more details.
14
15You should have received a copy of the GNU Library General Public
16License along with this software; see the file COPYING.LIB. If not,
17write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA
1802139, USA. */
19
20/* NOTE!!! AIX is so losing it requires this to be the first thing in the
21 * file.
22 * Do not put ANYTHING before it!
23 */
24#if !defined (__GNUC__) && defined (_AIX)
25 #pragma alloca
26#endif
27
28/* To make linux happy? */
29#ifndef _GNU_SOURCE
30#define _GNU_SOURCE
31#endif
32
33#if HAVE_CONFIG_H
34# ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
35# include <win32cfg.h>
36# include <malloc.h>
37# else
38# include <sysfuncs.h>
39# endif
40#endif
41
42
43
44char rx_version_string[] = "GNU Rx version 0.07.1";
45
46 /* ``Too hard!''
47 * -- anon.
48 */
49
50
51
52#include <stdio.h>
53#include <ctype.h>
54#ifndef isgraph
55#define isgraph(c) (isprint (c) && !isspace (c))
56#endif
57#ifndef isblank
58#define isblank(c) ((c) == ' ' || (c) == '\t')
59#endif
60
61#include <sys/types.h>
62
63#undef MAX
64#undef MIN
65#define MAX(a, b) ((a) > (b) ? (a) : (b))
66#define MIN(a, b) ((a) < (b) ? (a) : (b))
67
68typedef char boolean;
69#define false 0
70#define true 1
71
72#ifndef __GCC__
73#undef __inline__
74#define __inline__
75#endif
76
77/* Emacs already defines alloca, sometimes. */
78#ifndef alloca
79
80/* Make alloca work the best possible way. */
81#ifdef __GNUC__
82#define alloca __builtin_alloca
83#else /* not __GNUC__ */
84#if HAVE_ALLOCA_H
85#include <alloca.h>
86#else /* not __GNUC__ or HAVE_ALLOCA_H */
87#ifndef _AIX /* Already did AIX, up at the top. */
88char *alloca ();
89#endif /* not _AIX */
90#endif /* not HAVE_ALLOCA_H */
91#endif /* not __GNUC__ */
92
93#endif /* not alloca */
94
95/* Memory management and stuff for emacs. */
96
97#define CHARBITS 8
98#define remalloc(M, S) (M ? realloc (M, S) : malloc (S))
99
100
101/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
102 * use `alloca' instead of `malloc' for the backtracking stack.
103 *
104 * Emacs will die miserably if we don't do this.
105 */
106
107#ifdef REGEX_MALLOC
108#define REGEX_ALLOCATE malloc
109#else /* not REGEX_MALLOC */
110#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
111#define REGEX_ALLOCATE _alloca
112#else
113#define REGEX_ALLOCATE alloca
114#endif /* __WIN32__ */
115#endif /* not REGEX_MALLOC */
116
117
118#ifdef RX_WANT_RX_DEFS
119#define RX_DECL extern
120#define RX_DEF_QUAL
121#else
122#define RX_WANT_RX_DEFS
123#define RX_DECL static
124#define RX_DEF_QUAL static
125#endif
126#include "rx.h"
127#undef RX_DECL
128#define RX_DECL RX_DEF_QUAL
129
130
131#ifndef emacs
132
133#ifdef SYNTAX_TABLE
134extern char *re_syntax_table;
135#else /* not SYNTAX_TABLE */
136
137#ifdef __STDC__
138static void
139init_syntax_once (void)
140#else
141static void
142init_syntax_once ()
143#endif
144{
145 register int c;
146 static int done = 0;
147
148 if (done)
149 return;
150
151 bzero (re_syntax_table, sizeof re_syntax_table);
152
153 for (c = 'a'; c <= 'z'; c++)
154 re_syntax_table[c] = Sword;
155
156 for (c = 'A'; c <= 'Z'; c++)
157 re_syntax_table[c] = Sword;
158
159 for (c = '0'; c <= '9'; c++)
160 re_syntax_table[c] = Sword;
161
162 re_syntax_table['_'] = Sword;
163
164 done = 1;
165}
166#endif /* not SYNTAX_TABLE */
167#endif /* not emacs */
168
169
170/* Compile with `-DRX_DEBUG' and use the following flags.
171 *
172 * Debugging flags:
173 * rx_debug - print information as a regexp is compiled
174 * rx_debug_trace - print information as a regexp is executed
175 */
176
177#ifdef RX_DEBUG
178
179int rx_debug_compile = 0;
180int rx_debug_trace = 0;
181static struct re_pattern_buffer * dbug_rxb = 0;
182
183#ifdef __STDC__
184typedef void (*side_effect_printer) (struct rx *, void *, FILE *);
185#else
186typedef void (*side_effect_printer) ();
187#endif
188
189#ifdef __STDC__
190static void print_cset (struct rx *rx, rx_Bitset cset, FILE * fp);
191#else
192static void print_cset ();
193#endif
194
195#ifdef __STDC__
196static void
197print_rexp (struct rx *rx,
198 struct rexp_node *node, int depth,
199 side_effect_printer seprint, FILE * fp)
200#else
201static void
202print_rexp (rx, node, depth, seprint, fp)
203 struct rx *rx;
204 struct rexp_node *node;
205 int depth;
206 side_effect_printer seprint;
207 FILE * fp;
208#endif
209{
210 if (!node)
211 return;
212 else
213 {
214 switch (node->type)
215 {
216 case r_cset:
217 {
218 fprintf (fp, "%*s", depth, "");
219 print_cset (rx, node->params.cset, fp);
220 fputc ('\n', fp);
221 break;
222 }
223
224 case r_opt:
225 case r_star:
226 fprintf (fp, "%*s%s\n", depth, "",
227 node->type == r_opt ? "opt" : "star");
228 print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
229 break;
230
231 case r_2phase_star:
232 fprintf (fp, "%*s2phase star\n", depth, "");
233 print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp);
234 print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
235 break;
236
237
238 case r_alternate:
239 case r_concat:
240 fprintf (fp, "%*s%s\n", depth, "",
241 node->type == r_alternate ? "alt" : "concat");
242 print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
243 print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp);
244 break;
245 case r_side_effect:
246 fprintf (fp, "%*sSide effect: ", depth, "");
247 seprint (rx, node->params.side_effect, fp);
248 fputc ('\n', fp);
249 }
250 }
251}
252
253#ifdef __STDC__
254static void
255print_nfa (struct rx * rx,
256 struct rx_nfa_state * n,
257 side_effect_printer seprint, FILE * fp)
258#else
259static void
260print_nfa (rx, n, seprint, fp)
261 struct rx * rx;
262 struct rx_nfa_state * n;
263 side_effect_printer seprint;
264 FILE * fp;
265#endif
266{
267 while (n)
268 {
269 struct rx_nfa_edge *e = n->edges;
270 struct rx_possible_future *ec = n->futures;
271 fprintf (fp, "node %d %s\n", n->id,
272 n->is_final ? "final" : (n->is_start ? "start" : ""));
273 while (e)
274 {
275 fprintf (fp, " edge to %d, ", e->dest->id);
276 switch (e->type)
277 {
278 case ne_epsilon:
279 fprintf (fp, "epsilon\n");
280 break;
281 case ne_side_effect:
282 fprintf (fp, "side effect ");
283 seprint (rx, e->params.side_effect, fp);
284 fputc ('\n', fp);
285 break;
286 case ne_cset:
287 fprintf (fp, "cset ");
288 print_cset (rx, e->params.cset, fp);
289 fputc ('\n', fp);
290 break;
291 }
292 e = e->next;
293 }
294
295 while (ec)
296 {
297 int x;
298 struct rx_nfa_state_set * s;
299 struct rx_se_list * l;
300 fprintf (fp, " eclosure to {");
301 for (s = ec->destset; s; s = s->cdr)
302 fprintf (fp, "%d ", s->car->id);
303 fprintf (fp, "} (");
304 for (l = ec->effects; l; l = l->cdr)
305 {
306 seprint (rx, l->car, fp);
307 fputc (' ', fp);
308 }
309 fprintf (fp, ")\n");
310 ec = ec->next;
311 }
312 n = n->next;
313 }
314}
315
316static char * efnames [] =
317{
318 "bogon",
319 "re_se_try",
320 "re_se_pushback",
321 "re_se_push0",
322 "re_se_pushpos",
323 "re_se_chkpos",
324 "re_se_poppos",
325 "re_se_at_dot",
326 "re_se_syntax",
327 "re_se_not_syntax",
328 "re_se_begbuf",
329 "re_se_hat",
330 "re_se_wordbeg",
331 "re_se_wordbound",
332 "re_se_notwordbound",
333 "re_se_wordend",
334 "re_se_endbuf",
335 "re_se_dollar",
336 "re_se_fail",
337};
338
339static char * efnames2[] =
340{
341 "re_se_win",
342 "re_se_lparen",
343 "re_se_rparen",
344 "re_se_backref",
345 "re_se_iter",
346 "re_se_end_iter",
347 "re_se_tv"
348};
349
350static char * inx_names[] =
351{
352 "rx_backtrack_point",
353 "rx_do_side_effects",
354 "rx_cache_miss",
355 "rx_next_char",
356 "rx_backtrack",
357 "rx_error_inx",
358 "rx_num_instructions"
359};
360
361
362#ifdef __STDC__
363static void
364re_seprint (struct rx * rx, void * effect, FILE * fp)
365#else
366static void
367re_seprint (rx, effect, fp)
368 struct rx * rx;
369 void * effect;
370 FILE * fp;
371#endif
372{
373 if ((int)effect < 0)
374 fputs (efnames[-(int)effect], fp);
375 else if (dbug_rxb)
376 {
377 struct re_se_params * p = &dbug_rxb->se_params[(int)effect];
378 fprintf (fp, "%s(%d,%d)", efnames2[p->se], p->op1, p->op2);
379 }
380 else
381 fprintf (fp, "[complex op # %d]", (int)effect);
382}
383
384
385/* These are so the regex.c regression tests will compile. */
386void
387print_compiled_pattern (rxb)
388 struct re_pattern_buffer * rxb;
389{
390}
391
392void
393print_fastmap (fm)
394 char * fm;
395{
396}
397
398#endif /* RX_DEBUG */
399
400
401
402
403/* This page: Bitsets. Completely unintersting. */
404
405#ifdef __STDC__
406RX_DECL int
407rx_bitset_is_equal (int size, rx_Bitset a, rx_Bitset b)
408#else
409RX_DECL int
410rx_bitset_is_equal (size, a, b)
411 int size;
412 rx_Bitset a;
413 rx_Bitset b;
414#endif
415{
416 int x;
417 RX_subset s = b[0];
418 b[0] = ~a[0];
419
420 for (x = rx_bitset_numb_subsets(size) - 1; a[x] == b[x]; --x)
421 ;
422
423 b[0] = s;
424 return !x && s == a[0];
425}
426
427#ifdef __STDC__
428RX_DECL int
429rx_bitset_is_subset (int size, rx_Bitset a, rx_Bitset b)
430#else
431RX_DECL int
432rx_bitset_is_subset (size, a, b)
433 int size;
434 rx_Bitset a;
435 rx_Bitset b;
436#endif
437{
438 int x = rx_bitset_numb_subsets(size) - 1;
439 while (x-- && (a[x] & b[x]) == a[x]);
440 return x == -1;
441}
442
443
444#ifdef __STDC__
445RX_DECL int
446rx_bitset_empty (int size, rx_Bitset set)
447#else
448RX_DECL int
449rx_bitset_empty (size, set)
450 int size;
451 rx_Bitset set;
452#endif
453{
454 int x;
455 RX_subset s = set[0];
456 set[0] = 1;
457 for (x = rx_bitset_numb_subsets(size) - 1; !set[x]; --x)
458 ;
459 set[0] = s;
460 return !s;
461}
462
463#ifdef __STDC__
464RX_DECL void
465rx_bitset_null (int size, rx_Bitset b)
466#else
467RX_DECL void
468rx_bitset_null (size, b)
469 int size;
470 rx_Bitset b;
471#endif
472{
473 bzero (b, rx_sizeof_bitset(size));
474}
475
476
477#ifdef __STDC__
478RX_DECL void
479rx_bitset_universe (int size, rx_Bitset b)
480#else
481RX_DECL void
482rx_bitset_universe (size, b)
483 int size;
484 rx_Bitset b;
485#endif
486{
487 int x = rx_bitset_numb_subsets (size);
488 while (x--)
489 *b++ = ~(RX_subset)0;
490}
491
492
493#ifdef __STDC__
494RX_DECL void
495rx_bitset_complement (int size, rx_Bitset b)
496#else
497RX_DECL void
498rx_bitset_complement (size, b)
499 int size;
500 rx_Bitset b;
501#endif
502{
503 int x = rx_bitset_numb_subsets (size);
504 while (x--)
505 {
506 *b = ~*b;
507 ++b;
508 }
509}
510
511
512#ifdef __STDC__
513RX_DECL void
514rx_bitset_assign (int size, rx_Bitset a, rx_Bitset b)
515#else
516RX_DECL void
517rx_bitset_assign (size, a, b)
518 int size;
519 rx_Bitset a;
520 rx_Bitset b;
521#endif
522{
523 int x;
524 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
525 a[x] = b[x];
526}
527
528
529#ifdef __STDC__
530RX_DECL void
531rx_bitset_union (int size, rx_Bitset a, rx_Bitset b)
532#else
533RX_DECL void
534rx_bitset_union (size, a, b)
535 int size;
536 rx_Bitset a;
537 rx_Bitset b;
538#endif
539{
540 int x;
541 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
542 a[x] |= b[x];
543}
544
545
546#ifdef __STDC__
547RX_DECL void
548rx_bitset_intersection (int size,
549 rx_Bitset a, rx_Bitset b)
550#else
551RX_DECL void
552rx_bitset_intersection (size, a, b)
553 int size;
554 rx_Bitset a;
555 rx_Bitset b;
556#endif
557{
558 int x;
559 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
560 a[x] &= b[x];
561}
562
563
564#ifdef __STDC__
565RX_DECL void
566rx_bitset_difference (int size, rx_Bitset a, rx_Bitset b)
567#else
568RX_DECL void
569rx_bitset_difference (size, a, b)
570 int size;
571 rx_Bitset a;
572 rx_Bitset b;
573#endif
574{
575 int x;
576 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
577 a[x] &= ~ b[x];
578}
579
580
581#ifdef __STDC__
582RX_DECL void
583rx_bitset_revdifference (int size,
584 rx_Bitset a, rx_Bitset b)
585#else
586RX_DECL void
587rx_bitset_revdifference (size, a, b)
588 int size;
589 rx_Bitset a;
590 rx_Bitset b;
591#endif
592{
593 int x;
594 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
595 a[x] = ~a[x] & b[x];
596}
597
598#ifdef __STDC__
599RX_DECL void
600rx_bitset_xor (int size, rx_Bitset a, rx_Bitset b)
601#else
602RX_DECL void
603rx_bitset_xor (size, a, b)
604 int size;
605 rx_Bitset a;
606 rx_Bitset b;
607#endif
608{
609 int x;
610 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
611 a[x] ^= b[x];
612}
613
614
615#ifdef __STDC__
616RX_DECL unsigned long
617rx_bitset_hash (int size, rx_Bitset b)
618#else
619RX_DECL unsigned long
620rx_bitset_hash (size, b)
621 int size;
622 rx_Bitset b;
623#endif
624{
625 int x;
626 unsigned long hash = (unsigned long)rx_bitset_hash;
627
628 for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x)
629 hash ^= rx_bitset_subset_val(b, x);
630
631 return hash;
632}
633
634
635RX_DECL RX_subset rx_subset_singletons [RX_subset_bits] =
636{
637 0x1,
638 0x2,
639 0x4,
640 0x8,
641 0x10,
642 0x20,
643 0x40,
644 0x80,
645 0x100,
646 0x200,
647 0x400,
648 0x800,
649 0x1000,
650 0x2000,
651 0x4000,
652 0x8000,
653 0x10000,
654 0x20000,
655 0x40000,
656 0x80000,
657 0x100000,
658 0x200000,
659 0x400000,
660 0x800000,
661 0x1000000,
662 0x2000000,
663 0x4000000,
664 0x8000000,
665 0x10000000,
666 0x20000000,
667 0x40000000,
668 0x80000000
669};
670
671#ifdef RX_DEBUG
672
673#ifdef __STDC__
674static void
675print_cset (struct rx *rx, rx_Bitset cset, FILE * fp)
676#else
677static void
678print_cset (rx, cset, fp)
679 struct rx *rx;
680 rx_Bitset cset;
681 FILE * fp;
682#endif
683{
684 int x;
685 fputc ('[', fp);
686 for (x = 0; x < rx->local_cset_size; ++x)
687 if (RX_bitset_member (cset, x))
688 {
689 if (isprint(x))
690 fputc (x, fp);
691 else
692 fprintf (fp, "\\0%o ", x);
693 }
694 fputc (']', fp);
695}
696
697#endif /* RX_DEBUG */
698
699
700
701
702static unsigned long rx_hash_masks[4] =
703{
704 0x12488421,
705 0x96699669,
706 0xbe7dd7eb,
707 0xffffffff
708};
709
710
711/* Hash tables */
712#ifdef __STDC__
713RX_DECL struct rx_hash_item *
714rx_hash_find (struct rx_hash * table,
715 unsigned long hash,
716 void * value,
717 struct rx_hash_rules * rules)
718#else
719RX_DECL struct rx_hash_item *
720rx_hash_find (table, hash, value, rules)
721 struct rx_hash * table;
722 unsigned long hash;
723 void * value;
724 struct rx_hash_rules * rules;
725#endif
726{
727 rx_hash_eq eq = rules->eq;
728 int maskc = 0;
729 long mask = rx_hash_masks [0];
730 int bucket = (hash & mask) % 13;
731
732 while (table->children [bucket])
733 {
734 table = table->children [bucket];
735 ++maskc;
736 mask = rx_hash_masks[maskc];
737 bucket = (hash & mask) % 13;
738 }
739
740 {
741 struct rx_hash_item * it = table->buckets[bucket];
742 while (it)
743 if (eq (it->data, value))
744 return it;
745 else
746 it = it->next_same_hash;
747 }
748
749 return 0;
750}
751
752
753#ifdef __STDC__
754RX_DECL struct rx_hash_item *
755rx_hash_store (struct rx_hash * table,
756 unsigned long hash,
757 void * value,
758 struct rx_hash_rules * rules)
759#else
760RX_DECL struct rx_hash_item *
761rx_hash_store (table, hash, value, rules)
762 struct rx_hash * table;
763 unsigned long hash;
764 void * value;
765 struct rx_hash_rules * rules;
766#endif
767{
768 rx_hash_eq eq = rules->eq;
769 int maskc = 0;
770 long mask = rx_hash_masks[0];
771 int bucket = (hash & mask) % 13;
772 int depth = 0;
773
774 while (table->children [bucket])
775 {
776 table = table->children [bucket];
777 ++maskc;
778 mask = rx_hash_masks[maskc];
779 bucket = (hash & mask) % 13;
780 ++depth;
781 }
782
783 {
784 struct rx_hash_item * it = table->buckets[bucket];
785 while (it)
786 if (eq (it->data, value))
787 return it;
788 else
789 it = it->next_same_hash;
790 }
791
792 {
793 if ( (depth < 3)
794 && (table->bucket_size [bucket] >= 4))
795 {
796 struct rx_hash * newtab = ((struct rx_hash *)
797 rules->hash_alloc (rules));
798 if (!newtab)
799 goto add_to_bucket;
800 bzero (newtab, sizeof (*newtab));
801 newtab->parent = table;
802 {
803 struct rx_hash_item * them = table->buckets[bucket];
804 unsigned long newmask = rx_hash_masks[maskc + 1];
805 while (them)
806 {
807 struct rx_hash_item * save = them->next_same_hash;
808 int new_buck = (them->hash & newmask) % 13;
809 them->next_same_hash = newtab->buckets[new_buck];
810 newtab->buckets[new_buck] = them;
811 them->table = newtab;
812 them = save;
813 ++newtab->bucket_size[new_buck];
814 ++newtab->refs;
815 }
816 table->refs = (table->refs - table->bucket_size[bucket] + 1);
817 table->bucket_size[bucket] = 0;
818 table->buckets[bucket] = 0;
819 table->children[bucket] = newtab;
820 table = newtab;
821 bucket = (hash & newmask) % 13;
822 }
823 }
824 }
825 add_to_bucket:
826 {
827 struct rx_hash_item * it = ((struct rx_hash_item *)
828 rules->hash_item_alloc (rules, value));
829 if (!it)
830 return 0;
831 it->hash = hash;
832 it->table = table;
833 /* DATA and BINDING are to be set in hash_item_alloc */
834 it->next_same_hash = table->buckets [bucket];
835 table->buckets[bucket] = it;
836 ++table->bucket_size [bucket];
837 ++table->refs;
838 return it;
839 }
840}
841
842
843#ifdef __STDC__
844RX_DECL void
845rx_hash_free (struct rx_hash_item * it, struct rx_hash_rules * rules)
846#else
847RX_DECL void
848rx_hash_free (it, rules)
849 struct rx_hash_item * it;
850 struct rx_hash_rules * rules;
851#endif
852{
853 if (it)
854 {
855 struct rx_hash * table = it->table;
856 unsigned long hash = it->hash;
857 int depth = (table->parent
858 ? (table->parent->parent
859 ? (table->parent->parent->parent
860 ? 3
861 : 2)
862 : 1)
863 : 0);
864 int bucket = (hash & rx_hash_masks [depth]) % 13;
865 struct rx_hash_item ** pos = &table->buckets [bucket];
866
867 while (*pos != it)
868 pos = &(*pos)->next_same_hash;
869 *pos = it->next_same_hash;
870 rules->free_hash_item (it, rules);
871 --table->bucket_size[bucket];
872 --table->refs;
873 while (!table->refs && depth)
874 {
875 struct rx_hash * save = table;
876 table = table->parent;
877 --depth;
878 bucket = (hash & rx_hash_masks [depth]) % 13;
879 --table->refs;
880 table->children[bucket] = 0;
881 rules->free_hash (save, rules);
882 }
883 }
884}
885
886#ifdef __STDC__
887RX_DECL void
888rx_free_hash_table (struct rx_hash * tab, rx_hash_freefn freefn,
889 struct rx_hash_rules * rules)
890#else
891RX_DECL void
892rx_free_hash_table (tab, freefn, rules)
893 struct rx_hash * tab;
894 rx_hash_freefn freefn;
895 struct rx_hash_rules * rules;
896#endif
897{
898 int x;
899
900 for (x = 0; x < 13; ++x)
901 if (tab->children[x])
902 {
903 rx_free_hash_table (tab->children[x], freefn, rules);
904 rules->free_hash (tab->children[x], rules);
905 }
906 else
907 {
908 struct rx_hash_item * them = tab->buckets[x];
909 while (them)
910 {
911 struct rx_hash_item * that = them;
912 them = that->next_same_hash;
913 freefn (that);
914 rules->free_hash_item (that, rules);
915 }
916 }
917}
918
919
920
921
922/* Utilities for manipulating bitset represntations of characters sets. */
923
924#ifdef __STDC__
925RX_DECL rx_Bitset
926rx_cset (struct rx *rx)
927#else
928RX_DECL rx_Bitset
929rx_cset (rx)
930 struct rx *rx;
931#endif
932{
933 rx_Bitset b = (rx_Bitset) malloc (rx_sizeof_bitset (rx->local_cset_size));
934 if (b)
935 rx_bitset_null (rx->local_cset_size, b);
936 return b;
937}
938
939
940#ifdef __STDC__
941RX_DECL rx_Bitset
942rx_copy_cset (struct rx *rx, rx_Bitset a)
943#else
944RX_DECL rx_Bitset
945rx_copy_cset (rx, a)
946 struct rx *rx;
947 rx_Bitset a;
948#endif
949{
950 rx_Bitset cs = rx_cset (rx);
951
952 if (cs)
953 rx_bitset_union (rx->local_cset_size, cs, a);
954
955 return cs;
956}
957
958
959#ifdef __STDC__
960RX_DECL void
961rx_free_cset (struct rx * rx, rx_Bitset c)
962#else
963RX_DECL void
964rx_free_cset (rx, c)
965 struct rx * rx;
966 rx_Bitset c;
967#endif
968{
969 if (c)
970 free ((char *)c);
971}
972
973
974
975/* Hash table memory allocation policy for the regexp compiler */
976
977#ifdef __STDC__
978static struct rx_hash *
979compiler_hash_alloc (struct rx_hash_rules * rules)
980#else
981static struct rx_hash *
982compiler_hash_alloc (rules)
983 struct rx_hash_rules * rules;
984#endif
985{
986 return (struct rx_hash *)malloc (sizeof (struct rx_hash));
987}
988
989
990#ifdef __STDC__
991static struct rx_hash_item *
992compiler_hash_item_alloc (struct rx_hash_rules * rules, void * value)
993#else
994static struct rx_hash_item *
995compiler_hash_item_alloc (rules, value)
996 struct rx_hash_rules * rules;
997 void * value;
998#endif
999{
1000 struct rx_hash_item * it;
1001 it = (struct rx_hash_item *)malloc (sizeof (*it));
1002 if (it)
1003 {
1004 it->data = value;
1005 it->binding = 0;
1006 }
1007 return it;
1008}
1009
1010
1011#ifdef __STDC__
1012static void
1013compiler_free_hash (struct rx_hash * tab,
1014 struct rx_hash_rules * rules)
1015#else
1016static void
1017compiler_free_hash (tab, rules)
1018 struct rx_hash * tab;
1019 struct rx_hash_rules * rules;
1020#endif
1021{
1022 free ((char *)tab);
1023}
1024
1025
1026#ifdef __STDC__
1027static void
1028compiler_free_hash_item (struct rx_hash_item * item,
1029 struct rx_hash_rules * rules)
1030#else
1031static void
1032compiler_free_hash_item (item, rules)
1033 struct rx_hash_item * item;
1034 struct rx_hash_rules * rules;
1035#endif
1036{
1037 free ((char *)item);
1038}
1039
1040
1041
1042/* This page: REXP_NODE (expression tree) structures. */
1043
1044#ifdef __STDC__
1045RX_DECL struct rexp_node *
1046rexp_node (struct rx *rx,
1047 enum rexp_node_type type)
1048#else
1049RX_DECL struct rexp_node *
1050rexp_node (rx, type)
1051 struct rx *rx;
1052 enum rexp_node_type type;
1053#endif
1054{
1055 struct rexp_node *n;
1056
1057 n = (struct rexp_node *)malloc (sizeof (*n));
1058 bzero (n, sizeof (*n));
1059 if (n)
1060 n->type = type;
1061 return n;
1062}
1063
1064
1065/* free_rexp_node assumes that the bitset passed to rx_mk_r_cset
1066 * can be freed using rx_free_cset.
1067 */
1068#ifdef __STDC__
1069RX_DECL struct rexp_node *
1070rx_mk_r_cset (struct rx * rx,
1071 rx_Bitset b)
1072#else
1073RX_DECL struct rexp_node *
1074rx_mk_r_cset (rx, b)
1075 struct rx * rx;
1076 rx_Bitset b;
1077#endif
1078{
1079 struct rexp_node * n = rexp_node (rx, r_cset);
1080 if (n)
1081 n->params.cset = b;
1082 return n;
1083}
1084
1085
1086#ifdef __STDC__
1087RX_DECL struct rexp_node *
1088rx_mk_r_concat (struct rx * rx,
1089 struct rexp_node * a,
1090 struct rexp_node * b)
1091#else
1092RX_DECL struct rexp_node *
1093rx_mk_r_concat (rx, a, b)
1094 struct rx * rx;
1095 struct rexp_node * a;
1096 struct rexp_node * b;
1097#endif
1098{
1099 struct rexp_node * n = rexp_node (rx, r_concat);
1100 if (n)
1101 {
1102 n->params.pair.left = a;
1103 n->params.pair.right = b;
1104 }
1105 return n;
1106}
1107
1108
1109#ifdef __STDC__
1110RX_DECL struct rexp_node *
1111rx_mk_r_alternate (struct rx * rx,
1112 struct rexp_node * a,
1113 struct rexp_node * b)
1114#else
1115RX_DECL struct rexp_node *
1116rx_mk_r_alternate (rx, a, b)
1117 struct rx * rx;
1118 struct rexp_node * a;
1119 struct rexp_node * b;
1120#endif
1121{
1122 struct rexp_node * n = rexp_node (rx, r_alternate);
1123 if (n)
1124 {
1125 n->params.pair.left = a;
1126 n->params.pair.right = b;
1127 }
1128 return n;
1129}
1130
1131
1132#ifdef __STDC__
1133RX_DECL struct rexp_node *
1134rx_mk_r_opt (struct rx * rx,
1135 struct rexp_node * a)
1136#else
1137RX_DECL struct rexp_node *
1138rx_mk_r_opt (rx, a)
1139 struct rx * rx;
1140 struct rexp_node * a;
1141#endif
1142{
1143 struct rexp_node * n = rexp_node (rx, r_opt);
1144 if (n)
1145 {
1146 n->params.pair.left = a;
1147 n->params.pair.right = 0;
1148 }
1149 return n;
1150}
1151
1152
1153#ifdef __STDC__
1154RX_DECL struct rexp_node *
1155rx_mk_r_star (struct rx * rx,
1156 struct rexp_node * a)
1157#else
1158RX_DECL struct rexp_node *
1159rx_mk_r_star (rx, a)
1160 struct rx * rx;
1161 struct rexp_node * a;
1162#endif
1163{
1164 struct rexp_node * n = rexp_node (rx, r_star);
1165 if (n)
1166 {
1167 n->params.pair.left = a;
1168 n->params.pair.right = 0;
1169 }
1170 return n;
1171}
1172
1173
1174#ifdef __STDC__
1175RX_DECL struct rexp_node *
1176rx_mk_r_2phase_star (struct rx * rx,
1177 struct rexp_node * a,
1178 struct rexp_node * b)
1179#else
1180RX_DECL struct rexp_node *
1181rx_mk_r_2phase_star (rx, a, b)
1182 struct rx * rx;
1183 struct rexp_node * a;
1184 struct rexp_node * b;
1185#endif
1186{
1187 struct rexp_node * n = rexp_node (rx, r_2phase_star);
1188 if (n)
1189 {
1190 n->params.pair.left = a;
1191 n->params.pair.right = b;
1192 }
1193 return n;
1194}
1195
1196
1197#ifdef __STDC__
1198RX_DECL struct rexp_node *
1199rx_mk_r_side_effect (struct rx * rx,
1200 rx_side_effect a)
1201#else
1202RX_DECL struct rexp_node *
1203rx_mk_r_side_effect (rx, a)
1204 struct rx * rx;
1205 rx_side_effect a;
1206#endif
1207{
1208 struct rexp_node * n = rexp_node (rx, r_side_effect);
1209 if (n)
1210 {
1211 n->params.side_effect = a;
1212 n->params.pair.right = 0;
1213 }
1214 return n;
1215}
1216
1217
1218#ifdef __STDC__
1219RX_DECL struct rexp_node *
1220rx_mk_r_data (struct rx * rx,
1221 void * a)
1222#else
1223RX_DECL struct rexp_node *
1224rx_mk_r_data (rx, a)
1225 struct rx * rx;
1226 void * a;
1227#endif
1228{
1229 struct rexp_node * n = rexp_node (rx, r_data);
1230 if (n)
1231 {
1232 n->params.pair.left = a;
1233 n->params.pair.right = 0;
1234 }
1235 return n;
1236}
1237
1238
1239#ifdef __STDC__
1240RX_DECL void
1241rx_free_rexp (struct rx * rx, struct rexp_node * node)
1242#else
1243RX_DECL void
1244rx_free_rexp (rx, node)
1245 struct rx * rx;
1246 struct rexp_node * node;
1247#endif
1248{
1249 if (node)
1250 {
1251 switch (node->type)
1252 {
1253 case r_cset:
1254 if (node->params.cset)
1255 rx_free_cset (rx, node->params.cset);
1256
1257 case r_side_effect:
1258 break;
1259
1260 case r_concat:
1261 case r_alternate:
1262 case r_2phase_star:
1263 case r_opt:
1264 case r_star:
1265 rx_free_rexp (rx, node->params.pair.left);
1266 rx_free_rexp (rx, node->params.pair.right);
1267 break;
1268
1269 case r_data:
1270 /* This shouldn't occur. */
1271 break;
1272 }
1273 free ((char *)node);
1274 }
1275}
1276
1277
1278#ifdef __STDC__
1279RX_DECL struct rexp_node *
1280rx_copy_rexp (struct rx *rx,
1281 struct rexp_node *node)
1282#else
1283RX_DECL struct rexp_node *
1284rx_copy_rexp (rx, node)
1285 struct rx *rx;
1286 struct rexp_node *node;
1287#endif
1288{
1289 if (!node)
1290 return 0;
1291 else
1292 {
1293 struct rexp_node *n = rexp_node (rx, node->type);
1294 if (!n)
1295 return 0;
1296 switch (node->type)
1297 {
1298 case r_cset:
1299 n->params.cset = rx_copy_cset (rx, node->params.cset);
1300 if (!n->params.cset)
1301 {
1302 rx_free_rexp (rx, n);
1303 return 0;
1304 }
1305 break;
1306
1307 case r_side_effect:
1308 n->params.side_effect = node->params.side_effect;
1309 break;
1310
1311 case r_concat:
1312 case r_alternate:
1313 case r_opt:
1314 case r_2phase_star:
1315 case r_star:
1316 n->params.pair.left =
1317 rx_copy_rexp (rx, node->params.pair.left);
1318 n->params.pair.right =
1319 rx_copy_rexp (rx, node->params.pair.right);
1320 if ( (node->params.pair.left && !n->params.pair.left)
1321 || (node->params.pair.right && !n->params.pair.right))
1322 {
1323 rx_free_rexp (rx, n);
1324 return 0;
1325 }
1326 break;
1327 case r_data:
1328 /* shouldn't happen */
1329 break;
1330 }
1331 return n;
1332 }
1333}
1334
1335
1336
1337
1338/* This page: functions to build and destroy graphs that describe nfa's */
1339
1340/* Constructs a new nfa node. */
1341#ifdef __STDC__
1342RX_DECL struct rx_nfa_state *
1343rx_nfa_state (struct rx *rx)
1344#else
1345RX_DECL struct rx_nfa_state *
1346rx_nfa_state (rx)
1347 struct rx *rx;
1348#endif
1349{
1350 struct rx_nfa_state * n = (struct rx_nfa_state *)malloc (sizeof (*n));
1351 if (!n)
1352 return 0;
1353 bzero (n, sizeof (*n));
1354 n->next = rx->nfa_states;
1355 rx->nfa_states = n;
1356 return n;
1357}
1358
1359
1360#ifdef __STDC__
1361RX_DECL void
1362rx_free_nfa_state (struct rx_nfa_state * n)
1363#else
1364RX_DECL void
1365rx_free_nfa_state (n)
1366 struct rx_nfa_state * n;
1367#endif
1368{
1369 free ((char *)n);
1370}
1371
1372
1373/* This looks up an nfa node, given a numeric id. Numeric id's are
1374 * assigned after the nfa has been built.
1375 */
1376#ifdef __STDC__
1377RX_DECL struct rx_nfa_state *
1378rx_id_to_nfa_state (struct rx * rx,
1379 int id)
1380#else
1381RX_DECL struct rx_nfa_state *
1382rx_id_to_nfa_state (rx, id)
1383 struct rx * rx;
1384 int id;
1385#endif
1386{
1387 struct rx_nfa_state * n;
1388 for (n = rx->nfa_states; n; n = n->next)
1389 if (n->id == id)
1390 return n;
1391 return 0;
1392}
1393
1394
1395/* This adds an edge between two nodes, but doesn't initialize the
1396 * edge label.
1397 */
1398
1399#ifdef __STDC__
1400RX_DECL struct rx_nfa_edge *
1401rx_nfa_edge (struct rx *rx,
1402 enum rx_nfa_etype type,
1403 struct rx_nfa_state *start,
1404 struct rx_nfa_state *dest)
1405#else
1406RX_DECL struct rx_nfa_edge *
1407rx_nfa_edge (rx, type, start, dest)
1408 struct rx *rx;
1409 enum rx_nfa_etype type;
1410 struct rx_nfa_state *start;
1411 struct rx_nfa_state *dest;
1412#endif
1413{
1414 struct rx_nfa_edge *e;
1415 e = (struct rx_nfa_edge *)malloc (sizeof (*e));
1416 if (!e)
1417 return 0;
1418 e->next = start->edges;
1419 start->edges = e;
1420 e->type = type;
1421 e->dest = dest;
1422 return e;
1423}
1424
1425
1426#ifdef __STDC__
1427RX_DECL void
1428rx_free_nfa_edge (struct rx_nfa_edge * e)
1429#else
1430RX_DECL void
1431rx_free_nfa_edge (e)
1432 struct rx_nfa_edge * e;
1433#endif
1434{
1435 free ((char *)e);
1436}
1437
1438
1439/* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure
1440 * of an NFA. These are added to an nfa automaticly by eclose_nfa.
1441 */
1442
1443#ifdef __STDC__
1444static struct rx_possible_future *
1445rx_possible_future (struct rx * rx,
1446 struct rx_se_list * effects)
1447#else
1448static struct rx_possible_future *
1449rx_possible_future (rx, effects)
1450 struct rx * rx;
1451 struct rx_se_list * effects;
1452#endif
1453{
1454 struct rx_possible_future *ec;
1455 ec = (struct rx_possible_future *) malloc (sizeof (*ec));
1456 if (!ec)
1457 return 0;
1458 ec->destset = 0;
1459 ec->next = 0;
1460 ec->effects = effects;
1461 return ec;
1462}
1463
1464
1465#ifdef __STDC__
1466static void
1467rx_free_possible_future (struct rx_possible_future * pf)
1468#else
1469static void
1470rx_free_possible_future (pf)
1471 struct rx_possible_future * pf;
1472#endif
1473{
1474 free ((char *)pf);
1475}
1476
1477
1478#ifdef __STDC__
1479RX_DECL void
1480rx_free_nfa (struct rx *rx)
1481#else
1482RX_DECL void
1483rx_free_nfa (rx)
1484 struct rx *rx;
1485#endif
1486{
1487 while (rx->nfa_states)
1488 {
1489 while (rx->nfa_states->edges)
1490 {
1491 switch (rx->nfa_states->edges->type)
1492 {
1493 case ne_cset:
1494 rx_free_cset (rx, rx->nfa_states->edges->params.cset);
1495 break;
1496 default:
1497 break;
1498 }
1499 {
1500 struct rx_nfa_edge * e;
1501 e = rx->nfa_states->edges;
1502 rx->nfa_states->edges = rx->nfa_states->edges->next;
1503 rx_free_nfa_edge (e);
1504 }
1505 } /* while (rx->nfa_states->edges) */
1506 {
1507 /* Iterate over the partial epsilon closures of rx->nfa_states */
1508 struct rx_possible_future * pf = rx->nfa_states->futures;
1509 while (pf)
1510 {
1511 struct rx_possible_future * pft = pf;
1512 pf = pf->next;
1513 rx_free_possible_future (pft);
1514 }
1515 }
1516 {
1517 struct rx_nfa_state *n;
1518 n = rx->nfa_states;
1519 rx->nfa_states = rx->nfa_states->next;
1520 rx_free_nfa_state (n);
1521 }
1522 }
1523}
1524
1525
1526
1527
1528/* This page: translating a pattern expression into an nfa and doing the
1529 * static part of the nfa->super-nfa translation.
1530 */
1531
1532/* This is the thompson regexp->nfa algorithm.
1533 * It is modified to allow for `side-effect epsilons.' Those are
1534 * edges that are taken whenever a similar epsilon edge would be,
1535 * but which imply that some side effect occurs when the edge
1536 * is taken.
1537 *
1538 * Side effects are used to model parts of the pattern langauge
1539 * that are not regular (in the formal sense).
1540 */
1541
1542#ifdef __STDC__
1543RX_DECL int
1544rx_build_nfa (struct rx *rx,
1545 struct rexp_node *rexp,
1546 struct rx_nfa_state **start,
1547 struct rx_nfa_state **end)
1548#else
1549RX_DECL int
1550rx_build_nfa (rx, rexp, start, end)
1551 struct rx *rx;
1552 struct rexp_node *rexp;
1553 struct rx_nfa_state **start;
1554 struct rx_nfa_state **end;
1555#endif
1556{
1557 struct rx_nfa_edge *edge;
1558
1559 /* Start & end nodes may have been allocated by the caller. */
1560 *start = *start ? *start : rx_nfa_state (rx);
1561
1562 if (!*start)
1563 return 0;
1564
1565 if (!rexp)
1566 {
1567 *end = *start;
1568 return 1;
1569 }
1570
1571 *end = *end ? *end : rx_nfa_state (rx);
1572
1573 if (!*end)
1574 {
1575 rx_free_nfa_state (*start);
1576 return 0;
1577 }
1578
1579 switch (rexp->type)
1580 {
1581 case r_data:
1582 return 0;
1583
1584 case r_cset:
1585 edge = rx_nfa_edge (rx, ne_cset, *start, *end);
1586 if (!edge)
1587 return 0;
1588 edge->params.cset = rx_copy_cset (rx, rexp->params.cset);
1589 if (!edge->params.cset)
1590 {
1591 rx_free_nfa_edge (edge);
1592 return 0;
1593 }
1594 return 1;
1595
1596 case r_opt:
1597 return (rx_build_nfa (rx, rexp->params.pair.left, start, end)
1598 && rx_nfa_edge (rx, ne_epsilon, *start, *end));
1599
1600 case r_star:
1601 {
1602 struct rx_nfa_state * star_start = 0;
1603 struct rx_nfa_state * star_end = 0;
1604 return (rx_build_nfa (rx, rexp->params.pair.left,
1605 &star_start, &star_end)
1606 && star_start
1607 && star_end
1608 && rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
1609 && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
1610 && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
1611
1612 && rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
1613 }
1614
1615 case r_2phase_star:
1616 {
1617 struct rx_nfa_state * star_start = 0;
1618 struct rx_nfa_state * star_end = 0;
1619 struct rx_nfa_state * loop_exp_start = 0;
1620 struct rx_nfa_state * loop_exp_end = 0;
1621
1622 return (rx_build_nfa (rx, rexp->params.pair.left,
1623 &star_start, &star_end)
1624 && rx_build_nfa (rx, rexp->params.pair.right,
1625 &loop_exp_start, &loop_exp_end)
1626 && star_start
1627 && star_end
1628 && loop_exp_end
1629 && loop_exp_start
1630 && rx_nfa_edge (rx, ne_epsilon, star_start, *end)
1631 && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
1632 && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
1633
1634 && rx_nfa_edge (rx, ne_epsilon, star_end, loop_exp_start)
1635 && rx_nfa_edge (rx, ne_epsilon, loop_exp_end, star_start));
1636 }
1637
1638
1639 case r_concat:
1640 {
1641 struct rx_nfa_state *shared = 0;
1642 return
1643 (rx_build_nfa (rx, rexp->params.pair.left, start, &shared)
1644 && rx_build_nfa (rx, rexp->params.pair.right, &shared, end));
1645 }
1646
1647 case r_alternate:
1648 {
1649 struct rx_nfa_state *ls = 0;
1650 struct rx_nfa_state *le = 0;
1651 struct rx_nfa_state *rs = 0;
1652 struct rx_nfa_state *re = 0;
1653 return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le)
1654 && rx_build_nfa (rx, rexp->params.pair.right, &rs, &re)
1655 && rx_nfa_edge (rx, ne_epsilon, *start, ls)
1656 && rx_nfa_edge (rx, ne_epsilon, *start, rs)
1657 && rx_nfa_edge (rx, ne_epsilon, le, *end)
1658 && rx_nfa_edge (rx, ne_epsilon, re, *end));
1659 }
1660
1661 case r_side_effect:
1662 edge = rx_nfa_edge (rx, ne_side_effect, *start, *end);
1663 if (!edge)
1664 return 0;
1665 edge->params.side_effect = rexp->params.side_effect;
1666 return 1;
1667 }
1668
1669 /* this should never happen */
1670 return 0;
1671}
1672
1673
1674/* RX_NAME_NFA_STATES identifies all nodes with outgoing non-epsilon
1675 * transitions. Only these nodes can occur in super-states.
1676 * All nodes are given an integer id.
1677 * The id is non-negative if the node has non-epsilon out-transitions, negative
1678 * otherwise (this is because we want the non-negative ids to be used as
1679 * array indexes in a few places).
1680 */
1681
1682#ifdef __STDC__
1683RX_DECL void
1684rx_name_nfa_states (struct rx *rx)
1685#else
1686RX_DECL void
1687rx_name_nfa_states (rx)
1688 struct rx *rx;
1689#endif
1690{
1691 struct rx_nfa_state *n = rx->nfa_states;
1692
1693 rx->nodec = 0;
1694 rx->epsnodec = -1;
1695
1696 while (n)
1697 {
1698 struct rx_nfa_edge *e = n->edges;
1699
1700 if (n->is_start)
1701 n->eclosure_needed = 1;
1702
1703 while (e)
1704 {
1705 switch (e->type)
1706 {
1707 case ne_epsilon:
1708 case ne_side_effect:
1709 break;
1710
1711 case ne_cset:
1712 n->id = rx->nodec++;
1713 {
1714 struct rx_nfa_edge *from_n = n->edges;
1715 while (from_n)
1716 {
1717 from_n->dest->eclosure_needed = 1;
1718 from_n = from_n->next;
1719 }
1720 }
1721 goto cont;
1722 }
1723 e = e->next;
1724 }
1725 n->id = rx->epsnodec--;
1726 cont:
1727 n = n->next;
1728 }
1729 rx->epsnodec = -rx->epsnodec;
1730}
1731
1732
1733
1734/* This page: data structures for the static part of the nfa->supernfa
1735 * translation.
1736 *
1737 * There are side effect lists -- lists of side effects occuring
1738 * along an uninterrupted, acyclic path of side-effect epsilon edges.
1739 * Such paths are collapsed to single edges in the course of computing
1740 * epsilon closures. Such single edges are labled with a list of all
1741 * the side effects entailed in crossing them. Like lists of side
1742 * effects are made == by the constructors below.
1743 *
1744 * There are also nfa state sets. These are used to hold a list of all
1745 * states reachable from a starting state for a given type of transition
1746 * and side effect list. These are also hash-consed.
1747 */
1748
1749/* The next several functions compare, construct, etc. lists of side
1750 * effects. See ECLOSE_NFA (below) for details.
1751 */
1752
1753/* Ordering of rx_se_list
1754 * (-1, 0, 1 return value convention).
1755 */
1756
1757#ifdef __STDC__
1758static int
1759se_list_cmp (void * va, void * vb)
1760#else
1761static int
1762se_list_cmp (va, vb)
1763 void * va;
1764 void * vb;
1765#endif
1766{
1767 struct rx_se_list * a = (struct rx_se_list *)va;
1768 struct rx_se_list * b = (struct rx_se_list *)vb;
1769
1770 return ((va == vb)
1771 ? 0
1772 : (!va
1773 ? -1
1774 : (!vb
1775 ? 1
1776 : ((long)a->car < (long)b->car
1777 ? 1
1778 : ((long)a->car > (long)b->car
1779 ? -1
1780 : se_list_cmp ((void *)a->cdr, (void *)b->cdr))))));
1781}
1782
1783
1784#ifdef __STDC__
1785static int
1786se_list_equal (void * va, void * vb)
1787#else
1788static int
1789se_list_equal (va, vb)
1790 void * va;
1791 void * vb;
1792#endif
1793{
1794 return !(se_list_cmp (va, vb));
1795}
1796
1797static struct rx_hash_rules se_list_hash_rules =
1798{
1799 se_list_equal,
1800 compiler_hash_alloc,
1801 compiler_free_hash,
1802 compiler_hash_item_alloc,
1803 compiler_free_hash_item
1804};
1805
1806
1807#ifdef __STDC__
1808static struct rx_se_list *
1809side_effect_cons (struct rx * rx,
1810 void * se, struct rx_se_list * list)
1811#else
1812static struct rx_se_list *
1813side_effect_cons (rx, se, list)
1814 struct rx * rx;
1815 void * se;
1816 struct rx_se_list * list;
1817#endif
1818{
1819 struct rx_se_list * l;
1820 l = ((struct rx_se_list *) malloc (sizeof (*l)));
1821 if (!l)
1822 return 0;
1823 l->car = se;
1824 l->cdr = list;
1825 return l;
1826}
1827
1828
1829#ifdef __STDC__
1830static struct rx_se_list *
1831hash_cons_se_prog (struct rx * rx,
1832 struct rx_hash * memo,
1833 void * car, struct rx_se_list * cdr)
1834#else
1835static struct rx_se_list *
1836hash_cons_se_prog (rx, memo, car, cdr)
1837 struct rx * rx;
1838 struct rx_hash * memo;
1839 void * car;
1840 struct rx_se_list * cdr;
1841#endif
1842{
1843 long hash = (long)car ^ (long)cdr;
1844 struct rx_se_list template;
1845
1846 template.car = car;
1847 template.cdr = cdr;
1848 {
1849 struct rx_hash_item * it = rx_hash_store (memo, hash,
1850 (void *)&template,
1851 &se_list_hash_rules);
1852 if (!it)
1853 return 0;
1854 if (it->data == (void *)&template)
1855 {
1856 struct rx_se_list * consed;
1857 consed = (struct rx_se_list *) malloc (sizeof (*consed));
1858 *consed = template;
1859 it->data = (void *)consed;
1860 }
1861 return (struct rx_se_list *)it->data;
1862 }
1863}
1864
1865
1866#ifdef __STDC__
1867static struct rx_se_list *
1868hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog)
1869#else
1870static struct rx_se_list *
1871hash_se_prog (rx, memo, prog)
1872 struct rx * rx;
1873 struct rx_hash * memo;
1874 struct rx_se_list * prog;
1875#endif
1876{
1877 struct rx_se_list * answer = 0;
1878 while (prog)
1879 {
1880 answer = hash_cons_se_prog (rx, memo, prog->car, answer);
1881 if (!answer)
1882 return 0;
1883 prog = prog->cdr;
1884 }
1885 return answer;
1886}
1887
1888#ifdef __STDC__
1889static int
1890nfa_set_cmp (void * va, void * vb)
1891#else
1892static int
1893nfa_set_cmp (va, vb)
1894 void * va;
1895 void * vb;
1896#endif
1897{
1898 struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va;
1899 struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb;
1900
1901 return ((va == vb)
1902 ? 0
1903 : (!va
1904 ? -1
1905 : (!vb
1906 ? 1
1907 : (a->car->id < b->car->id
1908 ? 1
1909 : (a->car->id > b->car->id
1910 ? -1
1911 : nfa_set_cmp ((void *)a->cdr, (void *)b->cdr))))));
1912}
1913
1914#ifdef __STDC__
1915static int
1916nfa_set_equal (void * va, void * vb)
1917#else
1918static int
1919nfa_set_equal (va, vb)
1920 void * va;
1921 void * vb;
1922#endif
1923{
1924 return !nfa_set_cmp (va, vb);
1925}
1926
1927static struct rx_hash_rules nfa_set_hash_rules =
1928{
1929 nfa_set_equal,
1930 compiler_hash_alloc,
1931 compiler_free_hash,
1932 compiler_hash_item_alloc,
1933 compiler_free_hash_item
1934};
1935
1936
1937#ifdef __STDC__
1938static struct rx_nfa_state_set *
1939nfa_set_cons (struct rx * rx,
1940 struct rx_hash * memo, struct rx_nfa_state * state,
1941 struct rx_nfa_state_set * set)
1942#else
1943static struct rx_nfa_state_set *
1944nfa_set_cons (rx, memo, state, set)
1945 struct rx * rx;
1946 struct rx_hash * memo;
1947 struct rx_nfa_state * state;
1948 struct rx_nfa_state_set * set;
1949#endif
1950{
1951 struct rx_nfa_state_set template;
1952 struct rx_hash_item * node;
1953 template.car = state;
1954 template.cdr = set;
1955 node = rx_hash_store (memo,
1956 (((long)state) >> 8) ^ (long)set,
1957 &template, &nfa_set_hash_rules);
1958 if (!node)
1959 return 0;
1960 if (node->data == &template)
1961 {
1962 struct rx_nfa_state_set * l;
1963 l = (struct rx_nfa_state_set *) malloc (sizeof (*l));
1964 node->data = (void *) l;
1965 if (!l)
1966 return 0;
1967 *l = template;
1968 }
1969 return (struct rx_nfa_state_set *)node->data;
1970}
1971
1972
1973#ifdef __STDC__
1974static struct rx_nfa_state_set *
1975nfa_set_enjoin (struct rx * rx,
1976 struct rx_hash * memo, struct rx_nfa_state * state,
1977 struct rx_nfa_state_set * set)
1978#else
1979static struct rx_nfa_state_set *
1980nfa_set_enjoin (rx, memo, state, set)
1981 struct rx * rx;
1982 struct rx_hash * memo;
1983 struct rx_nfa_state * state;
1984 struct rx_nfa_state_set * set;
1985#endif
1986{
1987 if (!set || state->id < set->car->id)
1988 return nfa_set_cons (rx, memo, state, set);
1989 if (state->id == set->car->id)
1990 return set;
1991 else
1992 {
1993 struct rx_nfa_state_set * newcdr
1994 = nfa_set_enjoin (rx, memo, state, set->cdr);
1995 if (newcdr != set->cdr)
1996 set = nfa_set_cons (rx, memo, set->car, newcdr);
1997 return set;
1998 }
1999}
2000
2001
2002
2003
2004/* This page: computing epsilon closures. The closures aren't total.
2005 * Each node's closures are partitioned according to the side effects entailed
2006 * along the epsilon edges. Return true on success.
2007 */
2008
2009struct eclose_frame
2010{
2011 struct rx_se_list *prog_backwards;
2012};
2013
2014
2015#ifdef __STDC__
2016static int
2017eclose_node (struct rx *rx, struct rx_nfa_state *outnode,
2018 struct rx_nfa_state *node, struct eclose_frame *frame)
2019#else
2020static int
2021eclose_node (rx, outnode, node, frame)
2022 struct rx *rx;
2023 struct rx_nfa_state *outnode;
2024 struct rx_nfa_state *node;
2025 struct eclose_frame *frame;
2026#endif
2027{
2028 struct rx_nfa_edge *e = node->edges;
2029
2030 /* For each node, we follow all epsilon paths to build the closure.
2031 * The closure omits nodes that have only epsilon edges.
2032 * The closure is split into partial closures -- all the states in
2033 * a partial closure are reached by crossing the same list of
2034 * of side effects (though not necessarily the same path).
2035 */
2036 if (node->mark)
2037 return 1;
2038 node->mark = 1;
2039
2040 if (node->id >= 0 || node->is_final)
2041 {
2042 struct rx_possible_future **ec;
2043 struct rx_se_list * prog_in_order
2044 = ((struct rx_se_list *)hash_se_prog (rx,
2045 &rx->se_list_memo,
2046 frame->prog_backwards));
2047 int cmp;
2048
2049 ec = &outnode->futures;
2050
2051 while (*ec)
2052 {
2053 cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order);
2054 if (cmp <= 0)
2055 break;
2056 ec = &(*ec)->next;
2057 }
2058 if (!*ec || (cmp < 0))
2059 {
2060 struct rx_possible_future * saved = *ec;
2061 *ec = rx_possible_future (rx, prog_in_order);
2062 (*ec)->next = saved;
2063 if (!*ec)
2064 return 0;
2065 }
2066 if (node->id >= 0)
2067 {
2068 (*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo,
2069 node, (*ec)->destset);
2070 if (!(*ec)->destset)
2071 return 0;
2072 }
2073 }
2074
2075 while (e)
2076 {
2077 switch (e->type)
2078 {
2079 case ne_epsilon:
2080 if (!eclose_node (rx, outnode, e->dest, frame))
2081 return 0;
2082 break;
2083 case ne_side_effect:
2084 {
2085 frame->prog_backwards = side_effect_cons (rx,
2086 e->params.side_effect,
2087 frame->prog_backwards);
2088 if (!frame->prog_backwards)
2089 return 0;
2090 if (!eclose_node (rx, outnode, e->dest, frame))
2091 return 0;
2092 {
2093 struct rx_se_list * dying = frame->prog_backwards;
2094 frame->prog_backwards = frame->prog_backwards->cdr;
2095 free ((char *)dying);
2096 }
2097 break;
2098 }
2099 default:
2100 break;
2101 }
2102 e = e->next;
2103 }
2104 node->mark = 0;
2105 return 1;
2106}
2107
2108
2109#ifdef __STDC__
2110RX_DECL int
2111rx_eclose_nfa (struct rx *rx)
2112#else
2113RX_DECL int
2114rx_eclose_nfa (rx)
2115 struct rx *rx;
2116#endif
2117{
2118 struct rx_nfa_state *n = rx->nfa_states;
2119 struct eclose_frame frame;
2120 static int rx_id = 0;
2121
2122 frame.prog_backwards = 0;
2123 rx->rx_id = rx_id++;
2124 bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
2125 bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
2126 while (n)
2127 {
2128 n->futures = 0;
2129 if (n->eclosure_needed && !eclose_node (rx, n, n, &frame))
2130 return 0;
2131 /* clear_marks (rx); */
2132 n = n->next;
2133 }
2134 return 1;
2135}
2136
2137
2138/* This deletes epsilon edges from an NFA. After running eclose_node,
2139 * we have no more need for these edges. They are removed to simplify
2140 * further operations on the NFA.
2141 */
2142
2143#ifdef __STDC__
2144RX_DECL void
2145rx_delete_epsilon_transitions (struct rx *rx)
2146#else
2147RX_DECL void
2148rx_delete_epsilon_transitions (rx)
2149 struct rx *rx;
2150#endif
2151{
2152 struct rx_nfa_state *n = rx->nfa_states;
2153 struct rx_nfa_edge **e;
2154
2155 while (n)
2156 {
2157 e = &n->edges;
2158 while (*e)
2159 {
2160 struct rx_nfa_edge *t;
2161 switch ((*e)->type)
2162 {
2163 case ne_epsilon:
2164 case ne_side_effect:
2165 t = *e;
2166 *e = t->next;
2167 rx_free_nfa_edge (t);
2168 break;
2169
2170 default:
2171 e = &(*e)->next;
2172 break;
2173 }
2174 }
2175 n = n->next;
2176 }
2177}
2178
2179
2180
2181/* This page: storing the nfa in a contiguous region of memory for
2182 * subsequent conversion to a super-nfa.
2183 */
2184
2185/* This is for qsort on an array of nfa_states. The order
2186 * is based on state ids and goes
2187 * [0...MAX][MIN..-1] where (MAX>=0) and (MIN<0)
2188 * This way, positive ids double as array indices.
2189 */
2190
2191#ifdef __STDC__
2192static int
2193nfacmp (void * va, void * vb)
2194#else
2195static int
2196nfacmp (va, vb)
2197 void * va;
2198 void * vb;
2199#endif
2200{
2201 struct rx_nfa_state **a = (struct rx_nfa_state **)va;
2202 struct rx_nfa_state **b = (struct rx_nfa_state **)vb;
2203 return (*a == *b /* &&&& 3.18 */
2204 ? 0
2205 : (((*a)->id < 0) == ((*b)->id < 0)
2206 ? (((*a)->id < (*b)->id) ? -1 : 1)
2207 : (((*a)->id < 0)
2208 ? 1 : -1)));
2209}
2210
2211#ifdef __STDC__
2212static int
2213count_hash_nodes (struct rx_hash * st)
2214#else
2215static int
2216count_hash_nodes (st)
2217 struct rx_hash * st;
2218#endif
2219{
2220 int x;
2221 int count = 0;
2222 for (x = 0; x < 13; ++x)
2223 count += ((st->children[x])
2224 ? count_hash_nodes (st->children[x])
2225 : st->bucket_size[x]);
2226
2227 return count;
2228}
2229
2230
2231#ifdef __STDC__
2232static void
2233se_memo_freer (struct rx_hash_item * node)
2234#else
2235static void
2236se_memo_freer (node)
2237 struct rx_hash_item * node;
2238#endif
2239{
2240 free ((char *)node->data);
2241}
2242
2243
2244#ifdef __STDC__
2245static void
2246nfa_set_freer (struct rx_hash_item * node)
2247#else
2248static void
2249nfa_set_freer (node)
2250 struct rx_hash_item * node;
2251#endif
2252{
2253 free ((char *)node->data);
2254}
2255
2256
2257/* This copies an entire NFA into a single malloced block of memory.
2258 * Mostly this is for compatability with regex.c, though it is convenient
2259 * to have the nfa nodes in an array.
2260 */
2261
2262#ifdef __STDC__
2263RX_DECL int
2264rx_compactify_nfa (struct rx *rx,
2265 void **mem, unsigned long *size)
2266#else
2267RX_DECL int
2268rx_compactify_nfa (rx, mem, size)
2269 struct rx *rx;
2270 void **mem;
2271 unsigned long *size;
2272#endif
2273{
2274 int total_nodec;
2275 struct rx_nfa_state *n;
2276 int edgec = 0;
2277 int eclosec = 0;
2278 int se_list_consc = count_hash_nodes (&rx->se_list_memo);
2279 int nfa_setc = count_hash_nodes (&rx->set_list_memo);
2280 unsigned long total_size;
2281
2282 /* This takes place in two stages. First, the total size of the
2283 * nfa is computed, then structures are copied.
2284 */
2285 n = rx->nfa_states;
2286 total_nodec = 0;
2287 while (n)
2288 {
2289 struct rx_nfa_edge *e = n->edges;
2290 struct rx_possible_future *ec = n->futures;
2291 ++total_nodec;
2292 while (e)
2293 {
2294 ++edgec;
2295 e = e->next;
2296 }
2297 while (ec)
2298 {
2299 ++eclosec;
2300 ec = ec->next;
2301 }
2302 n = n->next;
2303 }
2304
2305 total_size = (total_nodec * sizeof (struct rx_nfa_state)
2306 + edgec * rx_sizeof_bitset (rx->local_cset_size)
2307 + edgec * sizeof (struct rx_nfa_edge)
2308 + nfa_setc * sizeof (struct rx_nfa_state_set)
2309 + eclosec * sizeof (struct rx_possible_future)
2310 + se_list_consc * sizeof (struct rx_se_list)
2311 + rx->reserved);
2312
2313 if (total_size > *size)
2314 {
2315 *mem = remalloc (*mem, total_size);
2316 if (*mem)
2317 *size = total_size;
2318 else
2319 return 0;
2320 }
2321 /* Now we've allocated the memory; this copies the NFA. */
2322 {
2323 static struct rx_nfa_state **scratch = 0;
2324 static int scratch_alloc = 0;
2325 struct rx_nfa_state *state_base = (struct rx_nfa_state *) * mem;
2326 struct rx_nfa_state *new_state = state_base;
2327 struct rx_nfa_edge *new_edge =
2328 (struct rx_nfa_edge *)
2329 ((char *) state_base + total_nodec * sizeof (struct rx_nfa_state));
2330 struct rx_se_list * new_se_list =
2331 (struct rx_se_list *)
2332 ((char *)new_edge + edgec * sizeof (struct rx_nfa_edge));
2333 struct rx_possible_future *new_close =
2334 ((struct rx_possible_future *)
2335 ((char *) new_se_list
2336 + se_list_consc * sizeof (struct rx_se_list)));
2337 struct rx_nfa_state_set * new_nfa_set =
2338 ((struct rx_nfa_state_set *)
2339 ((char *)new_close + eclosec * sizeof (struct rx_possible_future)));
2340 char *new_bitset =
2341 ((char *) new_nfa_set + nfa_setc * sizeof (struct rx_nfa_state_set));
2342 int x;
2343 struct rx_nfa_state *n;
2344
2345 if (scratch_alloc < total_nodec)
2346 {
2347 scratch = ((struct rx_nfa_state **)
2348 remalloc (scratch, total_nodec * sizeof (*scratch)));
2349 if (scratch)
2350 scratch_alloc = total_nodec;
2351 else
2352 {
2353 scratch_alloc = 0;
2354 return 0;
2355 }
2356 }
2357
2358 for (x = 0, n = rx->nfa_states; n; n = n->next)
2359 scratch[x++] = n;
2360
2361 qsort (scratch, total_nodec,
2362 sizeof (struct rx_nfa_state *), (int (*)())nfacmp);
2363
2364 for (x = 0; x < total_nodec; ++x)
2365 {
2366 struct rx_possible_future *eclose = scratch[x]->futures;
2367 struct rx_nfa_edge *edge = scratch[x]->edges;
2368 struct rx_nfa_state *cn = new_state++;
2369 cn->futures = 0;
2370 cn->edges = 0;
2371 cn->next = (x == total_nodec - 1) ? 0 : (cn + 1);
2372 cn->id = scratch[x]->id;
2373 cn->is_final = scratch[x]->is_final;
2374 cn->is_start = scratch[x]->is_start;
2375 cn->mark = 0;
2376 while (edge)
2377 {
2378 int indx = (edge->dest->id < 0
2379 ? (total_nodec + edge->dest->id)
2380 : edge->dest->id);
2381 struct rx_nfa_edge *e = new_edge++;
2382 rx_Bitset cset = (rx_Bitset) new_bitset;
2383 new_bitset += rx_sizeof_bitset (rx->local_cset_size);
2384 rx_bitset_null (rx->local_cset_size, cset);
2385 rx_bitset_union (rx->local_cset_size, cset, edge->params.cset);
2386 e->next = cn->edges;
2387 cn->edges = e;
2388 e->type = edge->type;
2389 e->dest = state_base + indx;
2390 e->params.cset = cset;
2391 edge = edge->next;
2392 }
2393 while (eclose)
2394 {
2395 struct rx_possible_future *ec = new_close++;
2396 struct rx_hash_item * sp;
2397 struct rx_se_list ** sepos;
2398 struct rx_se_list * sesrc;
2399 struct rx_nfa_state_set * destlst;
2400 struct rx_nfa_state_set ** destpos;
2401 ec->next = cn->futures;
2402 cn->futures = ec;
2403 for (sepos = &ec->effects, sesrc = eclose->effects;
2404 sesrc;
2405 sesrc = sesrc->cdr, sepos = &(*sepos)->cdr)
2406 {
2407 sp = rx_hash_find (&rx->se_list_memo,
2408 (long)sesrc->car ^ (long)sesrc->cdr,
2409 sesrc, &se_list_hash_rules);
2410 if (sp->binding)
2411 {
2412 sesrc = (struct rx_se_list *)sp->binding;
2413 break;
2414 }
2415 *new_se_list = *sesrc;
2416 sp->binding = (void *)new_se_list;
2417 *sepos = new_se_list;
2418 ++new_se_list;
2419 }
2420 *sepos = sesrc;
2421 for (destpos = &ec->destset, destlst = eclose->destset;
2422 destlst;
2423 destpos = &(*destpos)->cdr, destlst = destlst->cdr)
2424 {
2425 sp = rx_hash_find (&rx->set_list_memo,
2426 ((((long)destlst->car) >> 8)
2427 ^ (long)destlst->cdr),
2428 destlst, &nfa_set_hash_rules);
2429 if (sp->binding)
2430 {
2431 destlst = (struct rx_nfa_state_set *)sp->binding;
2432 break;
2433 }
2434 *new_nfa_set = *destlst;
2435 new_nfa_set->car = state_base + destlst->car->id;
2436 sp->binding = (void *)new_nfa_set;
2437 *destpos = new_nfa_set;
2438 ++new_nfa_set;
2439 }
2440 *destpos = destlst;
2441 eclose = eclose->next;
2442 }
2443 }
2444 }
2445 rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules);
2446 bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
2447 rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules);
2448 bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
2449
2450 rx_free_nfa (rx);
2451 rx->nfa_states = (struct rx_nfa_state *)*mem;
2452 return 1;
2453}
2454
2455
2456
2457/* The functions in the next several pages define the lazy-NFA-conversion used
2458 * by matchers. The input to this construction is an NFA such as
2459 * is built by compactify_nfa (rx.c). The output is the superNFA.
2460 */
2461
2462/* Match engines can use arbitrary values for opcodes. So, the parse tree
2463 * is built using instructions names (enum rx_opcode), but the superstate
2464 * nfa is populated with mystery opcodes (void *).
2465 *
2466 * For convenience, here is an id table. The opcodes are == to their inxs
2467 *
2468 * The lables in re_search_2 would make good values for instructions.
2469 */
2470
2471void * rx_id_instruction_table[rx_num_instructions] =
2472{
2473 (void *) rx_backtrack_point,
2474 (void *) rx_do_side_effects,
2475 (void *) rx_cache_miss,
2476 (void *) rx_next_char,
2477 (void *) rx_backtrack,
2478 (void *) rx_error_inx
2479};
2480
2481
2482
2483
2484/* Memory mgt. for superstate graphs. */
2485
2486#ifdef __STDC__
2487static char *
2488rx_cache_malloc (struct rx_cache * cache, int bytes)
2489#else
2490static char *
2491rx_cache_malloc (cache, bytes)
2492 struct rx_cache * cache;
2493 int bytes;
2494#endif
2495{
2496 while (cache->bytes_left < bytes)
2497 {
2498 if (cache->memory_pos)
2499 cache->memory_pos = cache->memory_pos->next;
2500 if (!cache->memory_pos)
2501 {
2502 cache->morecore (cache);
2503 if (!cache->memory_pos)
2504 return 0;
2505 }
2506 cache->bytes_left = cache->memory_pos->bytes;
2507 cache->memory_addr = ((char *)cache->memory_pos
2508 + sizeof (struct rx_blocklist));
2509 }
2510 cache->bytes_left -= bytes;
2511 {
2512 char * addr = cache->memory_addr;
2513 cache->memory_addr += bytes;
2514 return addr;
2515 }
2516}
2517
2518#ifdef __STDC__
2519static void
2520rx_cache_free (struct rx_cache * cache,
2521 struct rx_freelist ** freelist, char * mem)
2522#else
2523static void
2524rx_cache_free (cache, freelist, mem)
2525 struct rx_cache * cache;
2526 struct rx_freelist ** freelist;
2527 char * mem;
2528#endif
2529{
2530 struct rx_freelist * it = (struct rx_freelist *)mem;
2531 it->next = *freelist;
2532 *freelist = it;
2533}
2534
2535
2536/* The partially instantiated superstate graph has a transition
2537 * table at every node. There is one entry for every character.
2538 * This fills in the transition for a set.
2539 */
2540#ifdef __STDC__
2541static void
2542install_transition (struct rx_superstate *super,
2543 struct rx_inx *answer, rx_Bitset trcset)
2544#else
2545static void
2546install_transition (super, answer, trcset)
2547 struct rx_superstate *super;
2548 struct rx_inx *answer;
2549 rx_Bitset trcset;
2550#endif
2551{
2552 struct rx_inx * transitions = super->transitions;
2553 int chr;
2554 for (chr = 0; chr < 256; )
2555 if (!*trcset)
2556 {
2557 ++trcset;
2558 chr += 32;
2559 }
2560 else
2561 {
2562 RX_subset sub = *trcset;
2563 RX_subset mask = 1;
2564 int bound = chr + 32;
2565 while (chr < bound)
2566 {
2567 if (sub & mask)
2568 transitions [chr] = *answer;
2569 ++chr;
2570 mask <<= 1;
2571 }
2572 ++trcset;
2573 }
2574}
2575
2576
2577#ifdef __STDC__
2578static int
2579qlen (struct rx_superstate * q)
2580#else
2581static int
2582qlen (q)
2583 struct rx_superstate * q;
2584#endif
2585{
2586 int count = 1;
2587 struct rx_superstate * it;
2588 if (!q)
2589 return 0;
2590 for (it = q->next_recyclable; it != q; it = it->next_recyclable)
2591 ++count;
2592 return count;
2593}
2594
2595#ifdef __STDC__
2596static void
2597check_cache (struct rx_cache * cache)
2598#else
2599static void
2600check_cache (cache)
2601 struct rx_cache * cache;
2602#endif
2603{
2604 struct rx_cache * you_fucked_up = 0;
2605 int total = cache->superstates;
2606 int semi = cache->semifree_superstates;
2607 if (semi != qlen (cache->semifree_superstate))
2608 check_cache (you_fucked_up);
2609 if ((total - semi) != qlen (cache->lru_superstate))
2610 check_cache (you_fucked_up);
2611}
2612
2613/* When a superstate is old and neglected, it can enter a
2614 * semi-free state. A semi-free state is slated to die.
2615 * Incoming transitions to a semi-free state are re-written
2616 * to cause an (interpreted) fault when they are taken.
2617 * The fault handler revives the semi-free state, patches
2618 * incoming transitions back to normal, and continues.
2619 *
2620 * The idea is basicly to free in two stages, aborting
2621 * between the two if the state turns out to be useful again.
2622 * When a free is aborted, the rescued superstate is placed
2623 * in the most-favored slot to maximize the time until it
2624 * is next semi-freed.
2625 */
2626
2627#ifdef __STDC__
2628static void
2629semifree_superstate (struct rx_cache * cache)
2630#else
2631static void
2632semifree_superstate (cache)
2633 struct rx_cache * cache;
2634#endif
2635{
2636 int disqualified = cache->semifree_superstates;
2637 if (disqualified == cache->superstates)
2638 return;
2639 while (cache->lru_superstate->locks)
2640 {
2641 cache->lru_superstate = cache->lru_superstate->next_recyclable;
2642 ++disqualified;
2643 if (disqualified == cache->superstates)
2644 return;
2645 }
2646 {
2647 struct rx_superstate * it = cache->lru_superstate;
2648 it->next_recyclable->prev_recyclable = it->prev_recyclable;
2649 it->prev_recyclable->next_recyclable = it->next_recyclable;
2650 cache->lru_superstate = (it == it->next_recyclable
2651 ? 0
2652 : it->next_recyclable);
2653 if (!cache->semifree_superstate)
2654 {
2655 cache->semifree_superstate = it;
2656 it->next_recyclable = it;
2657 it->prev_recyclable = it;
2658 }
2659 else
2660 {
2661 it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
2662 it->next_recyclable = cache->semifree_superstate;
2663 it->prev_recyclable->next_recyclable = it;
2664 it->next_recyclable->prev_recyclable = it;
2665 }
2666 {
2667 struct rx_distinct_future *df;
2668 it->is_semifree = 1;
2669 ++cache->semifree_superstates;
2670 df = it->transition_refs;
2671 if (df)
2672 {
2673 df->prev_same_dest->next_same_dest = 0;
2674 for (df = it->transition_refs; df; df = df->next_same_dest)
2675 {
2676 df->future_frame.inx = cache->instruction_table[rx_cache_miss];
2677 df->future_frame.data = 0;
2678 df->future_frame.data_2 = (void *) df;
2679 /* If there are any NEXT-CHAR instruction frames that
2680 * refer to this state, we convert them to CACHE-MISS frames.
2681 */
2682 if (!df->effects
2683 && (df->edge->options->next_same_super_edge[0]
2684 == df->edge->options))
2685 install_transition (df->present, &df->future_frame,
2686 df->edge->cset);
2687 }
2688 df = it->transition_refs;
2689 df->prev_same_dest->next_same_dest = df;
2690 }
2691 }
2692 }
2693}
2694
2695
2696#ifdef __STDC__
2697static void
2698refresh_semifree_superstate (struct rx_cache * cache,
2699 struct rx_superstate * super)
2700#else
2701static void
2702refresh_semifree_superstate (cache, super)
2703 struct rx_cache * cache;
2704 struct rx_superstate * super;
2705#endif
2706{
2707 struct rx_distinct_future *df;
2708
2709 if (super->transition_refs)
2710 {
2711 super->transition_refs->prev_same_dest->next_same_dest = 0;
2712 for (df = super->transition_refs; df; df = df->next_same_dest)
2713 {
2714 df->future_frame.inx = cache->instruction_table[rx_next_char];
2715 df->future_frame.data = (void *) super->transitions;
2716 /* CACHE-MISS instruction frames that refer to this state,
2717 * must be converted to NEXT-CHAR frames.
2718 */
2719 if (!df->effects
2720 && (df->edge->options->next_same_super_edge[0]
2721 == df->edge->options))
2722 install_transition (df->present, &df->future_frame,
2723 df->edge->cset);
2724 }
2725 super->transition_refs->prev_same_dest->next_same_dest
2726 = super->transition_refs;
2727 }
2728 if (cache->semifree_superstate == super)
2729 cache->semifree_superstate = (super->prev_recyclable == super
2730 ? 0
2731 : super->prev_recyclable);
2732 super->next_recyclable->prev_recyclable = super->prev_recyclable;
2733 super->prev_recyclable->next_recyclable = super->next_recyclable;
2734
2735 if (!cache->lru_superstate)
2736 (cache->lru_superstate
2737 = super->next_recyclable
2738 = super->prev_recyclable
2739 = super);
2740 else
2741 {
2742 super->next_recyclable = cache->lru_superstate;
2743 super->prev_recyclable = cache->lru_superstate->prev_recyclable;
2744 super->next_recyclable->prev_recyclable = super;
2745 super->prev_recyclable->next_recyclable = super;
2746 }
2747 super->is_semifree = 0;
2748 --cache->semifree_superstates;
2749}
2750
2751#ifdef __STDC__
2752static void
2753rx_refresh_this_superstate (struct rx_cache * cache, struct rx_superstate * superstate)
2754#else
2755static void
2756rx_refresh_this_superstate (cache, superstate)
2757 struct rx_cache * cache;
2758 struct rx_superstate * superstate;
2759#endif
2760{
2761 if (superstate->is_semifree)
2762 refresh_semifree_superstate (cache, superstate);
2763 else if (cache->lru_superstate == superstate)
2764 cache->lru_superstate = superstate->next_recyclable;
2765 else if (superstate != cache->lru_superstate->prev_recyclable)
2766 {
2767 superstate->next_recyclable->prev_recyclable
2768 = superstate->prev_recyclable;
2769 superstate->prev_recyclable->next_recyclable
2770 = superstate->next_recyclable;
2771 superstate->next_recyclable = cache->lru_superstate;
2772 superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
2773 superstate->next_recyclable->prev_recyclable = superstate;
2774 superstate->prev_recyclable->next_recyclable = superstate;
2775 }
2776}
2777
2778#ifdef __STDC__
2779static void
2780release_superset_low (struct rx_cache * cache,
2781 struct rx_superset *set)
2782#else
2783static void
2784release_superset_low (cache, set)
2785 struct rx_cache * cache;
2786 struct rx_superset *set;
2787#endif
2788{
2789 if (!--set->refs)
2790 {
2791 if (set->cdr)
2792 release_superset_low (cache, set->cdr);
2793
2794 set->starts_for = 0;
2795
2796 rx_hash_free
2797 (rx_hash_find
2798 (&cache->superset_table,
2799 (unsigned long)set->car ^ set->id ^ (unsigned long)set->cdr,
2800 (void *)set,
2801 &cache->superset_hash_rules),
2802 &cache->superset_hash_rules);
2803 rx_cache_free (cache, &cache->free_supersets, (char *)set);
2804 }
2805}
2806
2807#ifdef __STDC__
2808RX_DECL void
2809rx_release_superset (struct rx *rx,
2810 struct rx_superset *set)
2811#else
2812RX_DECL void
2813rx_release_superset (rx, set)
2814 struct rx *rx;
2815 struct rx_superset *set;
2816#endif
2817{
2818 release_superset_low (rx->cache, set);
2819}
2820
2821/* This tries to add a new superstate to the superstate freelist.
2822 * It might, as a result, free some edge pieces or hash tables.
2823 * If nothing can be freed because too many locks are being held, fail.
2824 */
2825
2826#ifdef __STDC__
2827static int
2828rx_really_free_superstate (struct rx_cache * cache)
2829#else
2830static int
2831rx_really_free_superstate (cache)
2832 struct rx_cache * cache;
2833#endif
2834{
2835 int locked_superstates = 0;
2836 struct rx_superstate * it;
2837
2838 if (!cache->superstates)
2839 return 0;
2840
2841 {
2842 /* This is a total guess. The idea is that we should expect as
2843 * many misses as we've recently experienced. I.e., cache->misses
2844 * should be the same as cache->semifree_superstates.
2845 */
2846 while ((cache->hits + cache->misses) > cache->superstates_allowed)
2847 {
2848 cache->hits >>= 1;
2849 cache->misses >>= 1;
2850 }
2851 if ( ((cache->hits + cache->misses) * cache->semifree_superstates)
2852 < (cache->superstates * cache->misses))
2853 {
2854 semifree_superstate (cache);
2855 semifree_superstate (cache);
2856 }
2857 }
2858
2859 while (cache->semifree_superstate && cache->semifree_superstate->locks)
2860 {
2861 refresh_semifree_superstate (cache, cache->semifree_superstate);
2862 ++locked_superstates;
2863 if (locked_superstates == cache->superstates)
2864 return 0;
2865 }
2866
2867 if (cache->semifree_superstate)
2868 {
2869 it = cache->semifree_superstate;
2870 it->next_recyclable->prev_recyclable = it->prev_recyclable;
2871 it->prev_recyclable->next_recyclable = it->next_recyclable;
2872 cache->semifree_superstate = ((it == it->next_recyclable)
2873 ? 0
2874 : it->next_recyclable);
2875 --cache->semifree_superstates;
2876 }
2877 else
2878 {
2879 while (cache->lru_superstate->locks)
2880 {
2881 cache->lru_superstate = cache->lru_superstate->next_recyclable;
2882 ++locked_superstates;
2883 if (locked_superstates == cache->superstates)
2884 return 0;
2885 }
2886 it = cache->lru_superstate;
2887 it->next_recyclable->prev_recyclable = it->prev_recyclable;
2888 it->prev_recyclable->next_recyclable = it->next_recyclable;
2889 cache->lru_superstate = ((it == it->next_recyclable)
2890 ? 0
2891 : it->next_recyclable);
2892 }
2893
2894 if (it->transition_refs)
2895 {
2896 struct rx_distinct_future *df;
2897 for (df = it->transition_refs,
2898 df->prev_same_dest->next_same_dest = 0;
2899 df;
2900 df = df->next_same_dest)
2901 {
2902 df->future_frame.inx = cache->instruction_table[rx_cache_miss];
2903 df->future_frame.data = 0;
2904 df->future_frame.data_2 = (void *) df;
2905 df->future = 0;
2906 }
2907 it->transition_refs->prev_same_dest->next_same_dest =
2908 it->transition_refs;
2909 }
2910 {
2911 struct rx_super_edge *tc = it->edges;
2912 while (tc)
2913 {
2914 struct rx_distinct_future * df;
2915 struct rx_super_edge *tct = tc->next;
2916 df = tc->options;
2917 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
2918 while (df)
2919 {
2920 struct rx_distinct_future *dft = df;
2921 df = df->next_same_super_edge[0];
2922
2923
2924 if (dft->future && dft->future->transition_refs == dft)
2925 {
2926 dft->future->transition_refs = dft->next_same_dest;
2927 if (dft->future->transition_refs == dft)
2928 dft->future->transition_refs = 0;
2929 }
2930 dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
2931 dft->prev_same_dest->next_same_dest = dft->next_same_dest;
2932 rx_cache_free (cache, &cache->free_discernable_futures,
2933 (char *)dft);
2934 }
2935 rx_cache_free (cache, &cache->free_transition_classes, (char *)tc);
2936 tc = tct;
2937 }
2938 }
2939
2940 if (it->contents->superstate == it)
2941 it->contents->superstate = 0;
2942 release_superset_low (cache, it->contents);
2943 rx_cache_free (cache, &cache->free_superstates, (char *)it);
2944 --cache->superstates;
2945 return 1;
2946}
2947
2948#ifdef __STDC__
2949static char *
2950rx_cache_get (struct rx_cache * cache,
2951 struct rx_freelist ** freelist)
2952#else
2953static char *
2954rx_cache_get (cache, freelist)
2955 struct rx_cache * cache;
2956 struct rx_freelist ** freelist;
2957#endif
2958{
2959 while (!*freelist && rx_really_free_superstate (cache))
2960 ;
2961 if (!*freelist)
2962 return 0;
2963 {
2964 struct rx_freelist * it = *freelist;
2965 *freelist = it->next;
2966 return (char *)it;
2967 }
2968}
2969
2970#ifdef __STDC__
2971static char *
2972rx_cache_malloc_or_get (struct rx_cache * cache,
2973 struct rx_freelist ** freelist, int bytes)
2974#else
2975static char *
2976rx_cache_malloc_or_get (cache, freelist, bytes)
2977 struct rx_cache * cache;
2978 struct rx_freelist ** freelist;
2979 int bytes;
2980#endif
2981{
2982 if (!*freelist)
2983 {
2984 char * answer = rx_cache_malloc (cache, bytes);
2985 if (answer)
2986 return answer;
2987 }
2988
2989 return rx_cache_get (cache, freelist);
2990}
2991
2992#ifdef __STDC__
2993static char *
2994rx_cache_get_superstate (struct rx_cache * cache)
2995#else
2996static char *
2997rx_cache_get_superstate (cache)
2998 struct rx_cache * cache;
2999#endif
3000{
3001 char * answer;
3002 int bytes = ( sizeof (struct rx_superstate)
3003 + cache->local_cset_size * sizeof (struct rx_inx));
3004 if (!cache->free_superstates
3005 && (cache->superstates < cache->superstates_allowed))
3006 {
3007 answer = rx_cache_malloc (cache, bytes);
3008 if (answer)
3009 {
3010 ++cache->superstates;
3011 return answer;
3012 }
3013 }
3014 answer = rx_cache_get (cache, &cache->free_superstates);
3015 if (!answer)
3016 {
3017 answer = rx_cache_malloc (cache, bytes);
3018 if (answer)
3019 ++cache->superstates_allowed;
3020 }
3021 ++cache->superstates;
3022 return answer;
3023}
3024
3025
3026
3027
3028#ifdef __STDC__
3029static int
3030supersetcmp (void * va, void * vb)
3031#else
3032static int
3033supersetcmp (va, vb)
3034 void * va;
3035 void * vb;
3036#endif
3037{
3038 struct rx_superset * a = (struct rx_superset *)va;
3039 struct rx_superset * b = (struct rx_superset *)vb;
3040 return ( (a == b)
3041 || (a && b && (a->car == b->car) && (a->cdr == b->cdr)));
3042}
3043
3044#ifdef __STDC__
3045static struct rx_hash_item *
3046superset_allocator (struct rx_hash_rules * rules, void * val)
3047#else
3048static struct rx_hash_item *
3049superset_allocator (rules, val)
3050 struct rx_hash_rules * rules;
3051 void * val;
3052#endif
3053{
3054 struct rx_cache * cache
3055 = ((struct rx_cache *)
3056 ((char *)rules
3057 - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
3058 struct rx_superset * template = (struct rx_superset *)val;
3059 struct rx_superset * newset
3060 = ((struct rx_superset *)
3061 rx_cache_malloc_or_get (cache,
3062 &cache->free_supersets,
3063 sizeof (*template)));
3064 if (!newset)
3065 return 0;
3066 newset->refs = 0;
3067 newset->car = template->car;
3068 newset->id = template->car->id;
3069 newset->cdr = template->cdr;
3070 newset->superstate = 0;
3071 rx_protect_superset (rx, template->cdr);
3072 newset->hash_item.data = (void *)newset;
3073 newset->hash_item.binding = 0;
3074 return &newset->hash_item;
3075}
3076
3077#ifdef __STDC__
3078static struct rx_hash *
3079super_hash_allocator (struct rx_hash_rules * rules)
3080#else
3081static struct rx_hash *
3082super_hash_allocator (rules)
3083 struct rx_hash_rules * rules;
3084#endif
3085{
3086 struct rx_cache * cache
3087 = ((struct rx_cache *)
3088 ((char *)rules
3089 - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
3090 return ((struct rx_hash *)
3091 rx_cache_malloc_or_get (cache,
3092 &cache->free_hash, sizeof (struct rx_hash)));
3093}
3094
3095
3096#ifdef __STDC__
3097static void
3098super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules)
3099#else
3100static void
3101super_hash_liberator (hash, rules)
3102 struct rx_hash * hash;
3103 struct rx_hash_rules * rules;
3104#endif
3105{
3106 struct rx_cache * cache
3107 = ((struct rx_cache *)
3108 (char *)rules - (long)(&((struct rx_cache *)0)->superset_hash_rules));
3109 rx_cache_free (cache, &cache->free_hash, (char *)hash);
3110}
3111
3112#ifdef __STDC__
3113static void
3114superset_hash_item_liberator (struct rx_hash_item * it,
3115 struct rx_hash_rules * rules)
3116#else
3117static void
3118superset_hash_item_liberator (it, rules) /* Well, it does ya know. */
3119 struct rx_hash_item * it;
3120 struct rx_hash_rules * rules;
3121#endif
3122{
3123}
3124
3125int rx_cache_bound = 128;
3126static int rx_default_cache_got = 0;
3127
3128#ifdef __STDC__
3129static int
3130bytes_for_cache_size (int supers, int cset_size)
3131#else
3132static int
3133bytes_for_cache_size (supers, cset_size)
3134 int supers;
3135 int cset_size;
3136#endif
3137{
3138 /* What the hell is this? !!!*/
3139 return (int)
3140 ((float)supers *
3141 ( (1.03 * (float) ( rx_sizeof_bitset (cset_size)
3142 + sizeof (struct rx_super_edge)))
3143 + (1.80 * (float) sizeof (struct rx_possible_future))
3144 + (float) ( sizeof (struct rx_superstate)
3145 + cset_size * sizeof (struct rx_inx))));
3146}
3147
3148#ifdef __STDC__
3149static void
3150rx_morecore (struct rx_cache * cache)
3151#else
3152static void
3153rx_morecore (cache)
3154 struct rx_cache * cache;
3155#endif
3156{
3157 if (rx_default_cache_got >= rx_cache_bound)
3158 return;
3159
3160 rx_default_cache_got += 16;
3161 cache->superstates_allowed = rx_cache_bound;
3162 {
3163 struct rx_blocklist ** pos = &cache->memory;
3164 int size = bytes_for_cache_size (16, cache->local_cset_size);
3165 while (*pos)
3166 pos = &(*pos)->next;
3167 *pos = ((struct rx_blocklist *)
3168 malloc (size + sizeof (struct rx_blocklist)));
3169 if (!*pos)
3170 return;
3171
3172 (*pos)->next = 0;
3173 (*pos)->bytes = size;
3174 cache->memory_pos = *pos;
3175 cache->memory_addr = (char *)*pos + sizeof (**pos);
3176 cache->bytes_left = size;
3177 }
3178}
3179
3180static struct rx_cache default_cache =
3181{
3182 {
3183 supersetcmp,
3184 super_hash_allocator,
3185 super_hash_liberator,
3186 superset_allocator,
3187 superset_hash_item_liberator,
3188 },
3189 0,
3190 0,
3191 0,
3192 0,
3193 rx_morecore,
3194
3195 0,
3196 0,
3197 0,
3198 0,
3199 0,
3200
3201 0,
3202 0,
3203
3204 0,
3205
3206 0,
3207 0,
3208 0,
3209 0,
3210 128,
3211
3212 256,
3213 rx_id_instruction_table,
3214
3215 {
3216 0,
3217 0,
3218 {0},
3219 {0},
3220 {0}
3221 }
3222};
3223
3224/* This adds an element to a superstate set. These sets are lists, such
3225 * that lists with == elements are ==. The empty set is returned by
3226 * superset_cons (rx, 0, 0) and is NOT equivelent to
3227 * (struct rx_superset)0.
3228 */
3229
3230#ifdef __STDC__
3231RX_DECL struct rx_superset *
3232rx_superset_cons (struct rx * rx,
3233 struct rx_nfa_state *car, struct rx_superset *cdr)
3234#else
3235RX_DECL struct rx_superset *
3236rx_superset_cons (rx, car, cdr)
3237 struct rx * rx;
3238 struct rx_nfa_state *car;
3239 struct rx_superset *cdr;
3240#endif
3241{
3242 struct rx_cache * cache = rx->cache;
3243 if (!car && !cdr)
3244 {
3245 if (!cache->empty_superset)
3246 {
3247 cache->empty_superset
3248 = ((struct rx_superset *)
3249 rx_cache_malloc_or_get (cache, &cache->free_supersets,
3250 sizeof (struct rx_superset)));
3251 if (!cache->empty_superset)
3252 return 0;
3253 bzero (cache->empty_superset, sizeof (struct rx_superset));
3254 cache->empty_superset->refs = 1000;
3255 }
3256 return cache->empty_superset;
3257 }
3258 {
3259 struct rx_superset template;
3260 struct rx_hash_item * hit;
3261 template.car = car;
3262 template.cdr = cdr;
3263 template.id = car->id;
3264 hit = rx_hash_store (&cache->superset_table,
3265 (unsigned long)car ^ car->id ^ (unsigned long)cdr,
3266 (void *)&template,
3267 &cache->superset_hash_rules);
3268 return (hit
3269 ? (struct rx_superset *)hit->data
3270 : 0);
3271 }
3272}
3273
3274/* This computes a union of two NFA state sets. The sets do not have the
3275 * same representation though. One is a RX_SUPERSET structure (part
3276 * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
3277 */
3278
3279#ifdef __STDC__
3280RX_DECL struct rx_superset *
3281rx_superstate_eclosure_union
3282 (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl)
3283#else
3284RX_DECL struct rx_superset *
3285rx_superstate_eclosure_union (rx, set, ecl)
3286 struct rx * rx;
3287 struct rx_superset *set;
3288 struct rx_nfa_state_set *ecl;
3289#endif
3290{
3291 if (!ecl)
3292 return set;
3293
3294 if (!set->car)
3295 return rx_superset_cons (rx, ecl->car,
3296 rx_superstate_eclosure_union (rx, set, ecl->cdr));
3297 if (set->car == ecl->car)
3298 return rx_superstate_eclosure_union (rx, set, ecl->cdr);
3299
3300 {
3301 struct rx_superset * tail;
3302 struct rx_nfa_state * first;
3303
3304 if (set->car > ecl->car)
3305 {
3306 tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
3307 first = set->car;
3308 }
3309 else
3310 {
3311 tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
3312 first = ecl->car;
3313 }
3314 if (!tail)
3315 return 0;
3316 else
3317 {
3318 struct rx_superset * answer;
3319 answer = rx_superset_cons (rx, first, tail);
3320 if (!answer)
3321 {
3322 rx_protect_superset (rx, tail);
3323 rx_release_superset (rx, tail);
3324 return 0;
3325 }
3326 else
3327 return answer;
3328 }
3329 }
3330}
3331
3332
3333
3334
3335
3336/*
3337 * This makes sure that a list of rx_distinct_futures contains
3338 * a future for each possible set of side effects in the eclosure
3339 * of a given state. This is some of the work of filling in a
3340 * superstate transition.
3341 */
3342
3343#ifdef __STDC__
3344static struct rx_distinct_future *
3345include_futures (struct rx *rx,
3346 struct rx_distinct_future *df, struct rx_nfa_state
3347 *state, struct rx_superstate *superstate)
3348#else
3349static struct rx_distinct_future *
3350include_futures (rx, df, state, superstate)
3351 struct rx *rx;
3352 struct rx_distinct_future *df;
3353 struct rx_nfa_state *state;
3354 struct rx_superstate *superstate;
3355#endif
3356{
3357 struct rx_possible_future *future;
3358 struct rx_cache * cache = rx->cache;
3359 for (future = state->futures; future; future = future->next)
3360 {
3361 struct rx_distinct_future *dfp;
3362 struct rx_distinct_future *insert_before = 0;
3363 if (df)
3364 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
3365 for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0])
3366 if (dfp->effects == future->effects)
3367 break;
3368 else
3369 {
3370 int order = rx->se_list_cmp (rx, dfp->effects, future->effects);
3371 if (order > 0)
3372 {
3373 insert_before = dfp;
3374 dfp = 0;
3375 break;
3376 }
3377 }
3378 if (df)
3379 df->next_same_super_edge[1]->next_same_super_edge[0] = df;
3380 if (!dfp)
3381 {
3382 dfp
3383 = ((struct rx_distinct_future *)
3384 rx_cache_malloc_or_get (cache, &cache->free_discernable_futures,
3385 sizeof (struct rx_distinct_future)));
3386 if (!dfp)
3387 return 0;
3388 if (!df)
3389 {
3390 df = insert_before = dfp;
3391 df->next_same_super_edge[0] = df->next_same_super_edge[1] = df;
3392 }
3393 else if (!insert_before)
3394 insert_before = df;
3395 else if (insert_before == df)
3396 df = dfp;
3397
3398 dfp->next_same_super_edge[0] = insert_before;
3399 dfp->next_same_super_edge[1]
3400 = insert_before->next_same_super_edge[1];
3401 dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp;
3402 dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp;
3403 dfp->next_same_dest = dfp->prev_same_dest = dfp;
3404 dfp->future = 0;
3405 dfp->present = superstate;
3406 dfp->future_frame.inx = rx->instruction_table[rx_cache_miss];
3407 dfp->future_frame.data = 0;
3408 dfp->future_frame.data_2 = (void *) dfp;
3409 dfp->side_effects_frame.inx
3410 = rx->instruction_table[rx_do_side_effects];
3411 dfp->side_effects_frame.data = 0;
3412 dfp->side_effects_frame.data_2 = (void *) dfp;
3413 dfp->effects = future->effects;
3414 }
3415 }
3416 return df;
3417}
3418
3419
3420
3421
3422/* This constructs a new superstate from its state set. The only
3423 * complexity here is memory management.
3424 */
3425#ifdef __STDC__
3426RX_DECL struct rx_superstate *
3427rx_superstate (struct rx *rx,
3428 struct rx_superset *set)
3429#else
3430RX_DECL struct rx_superstate *
3431rx_superstate (rx, set)
3432 struct rx *rx;
3433 struct rx_superset *set;
3434#endif
3435{
3436 struct rx_cache * cache = rx->cache;
3437 struct rx_superstate * superstate = 0;
3438
3439 /* Does the superstate already exist in the cache? */
3440 if (set->superstate)
3441 {
3442 if (set->superstate->rx_id != rx->rx_id)
3443 {
3444 /* Aha. It is in the cache, but belongs to a superstate
3445 * that refers to an NFA that no longer exists.
3446 * (We know it no longer exists because it was evidently
3447 * stored in the same region of memory as the current nfa
3448 * yet it has a different id.)
3449 */
3450 superstate = set->superstate;
3451 if (!superstate->is_semifree)
3452 {
3453 if (cache->lru_superstate == superstate)
3454 {
3455 cache->lru_superstate = superstate->next_recyclable;
3456 if (cache->lru_superstate == superstate)
3457 cache->lru_superstate = 0;
3458 }
3459 {
3460 superstate->next_recyclable->prev_recyclable
3461 = superstate->prev_recyclable;
3462 superstate->prev_recyclable->next_recyclable
3463 = superstate->next_recyclable;
3464 if (!cache->semifree_superstate)
3465 {
3466 (cache->semifree_superstate
3467 = superstate->next_recyclable
3468 = superstate->prev_recyclable
3469 = superstate);
3470 }
3471 else
3472 {
3473 superstate->next_recyclable = cache->semifree_superstate;
3474 superstate->prev_recyclable
3475 = cache->semifree_superstate->prev_recyclable;
3476 superstate->next_recyclable->prev_recyclable
3477 = superstate;
3478 superstate->prev_recyclable->next_recyclable
3479 = superstate;
3480 cache->semifree_superstate = superstate;
3481 }
3482 ++cache->semifree_superstates;
3483 }
3484 }
3485 set->superstate = 0;
3486 goto handle_cache_miss;
3487 }
3488 ++cache->hits;
3489 superstate = set->superstate;
3490
3491 rx_refresh_this_superstate (cache, superstate);
3492 return superstate;
3493 }
3494
3495 handle_cache_miss:
3496
3497 /* This point reached only for cache misses. */
3498 ++cache->misses;
3499#if RX_DEBUG
3500 if (rx_debug_trace > 1)
3501 {
3502 struct rx_superset * setp = set;
3503 fprintf (stderr, "Building a superstet %d(%d): ", rx->rx_id, set);
3504 while (setp)
3505 {
3506 fprintf (stderr, "%d ", setp->id);
3507 setp = setp->cdr;
3508 }
3509 fprintf (stderr, "(%d)\n", set);
3510 }
3511#endif
3512 superstate = (struct rx_superstate *)rx_cache_get_superstate (cache);
3513 if (!superstate)
3514 return 0;
3515
3516 if (!cache->lru_superstate)
3517 (cache->lru_superstate
3518 = superstate->next_recyclable
3519 = superstate->prev_recyclable
3520 = superstate);
3521 else
3522 {
3523 superstate->next_recyclable = cache->lru_superstate;
3524 superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
3525 ( superstate->prev_recyclable->next_recyclable
3526 = superstate->next_recyclable->prev_recyclable
3527 = superstate);
3528 }
3529 superstate->rx_id = rx->rx_id;
3530 superstate->transition_refs = 0;
3531 superstate->locks = 0;
3532 superstate->is_semifree = 0;
3533 set->superstate = superstate;
3534 superstate->contents = set;
3535 rx_protect_superset (rx, set);
3536 superstate->edges = 0;
3537 {
3538 int x;
3539 /* None of the transitions from this superstate are known yet. */
3540 for (x = 0; x < rx->local_cset_size; ++x) /* &&&&& 3.8 % */
3541 {
3542 struct rx_inx * ifr = &superstate->transitions[x];
3543 ifr->inx = rx->instruction_table [rx_cache_miss];
3544 ifr->data = ifr->data_2 = 0;
3545 }
3546 }
3547 return superstate;
3548}
3549
3550
3551
3552/* This computes the destination set of one edge of the superstate NFA.
3553 * Note that a RX_DISTINCT_FUTURE is a superstate edge.
3554 * Returns 0 on an allocation failure.
3555 */
3556
3557#ifdef __STDC__
3558static int
3559solve_destination (struct rx *rx, struct rx_distinct_future *df)
3560#else
3561static int
3562solve_destination (rx, df)
3563 struct rx *rx;
3564 struct rx_distinct_future *df;
3565#endif
3566{
3567 struct rx_super_edge *tc = df->edge;
3568 struct rx_superset *nfa_state;
3569 struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0);
3570 struct rx_superset *solution = nil_set;
3571 struct rx_superstate *dest;
3572
3573 rx_protect_superset (rx, solution);
3574 /* Iterate over all NFA states in the state set of this superstate. */
3575 for (nfa_state = df->present->contents;
3576 nfa_state->car;
3577 nfa_state = nfa_state->cdr)
3578 {
3579 struct rx_nfa_edge *e;
3580 /* Iterate over all edges of each NFA state. */
3581 for (e = nfa_state->car->edges; e; e = e->next)
3582 /* If we find an edge that is labeled with
3583 * the characters we are solving for.....
3584 */
3585 if (rx_bitset_is_subset (rx->local_cset_size,
3586 tc->cset, e->params.cset))
3587 {
3588 struct rx_nfa_state *n = e->dest;
3589 struct rx_possible_future *pf;
3590 /* ....search the partial epsilon closures of the destination
3591 * of that edge for a path that involves the same set of
3592 * side effects we are solving for.
3593 * If we find such a RX_POSSIBLE_FUTURE, we add members to the
3594 * stateset we are computing.
3595 */
3596 for (pf = n->futures; pf; pf = pf->next)
3597 if (pf->effects == df->effects)
3598 {
3599 struct rx_superset * old_sol;
3600 old_sol = solution;
3601 solution = rx_superstate_eclosure_union (rx, solution,
3602 pf->destset);
3603 if (!solution)
3604 return 0;
3605 rx_protect_superset (rx, solution);
3606 rx_release_superset (rx, old_sol);
3607 }
3608 }
3609 }
3610 /* It is possible that the RX_DISTINCT_FUTURE we are working on has
3611 * the empty set of NFA states as its definition. In that case, this
3612 * is a failure point.
3613 */
3614 if (solution == nil_set)
3615 {
3616 df->future_frame.inx = (void *) rx_backtrack;
3617 df->future_frame.data = 0;
3618 df->future_frame.data_2 = 0;
3619 return 1;
3620 }
3621 dest = rx_superstate (rx, solution);
3622 rx_release_superset (rx, solution);
3623 if (!dest)
3624 return 0;
3625
3626 {
3627 struct rx_distinct_future *dft;
3628 dft = df;
3629 df->prev_same_dest->next_same_dest = 0;
3630 while (dft)
3631 {
3632 dft->future = dest;
3633 dft->future_frame.inx = rx->instruction_table[rx_next_char];
3634 dft->future_frame.data = (void *) dest->transitions;
3635 dft = dft->next_same_dest;
3636 }
3637 df->prev_same_dest->next_same_dest = df;
3638 }
3639 if (!dest->transition_refs)
3640 dest->transition_refs = df;
3641 else
3642 {
3643 struct rx_distinct_future *dft = dest->transition_refs->next_same_dest;
3644 dest->transition_refs->next_same_dest = df->next_same_dest;
3645 df->next_same_dest->prev_same_dest = dest->transition_refs;
3646 df->next_same_dest = dft;
3647 dft->prev_same_dest = df;
3648 }
3649 return 1;
3650}
3651
3652
3653/* This takes a superstate and a character, and computes some edges
3654 * from the superstate NFA. In particular, this computes all edges
3655 * that lead from SUPERSTATE given CHR. This function also
3656 * computes the set of characters that share this edge set.
3657 * This returns 0 on allocation error.
3658 * The character set and list of edges are returned through
3659 * the paramters CSETOUT and DFOUT.
3660} */
3661
3662#ifdef __STDC__
3663static int
3664compute_super_edge (struct rx *rx, struct rx_distinct_future **dfout,
3665 rx_Bitset csetout, struct rx_superstate *superstate,
3666 unsigned char chr)
3667#else
3668static int
3669compute_super_edge (rx, dfout, csetout, superstate, chr)
3670 struct rx *rx;
3671 struct rx_distinct_future **dfout;
3672 rx_Bitset csetout;
3673 struct rx_superstate *superstate;
3674 unsigned char chr;
3675#endif
3676{
3677 struct rx_superset *stateset = superstate->contents;
3678
3679 /* To compute the set of characters that share edges with CHR,
3680 * we start with the full character set, and subtract.
3681 */
3682 rx_bitset_universe (rx->local_cset_size, csetout);
3683 *dfout = 0;
3684
3685 /* Iterate over the NFA states in the superstate state-set. */
3686 while (stateset->car)
3687 {
3688 struct rx_nfa_edge *e;
3689 for (e = stateset->car->edges; e; e = e->next)
3690 if (RX_bitset_member (e->params.cset, chr))
3691 {
3692 /* If we find an NFA edge that applies, we make sure there
3693 * are corresponding edges in the superstate NFA.
3694 */
3695 {
3696 struct rx_distinct_future * saved;
3697 saved = *dfout;
3698 *dfout = include_futures (rx, *dfout, e->dest, superstate);
3699 if (!*dfout)
3700 {
3701 struct rx_distinct_future * df;
3702 df = saved;
3703 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
3704 while (df)
3705 {
3706 struct rx_distinct_future *dft;
3707 dft = df;
3708 df = df->next_same_super_edge[0];
3709
3710 if (dft->future && dft->future->transition_refs == dft)
3711 {
3712 dft->future->transition_refs = dft->next_same_dest;
3713 if (dft->future->transition_refs == dft)
3714 dft->future->transition_refs = 0;
3715 }
3716 dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
3717 dft->prev_same_dest->next_same_dest = dft->next_same_dest;
3718 rx_cache_free (rx->cache,
3719 &rx->cache->free_discernable_futures,
3720 (char *)dft);
3721 }
3722 return 0;
3723 }
3724 }
3725 /* We also trim the character set a bit. */
3726 rx_bitset_intersection (rx->local_cset_size,
3727 csetout, e->params.cset);
3728 }
3729 else
3730 /* An edge that doesn't apply at least tells us some characters
3731 * that don't share the same edge set as CHR.
3732 */
3733 rx_bitset_difference (rx->local_cset_size, csetout, e->params.cset);
3734 stateset = stateset->cdr;
3735 }
3736 return 1;
3737}
3738
3739
3740
3741/* This is a constructor for RX_SUPER_EDGE structures. These are
3742 * wrappers for lists of superstate NFA edges that share character sets labels.
3743 * If a transition class contains more than one rx_distinct_future (superstate
3744 * edge), then it represents a non-determinism in the superstate NFA.
3745 */
3746
3747#ifdef __STDC__
3748static struct rx_super_edge *
3749rx_super_edge (struct rx *rx,
3750 struct rx_superstate *super, rx_Bitset cset,
3751 struct rx_distinct_future *df)
3752#else
3753static struct rx_super_edge *
3754rx_super_edge (rx, super, cset, df)
3755 struct rx *rx;
3756 struct rx_superstate *super;
3757 rx_Bitset cset;
3758 struct rx_distinct_future *df;
3759#endif
3760{
3761 struct rx_super_edge *tc =
3762 (struct rx_super_edge *)rx_cache_malloc_or_get
3763 (rx->cache, &rx->cache->free_transition_classes,
3764 sizeof (struct rx_super_edge) + rx_sizeof_bitset (rx->local_cset_size));
3765
3766 if (!tc)
3767 return 0;
3768 tc->next = super->edges;
3769 super->edges = tc;
3770 tc->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point];
3771 tc->rx_backtrack_frame.data = 0;
3772 tc->rx_backtrack_frame.data_2 = (void *) tc;
3773 tc->options = df;
3774 tc->cset = (rx_Bitset) ((char *) tc + sizeof (*tc));
3775 rx_bitset_assign (rx->local_cset_size, tc->cset, cset);
3776 if (df)
3777 {
3778 struct rx_distinct_future * dfp = df;
3779 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
3780 while (dfp)
3781 {
3782 dfp->edge = tc;
3783 dfp = dfp->next_same_super_edge[0];
3784 }
3785 df->next_same_super_edge[1]->next_same_super_edge[0] = df;
3786 }
3787 return tc;
3788}
3789
3790
3791/* There are three kinds of cache miss. The first occurs when a
3792 * transition is taken that has never been computed during the
3793 * lifetime of the source superstate. That cache miss is handled by
3794 * calling COMPUTE_SUPER_EDGE. The second kind of cache miss
3795 * occurs when the destination superstate of a transition doesn't
3796 * exist. SOLVE_DESTINATION is used to construct the destination superstate.
3797 * Finally, the third kind of cache miss occurs when the destination
3798 * superstate of a transition is in a `semi-free state'. That case is
3799 * handled by UNFREE_SUPERSTATE.
3800 *
3801 * The function of HANDLE_CACHE_MISS is to figure out which of these
3802 * cases applies.
3803 */
3804
3805#ifdef __STDC__
3806static void
3807install_partial_transition (struct rx_superstate *super,
3808 struct rx_inx *answer,
3809 RX_subset set, int offset)
3810#else
3811static void
3812install_partial_transition (super, answer, set, offset)
3813 struct rx_superstate *super;
3814 struct rx_inx *answer;
3815 RX_subset set;
3816 int offset;
3817#endif
3818{
3819 int start = offset;
3820 int end = start + 32;
3821 RX_subset pos = 1;
3822 struct rx_inx * transitions = super->transitions;
3823
3824 while (start < end)
3825 {
3826 if (set & pos)
3827 transitions[start] = *answer;
3828 pos <<= 1;
3829 ++start;
3830 }
3831}
3832
3833
3834#ifdef __STDC__
3835RX_DECL struct rx_inx *
3836rx_handle_cache_miss
3837 (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data)
3838#else
3839RX_DECL struct rx_inx *
3840rx_handle_cache_miss (rx, super, chr, data)
3841 struct rx *rx;
3842 struct rx_superstate *super;
3843 unsigned char chr;
3844 void *data;
3845#endif
3846{
3847 int offset = chr / RX_subset_bits;
3848 struct rx_distinct_future *df = data;
3849
3850 if (!df) /* must be the shared_cache_miss_frame */
3851 {
3852 /* Perhaps this is just a transition waiting to be filled. */
3853 struct rx_super_edge *tc;
3854 RX_subset mask = rx_subset_singletons [chr % RX_subset_bits];
3855
3856 for (tc = super->edges; tc; tc = tc->next)
3857 if (tc->cset[offset] & mask)
3858 {
3859 struct rx_inx * answer;
3860 df = tc->options;
3861 answer = ((tc->options->next_same_super_edge[0] != tc->options)
3862 ? &tc->rx_backtrack_frame
3863 : (df->effects
3864 ? &df->side_effects_frame
3865 : &df->future_frame));
3866 install_partial_transition (super, answer,
3867 tc->cset [offset], offset * 32);
3868 return answer;
3869 }
3870 /* Otherwise, it's a flushed or newly encountered edge. */
3871 {
3872 char cset_space[1024]; /* this limit is far from unreasonable */
3873 rx_Bitset trcset;
3874 struct rx_inx *answer;
3875
3876 if (rx_sizeof_bitset (rx->local_cset_size) > sizeof (cset_space))
3877 return 0; /* If the arbitrary limit is hit, always fail */
3878 /* cleanly. */
3879 trcset = (rx_Bitset)cset_space;
3880 rx_lock_superstate (rx, super);
3881 if (!compute_super_edge (rx, &df, trcset, super, chr))
3882 {
3883 rx_unlock_superstate (rx, super);
3884 return 0;
3885 }
3886 if (!df) /* We just computed the fail transition. */
3887 {
3888 static struct rx_inx
3889 shared_fail_frame = { 0, 0, (void *)rx_backtrack, 0 };
3890 answer = &shared_fail_frame;
3891 }
3892 else
3893 {
3894 tc = rx_super_edge (rx, super, trcset, df);
3895 if (!tc)
3896 {
3897 rx_unlock_superstate (rx, super);
3898 return 0;
3899 }
3900 answer = ((tc->options->next_same_super_edge[0] != tc->options)
3901 ? &tc->rx_backtrack_frame
3902 : (df->effects
3903 ? &df->side_effects_frame
3904 : &df->future_frame));
3905 }
3906 install_partial_transition (super, answer,
3907 trcset[offset], offset * 32);
3908 rx_unlock_superstate (rx, super);
3909 return answer;
3910 }
3911 }
3912 else if (df->future) /* A cache miss on an edge with a future? Must be
3913 * a semi-free destination. */
3914 {
3915 if (df->future->is_semifree)
3916 refresh_semifree_superstate (rx->cache, df->future);
3917 return &df->future_frame;
3918 }
3919 else
3920 /* no future superstate on an existing edge */
3921 {
3922 rx_lock_superstate (rx, super);
3923 if (!solve_destination (rx, df))
3924 {
3925 rx_unlock_superstate (rx, super);
3926 return 0;
3927 }
3928 if (!df->effects
3929 && (df->edge->options->next_same_super_edge[0] == df->edge->options))
3930 install_partial_transition (super, &df->future_frame,
3931 df->edge->cset[offset], offset * 32);
3932 rx_unlock_superstate (rx, super);
3933 return &df->future_frame;
3934 }
3935}
3936
3937
3938
3939
3940
3941/* The rest of the code provides a regex.c compatable interface. */
3942
3943
3944__const__ char *re_error_msg[] =
3945{
3946 0, /* REG_NOUT */
3947 "No match", /* REG_NOMATCH */
3948 "Invalid regular expression", /* REG_BADPAT */
3949 "Invalid collation character", /* REG_ECOLLATE */
3950 "Invalid character class name", /* REG_ECTYPE */
3951 "Trailing backslash", /* REG_EESCAPE */
3952 "Invalid back reference", /* REG_ESUBREG */
3953 "Unmatched [ or [^", /* REG_EBRACK */
3954 "Unmatched ( or \\(", /* REG_EPAREN */
3955 "Unmatched \\{", /* REG_EBRACE */
3956 "Invalid content of \\{\\}", /* REG_BADBR */
3957 "Invalid range end", /* REG_ERANGE */
3958 "Memory exhausted", /* REG_ESPACE */
3959 "Invalid preceding regular expression", /* REG_BADRPT */
3960 "Premature end of regular expression", /* REG_EEND */
3961 "Regular expression too big", /* REG_ESIZE */
3962 "Unmatched ) or \\)", /* REG_ERPAREN */
3963};
3964
3965
3966
3967
3968/*
3969 * Macros used while compiling patterns.
3970 *
3971 * By convention, PEND points just past the end of the uncompiled pattern,
3972 * P points to the read position in the pattern. `translate' is the name
3973 * of the translation table (`TRANSLATE' is the name of a macro that looks
3974 * things up in `translate').
3975 */
3976
3977
3978/*
3979 * Fetch the next character in the uncompiled pattern---translating it
3980 * if necessary. *Also cast from a signed character in the constant
3981 * string passed to us by the user to an unsigned char that we can use
3982 * as an array index (in, e.g., `translate').
3983 */
3984#define PATFETCH(c) \
3985 do {if (p == pend) return REG_EEND; \
3986 c = (unsigned char) *p++; \
3987 c = translate[c]; \
3988 } while (0)
3989
3990/*
3991 * Fetch the next character in the uncompiled pattern, with no
3992 * translation.
3993 */
3994#define PATFETCH_RAW(c) \
3995 do {if (p == pend) return REG_EEND; \
3996 c = (unsigned char) *p++; \
3997 } while (0)
3998
3999/* Go backwards one character in the pattern. */
4000#define PATUNFETCH p--
4001
4002
4003#define TRANSLATE(d) translate[(unsigned char) (d)]
4004
4005typedef unsigned regnum_t;
4006
4007/* Since offsets can go either forwards or backwards, this type needs to
4008 * be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.
4009 */
4010typedef int pattern_offset_t;
4011
4012typedef struct
4013{
4014 struct rexp_node ** top_expression; /* was begalt */
4015 struct rexp_node ** last_expression; /* was laststart */
4016 pattern_offset_t inner_group_offset;
4017 regnum_t regnum;
4018} compile_stack_elt_t;
4019
4020typedef struct
4021{
4022 compile_stack_elt_t *stack;
4023 unsigned size;
4024 unsigned avail; /* Offset of next open position. */
4025} compile_stack_type;
4026
4027
4028#define INIT_COMPILE_STACK_SIZE 32
4029
4030#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
4031#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
4032
4033/* The next available element. */
4034#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
4035
4036
4037/* Set the bit for character C in a list. */
4038#define SET_LIST_BIT(c) \
4039 (b[((unsigned char) (c)) / CHARBITS] \
4040 |= 1 << (((unsigned char) c) % CHARBITS))
4041
4042/* Get the next unsigned number in the uncompiled pattern. */
4043#define GET_UNSIGNED_NUMBER(num) \
4044 { if (p != pend) \
4045 { \
4046 PATFETCH (c); \
4047 while (isdigit (c)) \
4048 { \
4049 if (num < 0) \
4050 num = 0; \
4051 num = num * 10 + c - '0'; \
4052 if (p == pend) \
4053 break; \
4054 PATFETCH (c); \
4055 } \
4056 } \
4057 }
4058
4059#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
4060
4061#define IS_CHAR_CLASS(string) \
4062 (!strcmp (string, "alpha") || !strcmp (string, "upper") \
4063 || !strcmp (string, "lower") || !strcmp (string, "digit") \
4064 || !strcmp (string, "alnum") || !strcmp (string, "xdigit") \
4065 || !strcmp (string, "space") || !strcmp (string, "print") \
4066 || !strcmp (string, "punct") || !strcmp (string, "graph") \
4067 || !strcmp (string, "cntrl") || !strcmp (string, "blank"))
4068
4069
4070
4071/* These predicates are used in regex_compile. */
4072
4073/* P points to just after a ^ in PATTERN. Return true if that ^ comes
4074 * after an alternative or a begin-subexpression. We assume there is at
4075 * least one character before the ^.
4076 */
4077
4078#ifdef __STDC__
4079static boolean
4080at_begline_loc_p (__const__ char *pattern, __const__ char * p, reg_syntax_t syntax)
4081#else
4082static boolean
4083at_begline_loc_p (pattern, p, syntax)
4084 __const__ char *pattern;
4085 __const__ char * p;
4086 reg_syntax_t syntax;
4087#endif
4088{
4089 __const__ char *prev = p - 2;
4090 boolean prev_prev_backslash = ((prev > pattern) && (prev[-1] == '\\'));
4091
4092 return
4093
4094 (/* After a subexpression? */
4095 ((*prev == '(') && ((syntax & RE_NO_BK_PARENS) || prev_prev_backslash))
4096 ||
4097 /* After an alternative? */
4098 ((*prev == '|') && ((syntax & RE_NO_BK_VBAR) || prev_prev_backslash))
4099 );
4100}
4101
4102/* The dual of at_begline_loc_p. This one is for $. We assume there is
4103 * at least one character after the $, i.e., `P < PEND'.
4104 */
4105
4106#ifdef __STDC__
4107static boolean
4108at_endline_loc_p (__const__ char *p, __const__ char *pend, int syntax)
4109#else
4110static boolean
4111at_endline_loc_p (p, pend, syntax)
4112 __const__ char *p;
4113 __const__ char *pend;
4114 int syntax;
4115#endif
4116{
4117 __const__ char *next = p;
4118 boolean next_backslash = (*next == '\\');
4119 __const__ char *next_next = (p + 1 < pend) ? (p + 1) : 0;
4120
4121 return
4122 (
4123 /* Before a subexpression? */
4124 ((syntax & RE_NO_BK_PARENS)
4125 ? (*next == ')')
4126 : (next_backslash && next_next && (*next_next == ')')))
4127 ||
4128 /* Before an alternative? */
4129 ((syntax & RE_NO_BK_VBAR)
4130 ? (*next == '|')
4131 : (next_backslash && next_next && (*next_next == '|')))
4132 );
4133}
4134
4135
4136
4137unsigned char rx_id_translation[256] =
4138{
4139 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
4140 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
4141 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
4142 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
4143 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
4144 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
4145 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
4146 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
4147 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
4148 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
4149
4150 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
4151 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
4152 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
4153 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
4154 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
4155 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
4156 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
4157 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
4158 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
4159 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
4160
4161 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
4162 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
4163 220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
4164 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
4165 240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
4166 250, 251, 252, 253, 254, 255
4167};
4168
4169/* The compiler keeps an inverted translation table.
4170 * This looks up/inititalize elements.
4171 * VALID is an array of booleans that validate CACHE.
4172 */
4173
4174#ifdef __STDC__
4175static rx_Bitset
4176inverse_translation (struct re_pattern_buffer * rxb,
4177 char * valid, rx_Bitset cache,
4178 unsigned char * translate, int c)
4179#else
4180static rx_Bitset
4181inverse_translation (rxb, valid, cache, translate, c)
4182 struct re_pattern_buffer * rxb;
4183 char * valid;
4184 rx_Bitset cache;
4185 unsigned char * translate;
4186 int c;
4187#endif
4188{
4189 rx_Bitset cs
4190 = cache + c * rx_bitset_numb_subsets (rxb->rx.local_cset_size);
4191
4192 if (!valid[c])
4193 {
4194 int x;
4195 int c_tr = TRANSLATE(c);
4196 rx_bitset_null (rxb->rx.local_cset_size, cs);
4197 for (x = 0; x < 256; ++x) /* &&&& 13.37 */
4198 if (TRANSLATE(x) == c_tr)
4199 RX_bitset_enjoin (cs, x);
4200 valid[c] = 1;
4201 }
4202 return cs;
4203}
4204
4205
4206
4207
4208
4209/* More subroutine declarations and macros for regex_compile. */
4210
4211/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
4212 false if it's not. */
4213
4214#ifdef __STDC__
4215static boolean
4216group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
4217#else
4218static boolean
4219group_in_compile_stack (compile_stack, regnum)
4220 compile_stack_type compile_stack;
4221 regnum_t regnum;
4222#endif
4223{
4224 int this_element;
4225
4226 for (this_element = compile_stack.avail - 1;
4227 this_element >= 0;
4228 this_element--)
4229 if (compile_stack.stack[this_element].regnum == regnum)
4230 return true;
4231
4232 return false;
4233}
4234
4235
4236/*
4237 * Read the ending character of a range (in a bracket expression) from the
4238 * uncompiled pattern *P_PTR (which ends at PEND). We assume the
4239 * starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
4240 * Then we set the translation of all bits between the starting and
4241 * ending characters (inclusive) in the compiled pattern B.
4242 *
4243 * Return an error code.
4244 *
4245 * We use these short variable names so we can use the same macros as
4246 * `regex_compile' itself.
4247 */
4248
4249#ifdef __STDC__
4250static reg_errcode_t
4251compile_range (struct re_pattern_buffer * rxb, rx_Bitset cs,
4252 __const__ char ** p_ptr, __const__ char * pend,
4253 unsigned char * translate, reg_syntax_t syntax,
4254 rx_Bitset inv_tr, char * valid_inv_tr)
4255#else
4256static reg_errcode_t
4257compile_range (rxb, cs, p_ptr, pend, translate, syntax, inv_tr, valid_inv_tr)
4258 struct re_pattern_buffer * rxb;
4259 rx_Bitset cs;
4260 __const__ char ** p_ptr;
4261 __const__ char * pend;
4262 unsigned char * translate;
4263 reg_syntax_t syntax;
4264 rx_Bitset inv_tr;
4265 char * valid_inv_tr;
4266#endif
4267{
4268 unsigned this_char;
4269
4270 __const__ char *p = *p_ptr;
4271
4272 unsigned char range_end;
4273 unsigned char range_start = TRANSLATE(p[-2]);
4274
4275 if (p == pend)
4276 return REG_ERANGE;
4277
4278 PATFETCH (range_end);
4279
4280 (*p_ptr)++;
4281
4282 if (range_start > range_end)
4283 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
4284
4285 for (this_char = range_start; this_char <= range_end; this_char++)
4286 {
4287 rx_Bitset it =
4288 inverse_translation (rxb, valid_inv_tr, inv_tr, translate, this_char);
4289 rx_bitset_union (rxb->rx.local_cset_size, cs, it);
4290 }
4291
4292 return REG_NOERROR;
4293}
4294
4295
4296
4297/* This searches a regexp for backreference side effects.
4298 * It fills in the array OUT with 1 at the index of every register pair
4299 * referenced by a backreference.
4300 *
4301 * This is used to help optimize patterns for searching. The information is
4302 * useful because, if the caller doesn't want register values, backreferenced
4303 * registers are the only registers for which we need rx_backtrack.
4304 */
4305
4306#ifdef __STDC__
4307static void
4308find_backrefs (char * out, struct rexp_node * rexp,
4309 struct re_se_params * params)
4310#else
4311static void
4312find_backrefs (out, rexp, params)
4313 char * out;
4314 struct rexp_node * rexp;
4315 struct re_se_params * params;
4316#endif
4317{
4318 if (rexp)
4319 switch (rexp->type)
4320 {
4321 case r_cset:
4322 case r_data:
4323 return;
4324 case r_alternate:
4325 case r_concat:
4326 case r_opt:
4327 case r_star:
4328 case r_2phase_star:
4329 find_backrefs (out, rexp->params.pair.left, params);
4330 find_backrefs (out, rexp->params.pair.right, params);
4331 return;
4332 case r_side_effect:
4333 if ( ((long)rexp->params.side_effect >= 0)
4334 && (params [(long)rexp->params.side_effect].se == re_se_backref))
4335 out[ params [(long)rexp->params.side_effect].op1] = 1;
4336 return;
4337 }
4338}
4339
4340
4341
4342
4343/* Returns 0 unless the pattern can match the empty string. */
4344
4345#ifdef __STDC__
4346static int
4347compute_fastset (struct re_pattern_buffer * rxb, struct rexp_node * rexp)
4348#else
4349static int
4350compute_fastset (rxb, rexp)
4351 struct re_pattern_buffer * rxb;
4352 struct rexp_node * rexp;
4353#endif
4354{
4355 if (!rexp)
4356 return 1;
4357 switch (rexp->type)
4358 {
4359 case r_data:
4360 return 1;
4361 case r_cset:
4362 {
4363 rx_bitset_union (rxb->rx.local_cset_size,
4364 rxb->fastset, rexp->params.cset);
4365 }
4366 return 0;
4367 case r_concat:
4368 return (compute_fastset (rxb, rexp->params.pair.left)
4369 && compute_fastset (rxb, rexp->params.pair.right));
4370 case r_2phase_star:
4371 compute_fastset (rxb, rexp->params.pair.left);
4372 /* compute_fastset (rxb, rexp->params.pair.right); nope... */
4373 return 1;
4374 case r_alternate:
4375 return !!(compute_fastset (rxb, rexp->params.pair.left)
4376 + compute_fastset (rxb, rexp->params.pair.right));
4377 case r_opt:
4378 case r_star:
4379 compute_fastset (rxb, rexp->params.pair.left);
4380 return 1;
4381 case r_side_effect:
4382 return 1;
4383 }
4384
4385 /* this should never happen */
4386 return 0;
4387}
4388
4389
4390
4391/* returns
4392 * 1 -- yes, definately anchored by the given side effect.
4393 * 2 -- maybe anchored, maybe the empty string.
4394 * 0 -- definately not anchored
4395 * There is simply no other possibility.
4396 */
4397
4398#ifdef __STDC__
4399static int
4400is_anchored (struct rexp_node * rexp, rx_side_effect se)
4401#else
4402static int
4403is_anchored (rexp, se)
4404 struct rexp_node * rexp;
4405 rx_side_effect se;
4406#endif
4407{
4408 if (!rexp)
4409 return 2;
4410 switch (rexp->type)
4411 {
4412 case r_cset:
4413 case r_data:
4414 return 0;
4415 case r_concat:
4416 case r_2phase_star:
4417 {
4418 int l = is_anchored (rexp->params.pair.left, se);
4419 return (l == 2 ? is_anchored (rexp->params.pair.right, se) : l);
4420 }
4421 case r_alternate:
4422 {
4423 int l = is_anchored (rexp->params.pair.left, se);
4424 int r = l ? is_anchored (rexp->params.pair.right, se) : 0;
4425
4426 if (l == r)
4427 return l;
4428 else if ((l == 0) || (r == 0))
4429 return 0;
4430 else
4431 return 2;
4432 }
4433 case r_opt:
4434 case r_star:
4435 return is_anchored (rexp->params.pair.left, se) ? 2 : 0;
4436
4437 case r_side_effect:
4438 return ((rexp->params.side_effect == se)
4439 ? 1 : 2);
4440 }
4441
4442 /* this should never happen */
4443 return 0;
4444}
4445
4446
4447
4448/* This removes register assignments that aren't required by backreferencing.
4449 * This can speed up explore_future, especially if it eliminates
4450 * non-determinism in the superstate NFA.
4451 *
4452 * NEEDED is an array of characters, presumably filled in by FIND_BACKREFS.
4453 * The non-zero elements of the array indicate which register assignments
4454 * can NOT be removed from the expression.
4455 */
4456
4457#ifdef __STDC__
4458static struct rexp_node *
4459remove_unecessary_side_effects (struct rx * rx, char * needed,
4460 struct rexp_node * rexp,
4461 struct re_se_params * params)
4462#else
4463static struct rexp_node *
4464remove_unecessary_side_effects (rx, needed, rexp, params)
4465 struct rx * rx;
4466 char * needed;
4467 struct rexp_node * rexp;
4468 struct re_se_params * params;
4469#endif
4470{
4471 struct rexp_node * l;
4472 struct rexp_node * r;
4473 if (!rexp)
4474 return 0;
4475 else
4476 switch (rexp->type)
4477 {
4478 case r_cset:
4479 case r_data:
4480 return rexp;
4481 case r_alternate:
4482 case r_concat:
4483 case r_2phase_star:
4484 l = remove_unecessary_side_effects (rx, needed,
4485 rexp->params.pair.left, params);
4486 r = remove_unecessary_side_effects (rx, needed,
4487 rexp->params.pair.right, params);
4488 if ((l && r) || (rexp->type != r_concat))
4489 {
4490 rexp->params.pair.left = l;
4491 rexp->params.pair.right = r;
4492 return rexp;
4493 }
4494 else
4495 {
4496 rexp->params.pair.left = rexp->params.pair.right = 0;
4497 rx_free_rexp (rx, rexp);
4498 return l ? l : r;
4499 }
4500 case r_opt:
4501 case r_star:
4502 l = remove_unecessary_side_effects (rx, needed,
4503 rexp->params.pair.left, params);
4504 if (l)
4505 {
4506 rexp->params.pair.left = l;
4507 return rexp;
4508 }
4509 else
4510 {
4511 rexp->params.pair.left = 0;
4512 rx_free_rexp (rx, rexp);
4513 return 0;
4514 }
4515 case r_side_effect:
4516 {
4517 int se = (long)rexp->params.side_effect;
4518 if ( (se >= 0)
4519 && ( ((enum re_side_effects)params[se].se == re_se_lparen)
4520 || ((enum re_side_effects)params[se].se == re_se_rparen))
4521 && (params [se].op1 > 0)
4522 && (!needed [params [se].op1]))
4523 {
4524 rx_free_rexp (rx, rexp);
4525 return 0;
4526 }
4527 else
4528 return rexp;
4529 }
4530 }
4531
4532 /* this should never happen */
4533 return 0;
4534}
4535
4536
4537
4538
4539#ifdef __STDC__
4540static int
4541pointless_if_repeated (struct rexp_node * node, struct re_se_params * params)
4542#else
4543static int
4544pointless_if_repeated (node, params)
4545 struct rexp_node * node;
4546 struct re_se_params * params;
4547#endif
4548{
4549 if (!node)
4550 return 1;
4551 switch (node->type)
4552 {
4553 case r_cset:
4554 return 0;
4555 case r_alternate:
4556 case r_concat:
4557 case r_2phase_star:
4558 return (pointless_if_repeated (node->params.pair.left, params)
4559 && pointless_if_repeated (node->params.pair.right, params));
4560 case r_opt:
4561 case r_star:
4562 return pointless_if_repeated (node->params.pair.left, params);
4563 case r_side_effect:
4564 switch ((enum re_side_effects) (((long)node->params.side_effect < 0)
4565 ? (enum re_side_effects) (long) node->params.side_effect
4566 : (enum re_side_effects)params[(long)node->params.side_effect].se))
4567 {
4568 case re_se_try:
4569 case re_se_at_dot:
4570 case re_se_begbuf:
4571 case re_se_hat:
4572 case re_se_wordbeg:
4573 case re_se_wordbound:
4574 case re_se_notwordbound:
4575 case re_se_wordend:
4576 case re_se_endbuf:
4577 case re_se_dollar:
4578 case re_se_fail:
4579 case re_se_win:
4580 return 1;
4581 case re_se_lparen:
4582 case re_se_rparen:
4583 case re_se_iter:
4584 case re_se_end_iter:
4585 case re_se_syntax:
4586 case re_se_not_syntax:
4587 case re_se_backref:
4588 return 0;
4589 }
4590 case r_data:
4591 default:
4592 return 0;
4593 }
4594}
4595
4596
4597
4598
4599#ifdef __STDC__
4600static int
4601registers_on_stack (struct re_pattern_buffer * rxb,
4602 struct rexp_node * rexp, int in_danger,
4603 struct re_se_params * params)
4604#else
4605static int
4606registers_on_stack (rxb, rexp, in_danger, params)
4607 struct re_pattern_buffer * rxb;
4608 struct rexp_node * rexp;
4609 int in_danger;
4610 struct re_se_params * params;
4611#endif
4612{
4613 if (!rexp)
4614 return 0;
4615 else
4616 switch (rexp->type)
4617 {
4618 case r_cset:
4619 case r_data:
4620 return 0;
4621 case r_alternate:
4622 case r_concat:
4623 return ( registers_on_stack (rxb, rexp->params.pair.left,
4624 in_danger, params)
4625 || (registers_on_stack
4626 (rxb, rexp->params.pair.right,
4627 in_danger, params)));
4628 case r_opt:
4629 return registers_on_stack (rxb, rexp->params.pair.left, 0, params);
4630 case r_star:
4631 return registers_on_stack (rxb, rexp->params.pair.left, 1, params);
4632 case r_2phase_star:
4633 return
4634 ( registers_on_stack (rxb, rexp->params.pair.left, 1, params)
4635 || registers_on_stack (rxb, rexp->params.pair.right, 1, params));
4636 case r_side_effect:
4637 {
4638 int se = (long)rexp->params.side_effect;
4639 if ( in_danger
4640 && (se >= 0)
4641 && (params [se].op1 > 0)
4642 && ( ((enum re_side_effects)params[se].se == re_se_lparen)
4643 || ((enum re_side_effects)params[se].se == re_se_rparen)))
4644 return 1;
4645 else
4646 return 0;
4647 }
4648 }
4649
4650 /* this should never happen */
4651 return 0;
4652}
4653
4654
4655
4656
4657static char idempotent_complex_se[] =
4658{
4659#define RX_WANT_SE_DEFS 1
4660#undef RX_DEF_SE
4661#undef RX_DEF_CPLX_SE
4662#define RX_DEF_SE(IDEM, NAME, VALUE)
4663#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE) IDEM,
4664#include "rx.h"
4665#undef RX_DEF_SE
4666#undef RX_DEF_CPLX_SE
4667#undef RX_WANT_SE_DEFS
4668 23
4669};
4670
4671static char idempotent_se[] =
4672{
4673 13,
4674#define RX_WANT_SE_DEFS 1
4675#undef RX_DEF_SE
4676#undef RX_DEF_CPLX_SE
4677#define RX_DEF_SE(IDEM, NAME, VALUE) IDEM,
4678#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE)
4679#include "rx.h"
4680#undef RX_DEF_SE
4681#undef RX_DEF_CPLX_SE
4682#undef RX_WANT_SE_DEFS
4683 42
4684};
4685
4686
4687
4688
4689
4690#ifdef __STDC__
4691static int
4692has_any_se (struct rx * rx,
4693 struct rexp_node * rexp)
4694#else
4695static int
4696has_any_se (rx, rexp)
4697 struct rx * rx;
4698 struct rexp_node * rexp;
4699#endif
4700{
4701 if (!rexp)
4702 return 0;
4703
4704 switch (rexp->type)
4705 {
4706 case r_cset:
4707 case r_data:
4708 return 0;
4709
4710 case r_side_effect:
4711 return 1;
4712
4713 case r_2phase_star:
4714 case r_concat:
4715 case r_alternate:
4716 return
4717 ( has_any_se (rx, rexp->params.pair.left)
4718 || has_any_se (rx, rexp->params.pair.right));
4719
4720 case r_opt:
4721 case r_star:
4722 return has_any_se (rx, rexp->params.pair.left);
4723 }
4724
4725 /* this should never happen */
4726 return 0;
4727}
4728
4729
4730
4731
4732/* This must be called AFTER `convert_hard_loops' for a given REXP. */
4733#ifdef __STDC__
4734static int
4735has_non_idempotent_epsilon_path (struct rx * rx,
4736 struct rexp_node * rexp,
4737 struct re_se_params * params)
4738#else
4739static int
4740has_non_idempotent_epsilon_path (rx, rexp, params)
4741 struct rx * rx;
4742 struct rexp_node * rexp;
4743 struct re_se_params * params;
4744#endif
4745{
4746 if (!rexp)
4747 return 0;
4748
4749 switch (rexp->type)
4750 {
4751 case r_cset:
4752 case r_data:
4753 case r_star:
4754 return 0;
4755
4756 case r_side_effect:
4757 return
4758 !((long)rexp->params.side_effect > 0
4759 ? idempotent_complex_se [ params [(long)rexp->params.side_effect].se ]
4760 : idempotent_se [-(long)rexp->params.side_effect]);
4761
4762 case r_alternate:
4763 return
4764 ( has_non_idempotent_epsilon_path (rx,
4765 rexp->params.pair.left, params)
4766 || has_non_idempotent_epsilon_path (rx,
4767 rexp->params.pair.right, params));
4768
4769 case r_2phase_star:
4770 case r_concat:
4771 return
4772 ( has_non_idempotent_epsilon_path (rx,
4773 rexp->params.pair.left, params)
4774 && has_non_idempotent_epsilon_path (rx,
4775 rexp->params.pair.right, params));
4776
4777 case r_opt:
4778 return has_non_idempotent_epsilon_path (rx,
4779 rexp->params.pair.left, params);
4780 }
4781
4782 /* this should never happen */
4783 return 0;
4784}
4785
4786
4787
4788
4789/* This computes rougly what it's name suggests. It can (and does) go wrong
4790 * in the direction of returning spurious 0 without causing disasters.
4791 */
4792#ifdef __STDC__
4793static int
4794begins_with_complex_se (struct rx * rx, struct rexp_node * rexp)
4795#else
4796static int
4797begins_with_complex_se (rx, rexp)
4798 struct rx * rx;
4799 struct rexp_node * rexp;
4800#endif
4801{
4802 if (!rexp)
4803 return 0;
4804
4805 switch (rexp->type)
4806 {
4807 case r_cset:
4808 case r_data:
4809 return 0;
4810
4811 case r_side_effect:
4812 return ((long)rexp->params.side_effect >= 0);
4813
4814 case r_alternate:
4815 return
4816 ( begins_with_complex_se (rx, rexp->params.pair.left)
4817 && begins_with_complex_se (rx, rexp->params.pair.right));
4818
4819
4820 case r_concat:
4821 return has_any_se (rx, rexp->params.pair.left);
4822 case r_opt:
4823 case r_star:
4824 case r_2phase_star:
4825 return 0;
4826 }
4827
4828 /* this should never happen */
4829 return 0;
4830}
4831
4832
4833
4834/* This destructively removes some of the re_se_tv side effects from
4835 * a rexp tree. In particular, during parsing re_se_tv was inserted on the
4836 * right half of every | to guarantee that posix path preference could be
4837 * honored. This function removes some which it can be determined aren't
4838 * needed.
4839 */
4840
4841#ifdef __STDC__
4842static void
4843speed_up_alt (struct rx * rx,
4844 struct rexp_node * rexp,
4845 int unposix)
4846#else
4847static void
4848speed_up_alt (rx, rexp, unposix)
4849 struct rx * rx;
4850 struct rexp_node * rexp;
4851 int unposix;
4852#endif
4853{
4854 if (!rexp)
4855 return;
4856
4857 switch (rexp->type)
4858 {
4859 case r_cset:
4860 case r_data:
4861 case r_side_effect:
4862 return;
4863
4864 case r_opt:
4865 case r_star:
4866 speed_up_alt (rx, rexp->params.pair.left, unposix);
4867 return;
4868
4869 case r_2phase_star:
4870 case r_concat:
4871 speed_up_alt (rx, rexp->params.pair.left, unposix);
4872 speed_up_alt (rx, rexp->params.pair.right, unposix);
4873 return;
4874
4875 case r_alternate:
4876 /* the right child is guaranteed to be (concat re_se_tv <subexp>) */
4877
4878 speed_up_alt (rx, rexp->params.pair.left, unposix);
4879 speed_up_alt (rx, rexp->params.pair.right->params.pair.right, unposix);
4880
4881 if ( unposix
4882 || (begins_with_complex_se
4883 (rx, rexp->params.pair.right->params.pair.right))
4884 || !( has_any_se (rx, rexp->params.pair.right->params.pair.right)
4885 || has_any_se (rx, rexp->params.pair.left)))
4886 {
4887 struct rexp_node * conc = rexp->params.pair.right;
4888 rexp->params.pair.right = conc->params.pair.right;
4889 conc->params.pair.right = 0;
4890 rx_free_rexp (rx, conc);
4891 }
4892 }
4893}
4894
4895
4896
4897
4898
4899
4900/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
4901 Returns one of error codes defined in `regex.h', or zero for success.
4902
4903 Assumes the `allocated' (and perhaps `buffer') and `translate'
4904 fields are set in BUFP on entry.
4905
4906 If it succeeds, results are put in BUFP (if it returns an error, the
4907 contents of BUFP are undefined):
4908 `buffer' is the compiled pattern;
4909 `syntax' is set to SYNTAX;
4910 `used' is set to the length of the compiled pattern;
4911 `fastmap_accurate' is set to zero;
4912 `re_nsub' is set to the number of groups in PATTERN;
4913 `not_bol' and `not_eol' are set to zero.
4914
4915 The `fastmap' and `newline_anchor' fields are neither
4916 examined nor set. */
4917
4918
4919
4920#ifdef __STDC__
4921RX_DECL reg_errcode_t
4922rx_compile (__const__ char *pattern, int size,
4923 reg_syntax_t syntax,
4924 struct re_pattern_buffer * rxb)
4925#else
4926RX_DECL reg_errcode_t
4927rx_compile (pattern, size, syntax, rxb)
4928 __const__ char *pattern;
4929 int size;
4930 reg_syntax_t syntax;
4931 struct re_pattern_buffer * rxb;
4932#endif
4933{
4934 RX_subset
4935 inverse_translate [CHAR_SET_SIZE * rx_bitset_numb_subsets(CHAR_SET_SIZE)];
4936 char
4937 validate_inv_tr [CHAR_SET_SIZE * rx_bitset_numb_subsets(CHAR_SET_SIZE)];
4938
4939 /* We fetch characters from PATTERN here. Even though PATTERN is
4940 `char *' (i.e., signed), we declare these variables as unsigned, so
4941 they can be reliably used as array indices. */
4942 register unsigned char c, c1;
4943
4944 /* A random tempory spot in PATTERN. */
4945 __const__ char *p1;
4946
4947 /* Keeps track of unclosed groups. */
4948 compile_stack_type compile_stack;
4949
4950 /* Points to the current (ending) position in the pattern. */
4951 __const__ char *p = pattern;
4952 __const__ char *pend = pattern + size;
4953
4954 /* How to translate the characters in the pattern. */
4955 unsigned char *translate = (rxb->translate
4956 ? rxb->translate
4957 : rx_id_translation);
4958
4959 /* When parsing is done, this will hold the expression tree. */
4960 struct rexp_node * rexp = 0;
4961
4962 /* In the midst of compilation, this holds onto the regexp
4963 * first parst while rexp goes on to aquire additional constructs.
4964 */
4965 struct rexp_node * orig_rexp = 0;
4966 struct rexp_node * fewer_side_effects = 0;
4967
4968 /* This and top_expression are saved on the compile stack. */
4969 struct rexp_node ** top_expression = &rexp;
4970 struct rexp_node ** last_expression = top_expression;
4971
4972 /* Parameter to `goto append_node' */
4973 struct rexp_node * append;
4974
4975 /* Counts open-groups as they are encountered. This is the index of the
4976 * innermost group being compiled.
4977 */
4978 regnum_t regnum = 0;
4979
4980 /* Place in the uncompiled pattern (i.e., the {) to
4981 * which to go back if the interval is invalid.
4982 */
4983 __const__ char *beg_interval;
4984
4985 struct re_se_params * params = 0;
4986 int paramc = 0; /* How many complex side effects so far? */
4987
4988 rx_side_effect side; /* param to `goto add_side_effect' */
4989
4990 bzero (validate_inv_tr, sizeof (validate_inv_tr));
4991
4992 rxb->rx.instruction_table = rx_id_instruction_table;
4993
4994
4995 /* Initialize the compile stack. */
4996 compile_stack.stack = (( compile_stack_elt_t *) malloc ((INIT_COMPILE_STACK_SIZE) * sizeof ( compile_stack_elt_t)));
4997 if (compile_stack.stack == 0)
4998 return REG_ESPACE;
4999
5000 compile_stack.size = INIT_COMPILE_STACK_SIZE;
5001 compile_stack.avail = 0;
5002
5003 /* Initialize the pattern buffer. */
5004 rxb->rx.cache = &default_cache;
5005 rxb->syntax = syntax;
5006 rxb->fastmap_accurate = 0;
5007 rxb->not_bol = rxb->not_eol = 0;
5008 rxb->least_subs = 0;
5009
5010 /* Always count groups, whether or not rxb->no_sub is set.
5011 * The whole pattern is implicitly group 0, so counting begins
5012 * with 1.
5013 */
5014 rxb->re_nsub = 0;
5015
5016#if !defined (emacs) && !defined (SYNTAX_TABLE)
5017 /* Initialize the syntax table. */
5018 init_syntax_once ();
5019#endif
5020
5021 /* Loop through the uncompiled pattern until we're at the end. */
5022 while (p != pend)
5023 {
5024 PATFETCH (c);
5025
5026 switch (c)
5027 {
5028 case '^':
5029 {
5030 if ( /* If at start of pattern, it's an operator. */
5031 p == pattern + 1
5032 /* If context independent, it's an operator. */
5033 || syntax & RE_CONTEXT_INDEP_ANCHORS
5034 /* Otherwise, depends on what's come before. */
5035 || at_begline_loc_p (pattern, p, syntax))
5036 {
5037 struct rexp_node * n
5038 = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)re_se_hat);
5039 if (!n)
5040 return REG_ESPACE;
5041 append = n;
5042 goto append_node;
5043 }
5044 else
5045 goto normal_char;
5046 }
5047 break;
5048
5049
5050 case '$':
5051 {
5052 if ( /* If at end of pattern, it's an operator. */
5053 p == pend
5054 /* If context independent, it's an operator. */
5055 || syntax & RE_CONTEXT_INDEP_ANCHORS
5056 /* Otherwise, depends on what's next. */
5057 || at_endline_loc_p (p, pend, syntax))
5058 {
5059 struct rexp_node * n
5060 = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)re_se_dollar);
5061 if (!n)
5062 return REG_ESPACE;
5063 append = n;
5064 goto append_node;
5065 }
5066 else
5067 goto normal_char;
5068 }
5069 break;
5070
5071
5072 case '+':
5073 case '?':
5074 if ((syntax & RE_BK_PLUS_QM)
5075 || (syntax & RE_LIMITED_OPS))
5076 goto normal_char;
5077
5078 handle_plus:
5079 case '*':
5080 /* If there is no previous pattern... */
5081 if (pointless_if_repeated (*last_expression, params))
5082 {
5083 if (syntax & RE_CONTEXT_INVALID_OPS)
5084 return REG_BADRPT;
5085 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
5086 goto normal_char;
5087 }
5088
5089 {
5090 /* 1 means zero (many) matches is allowed. */
5091 char zero_times_ok = 0, many_times_ok = 0;
5092
5093 /* If there is a sequence of repetition chars, collapse it
5094 down to just one (the right one). We can't combine
5095 interval operators with these because of, e.g., `a{2}*',
5096 which should only match an even number of `a's. */
5097
5098 for (;;)
5099 {
5100 zero_times_ok |= c != '+';
5101 many_times_ok |= c != '?';
5102
5103 if (p == pend)
5104 break;
5105
5106 PATFETCH (c);
5107
5108 if (c == '*'
5109 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
5110 ;
5111
5112 else if (syntax & RE_BK_PLUS_QM && c == '\\')
5113 {
5114 if (p == pend) return REG_EESCAPE;
5115
5116 PATFETCH (c1);
5117 if (!(c1 == '+' || c1 == '?'))
5118 {
5119 PATUNFETCH;
5120 PATUNFETCH;
5121 break;
5122 }
5123
5124 c = c1;
5125 }
5126 else
5127 {
5128 PATUNFETCH;
5129 break;
5130 }
5131
5132 /* If we get here, we found another repeat character. */
5133 }
5134
5135 /* Star, etc. applied to an empty pattern is equivalent
5136 to an empty pattern. */
5137 if (!last_expression)
5138 break;
5139
5140 /* Now we know whether or not zero matches is allowed
5141 * and also whether or not two or more matches is allowed.
5142 */
5143
5144 {
5145 struct rexp_node * inner_exp = *last_expression;
5146 int need_sync = 0;
5147
5148 if (many_times_ok
5149 && has_non_idempotent_epsilon_path (&rxb->rx,
5150 inner_exp, params))
5151 {
5152 struct rexp_node * pusher
5153 = rx_mk_r_side_effect (&rxb->rx,
5154 (rx_side_effect)re_se_pushpos);
5155 struct rexp_node * checker
5156 = rx_mk_r_side_effect (&rxb->rx,
5157 (rx_side_effect)re_se_chkpos);
5158 struct rexp_node * pushback
5159 = rx_mk_r_side_effect (&rxb->rx,
5160 (rx_side_effect)re_se_pushback);
5161 rx_Bitset cs = rx_cset (&rxb->rx);
5162 struct rexp_node * lit_t = rx_mk_r_cset (&rxb->rx, cs);
5163 struct rexp_node * fake_state
5164 = rx_mk_r_concat (&rxb->rx, pushback, lit_t);
5165 struct rexp_node * phase2
5166 = rx_mk_r_concat (&rxb->rx, checker, fake_state);
5167 struct rexp_node * popper
5168 = rx_mk_r_side_effect (&rxb->rx,
5169 (rx_side_effect)re_se_poppos);
5170 struct rexp_node * star
5171 = rx_mk_r_2phase_star (&rxb->rx, inner_exp, phase2);
5172 struct rexp_node * a
5173 = rx_mk_r_concat (&rxb->rx, pusher, star);
5174 struct rexp_node * whole_thing
5175 = rx_mk_r_concat (&rxb->rx, a, popper);
5176 if (!(pusher && star && pushback && lit_t && fake_state
5177 && lit_t && phase2 && checker && popper
5178 && a && whole_thing))
5179 return REG_ESPACE;
5180 RX_bitset_enjoin (cs, 't');
5181 *last_expression = whole_thing;
5182 }
5183 else
5184 {
5185 struct rexp_node * star =
5186 (many_times_ok ? rx_mk_r_star : rx_mk_r_opt)
5187 (&rxb->rx, *last_expression);
5188 if (!star)
5189 return REG_ESPACE;
5190 *last_expression = star;
5191 need_sync = has_any_se (&rxb->rx, *last_expression);
5192 }
5193 if (!zero_times_ok)
5194 {
5195 struct rexp_node * concat
5196 = rx_mk_r_concat (&rxb->rx, inner_exp,
5197 rx_copy_rexp (&rxb->rx,
5198 *last_expression));
5199 if (!concat)
5200 return REG_ESPACE;
5201 *last_expression = concat;
5202 }
5203 if (need_sync)
5204 {
5205 int sync_se = paramc;
5206 params = (params
5207 ? ((struct re_se_params *)
5208 realloc (params,
5209 sizeof (*params) * (1 + paramc)))
5210 : ((struct re_se_params *)
5211 malloc (sizeof (*params))));
5212 if (!params)
5213 return REG_ESPACE;
5214 ++paramc;
5215 params [sync_se].se = re_se_tv;
5216 side = (rx_side_effect)sync_se;
5217 goto add_side_effect;
5218 }
5219 }
5220 /* The old regex.c used to optimize `.*\n'.
5221 * Maybe rx should too?
5222 */
5223 }
5224 break;
5225
5226
5227 case '.':
5228 {
5229 rx_Bitset cs = rx_cset (&rxb->rx);
5230 struct rexp_node * n = rx_mk_r_cset (&rxb->rx, cs);
5231 if (!(cs && n))
5232 return REG_ESPACE;
5233
5234 rx_bitset_universe (rxb->rx.local_cset_size, cs);
5235 if (!(rxb->syntax & RE_DOT_NEWLINE))
5236 RX_bitset_remove (cs, '\n');
5237 if (!(rxb->syntax & RE_DOT_NOT_NULL))
5238 RX_bitset_remove (cs, 0);
5239
5240 append = n;
5241 goto append_node;
5242 break;
5243 }
5244
5245
5246 case '[':
5247 if (p == pend) return REG_EBRACK;
5248 {
5249 boolean had_char_class = false;
5250 rx_Bitset cs = rx_cset (&rxb->rx);
5251 struct rexp_node * node = rx_mk_r_cset (&rxb->rx, cs);
5252 int is_inverted = *p == '^';
5253
5254 if (!(node && cs))
5255 return REG_ESPACE;
5256
5257 /* This branch of the switch is normally exited with
5258 *`goto append_node'
5259 */
5260 append = node;
5261
5262 if (is_inverted)
5263 p++;
5264
5265 /* Remember the first position in the bracket expression. */
5266 p1 = p;
5267
5268 /* Read in characters and ranges, setting map bits. */
5269 for (;;)
5270 {
5271 if (p == pend) return REG_EBRACK;
5272
5273 PATFETCH (c);
5274
5275 /* \ might escape characters inside [...] and [^...]. */
5276 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
5277 {
5278 if (p == pend) return REG_EESCAPE;
5279
5280 PATFETCH (c1);
5281 {
5282 rx_Bitset it = inverse_translation (rxb,
5283 validate_inv_tr,
5284 inverse_translate,
5285 translate,
5286 c1);
5287 rx_bitset_union (rxb->rx.local_cset_size, cs, it);
5288 }
5289 continue;
5290 }
5291
5292 /* Could be the end of the bracket expression. If it's
5293 not (i.e., when the bracket expression is `[]' so
5294 far), the ']' character bit gets set way below. */
5295 if (c == ']' && p != p1 + 1)
5296 goto finalize_class_and_append;
5297
5298 /* Look ahead to see if it's a range when the last thing
5299 was a character class. */
5300 if (had_char_class && c == '-' && *p != ']')
5301 return REG_ERANGE;
5302
5303 /* Look ahead to see if it's a range when the last thing
5304 was a character: if this is a hyphen not at the
5305 beginning or the end of a list, then it's the range
5306 operator. */
5307 if (c == '-'
5308 && !(p - 2 >= pattern && p[-2] == '[')
5309 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
5310 && *p != ']')
5311 {
5312 reg_errcode_t ret
5313 = compile_range (rxb, cs, &p, pend, translate, syntax,
5314 inverse_translate, validate_inv_tr);
5315 if (ret != REG_NOERROR) return ret;
5316 }
5317
5318 else if (p[0] == '-' && p[1] != ']')
5319 { /* This handles ranges made up of characters only. */
5320 reg_errcode_t ret;
5321
5322 /* Move past the `-'. */
5323 PATFETCH (c1);
5324
5325 ret = compile_range (rxb, cs, &p, pend, translate, syntax,
5326 inverse_translate, validate_inv_tr);
5327 if (ret != REG_NOERROR) return ret;
5328 }
5329
5330 /* See if we're at the beginning of a possible character
5331 class. */
5332
5333 else if ((syntax & RE_CHAR_CLASSES)
5334 && (c == '[') && (*p == ':'))
5335 {
5336 char str[CHAR_CLASS_MAX_LENGTH + 1];
5337
5338 PATFETCH (c);
5339 c1 = 0;
5340
5341 /* If pattern is `[[:'. */
5342 if (p == pend) return REG_EBRACK;
5343
5344 for (;;)
5345 {
5346 PATFETCH (c);
5347 if (c == ':' || c == ']' || p == pend
5348 || c1 == CHAR_CLASS_MAX_LENGTH)
5349 break;
5350 str[c1++] = c;
5351 }
5352 str[c1] = '\0';
5353
5354 /* If isn't a word bracketed by `[:' and:`]':
5355 undo the ending character, the letters, and leave
5356 the leading `:' and `[' (but set bits for them). */
5357 if (c == ':' && *p == ']')
5358 {
5359 int ch;
5360 boolean is_alnum = !strcmp (str, "alnum");
5361 boolean is_alpha = !strcmp (str, "alpha");
5362 boolean is_blank = !strcmp (str, "blank");
5363 boolean is_cntrl = !strcmp (str, "cntrl");
5364 boolean is_digit = !strcmp (str, "digit");
5365 boolean is_graph = !strcmp (str, "graph");
5366 boolean is_lower = !strcmp (str, "lower");
5367 boolean is_print = !strcmp (str, "print");
5368 boolean is_punct = !strcmp (str, "punct");
5369 boolean is_space = !strcmp (str, "space");
5370 boolean is_upper = !strcmp (str, "upper");
5371 boolean is_xdigit = !strcmp (str, "xdigit");
5372
5373 if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
5374
5375 /* Throw away the ] at the end of the character
5376 class. */
5377 PATFETCH (c);
5378
5379 if (p == pend) return REG_EBRACK;
5380
5381 for (ch = 0; ch < 1 << CHARBITS; ch++)
5382 {
5383 if ( (is_alnum && isalnum (ch))
5384 || (is_alpha && isalpha (ch))
5385 || (is_blank && isblank (ch))
5386 || (is_cntrl && iscntrl (ch))
5387 || (is_digit && isdigit (ch))
5388 || (is_graph && isgraph (ch))
5389 || (is_lower && islower (ch))
5390 || (is_print && isprint (ch))
5391 || (is_punct && ispunct (ch))
5392 || (is_space && isspace (ch))
5393 || (is_upper && isupper (ch))
5394 || (is_xdigit && isxdigit (ch)))
5395 {
5396 rx_Bitset it =
5397 inverse_translation (rxb,
5398 validate_inv_tr,
5399 inverse_translate,
5400 translate,
5401 ch);
5402 rx_bitset_union (rxb->rx.local_cset_size,
5403 cs, it);
5404 }
5405 }
5406 had_char_class = true;
5407 }
5408 else
5409 {
5410 c1++;
5411 while (c1--)
5412 PATUNFETCH;
5413 {
5414 rx_Bitset it =
5415 inverse_translation (rxb,
5416 validate_inv_tr,
5417 inverse_translate,
5418 translate,
5419 '[');
5420 rx_bitset_union (rxb->rx.local_cset_size,
5421 cs, it);
5422 }
5423 {
5424 rx_Bitset it =
5425 inverse_translation (rxb,
5426 validate_inv_tr,
5427 inverse_translate,
5428 translate,
5429 ':');
5430 rx_bitset_union (rxb->rx.local_cset_size,
5431 cs, it);
5432 }
5433 had_char_class = false;
5434 }
5435 }
5436 else
5437 {
5438 had_char_class = false;
5439 {
5440 rx_Bitset it = inverse_translation (rxb,
5441 validate_inv_tr,
5442 inverse_translate,
5443 translate,
5444 c);
5445 rx_bitset_union (rxb->rx.local_cset_size, cs, it);
5446 }
5447 }
5448 }
5449
5450 finalize_class_and_append:
5451 if (is_inverted)
5452 {
5453 rx_bitset_complement (rxb->rx.local_cset_size, cs);
5454 if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
5455 RX_bitset_remove (cs, '\n');
5456 }
5457 goto append_node;
5458 }
5459 break;
5460
5461
5462 case '(':
5463 if (syntax & RE_NO_BK_PARENS)
5464 goto handle_open;
5465 else
5466 goto normal_char;
5467
5468
5469 case ')':
5470 if (syntax & RE_NO_BK_PARENS)
5471 goto handle_close;
5472 else
5473 goto normal_char;
5474
5475
5476 case '\n':
5477 if (syntax & RE_NEWLINE_ALT)
5478 goto handle_alt;
5479 else
5480 goto normal_char;
5481
5482
5483 case '|':
5484 if (syntax & RE_NO_BK_VBAR)
5485 goto handle_alt;
5486 else
5487 goto normal_char;
5488
5489
5490 case '{':
5491 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
5492 goto handle_interval;
5493 else
5494 goto normal_char;
5495
5496
5497 case '\\':
5498 if (p == pend) return REG_EESCAPE;
5499
5500 /* Do not translate the character after the \, so that we can
5501 distinguish, e.g., \B from \b, even if we normally would
5502 translate, e.g., B to b. */
5503 PATFETCH_RAW (c);
5504
5505 switch (c)
5506 {
5507 case '(':
5508 if (syntax & RE_NO_BK_PARENS)
5509 goto normal_backslash;
5510
5511 handle_open:
5512 rxb->re_nsub++;
5513 regnum++;
5514 if (COMPILE_STACK_FULL)
5515 {
5516 ((compile_stack.stack) =
5517 (compile_stack_elt_t *) realloc (compile_stack.stack, ( compile_stack.size << 1) * sizeof (
5518 compile_stack_elt_t)));
5519 if (compile_stack.stack == 0) return REG_ESPACE;
5520
5521 compile_stack.size <<= 1;
5522 }
5523
5524 if (*last_expression)
5525 {
5526 struct rexp_node * concat
5527 = rx_mk_r_concat (&rxb->rx, *last_expression, 0);
5528 if (!concat)
5529 return REG_ESPACE;
5530 *last_expression = concat;
5531 last_expression = &concat->params.pair.right;
5532 }
5533
5534 /*
5535 * These are the values to restore when we hit end of this
5536 * group.
5537 */
5538 COMPILE_STACK_TOP.top_expression = top_expression;
5539 COMPILE_STACK_TOP.last_expression = last_expression;
5540 COMPILE_STACK_TOP.regnum = regnum;
5541
5542 compile_stack.avail++;
5543
5544 top_expression = last_expression;
5545 break;
5546
5547
5548 case ')':
5549 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
5550
5551 handle_close:
5552 /* See similar code for backslashed left paren above. */
5553 if (COMPILE_STACK_EMPTY)
5554 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
5555 goto normal_char;
5556 else
5557 return REG_ERPAREN;
5558
5559 /* Since we just checked for an empty stack above, this
5560 ``can't happen''. */
5561
5562 {
5563 /* We don't just want to restore into `regnum', because
5564 later groups should continue to be numbered higher,
5565 as in `(ab)c(de)' -- the second group is #2. */
5566 regnum_t this_group_regnum;
5567 struct rexp_node ** inner = top_expression;
5568
5569 compile_stack.avail--;
5570 top_expression = COMPILE_STACK_TOP.top_expression;
5571 last_expression = COMPILE_STACK_TOP.last_expression;
5572 this_group_regnum = COMPILE_STACK_TOP.regnum;
5573 {
5574 int left_se = paramc;
5575 int right_se = paramc + 1;
5576
5577 params = (params
5578 ? ((struct re_se_params *)
5579 realloc (params,
5580 (paramc + 2) * sizeof (params[0])))
5581 : ((struct re_se_params *)
5582 malloc (2 * sizeof (params[0]))));
5583 if (!params)
5584 return REG_ESPACE;
5585 paramc += 2;
5586
5587 params[left_se].se = re_se_lparen;
5588 params[left_se].op1 = this_group_regnum;
5589 params[right_se].se = re_se_rparen;
5590 params[right_se].op1 = this_group_regnum;
5591 {
5592 struct rexp_node * left
5593 = rx_mk_r_side_effect (&rxb->rx,
5594 (rx_side_effect)left_se);
5595 struct rexp_node * right
5596 = rx_mk_r_side_effect (&rxb->rx,
5597 (rx_side_effect)right_se);
5598 struct rexp_node * c1
5599 = (*inner
5600 ? rx_mk_r_concat (&rxb->rx, left, *inner) : left);
5601 struct rexp_node * c2
5602 = rx_mk_r_concat (&rxb->rx, c1, right);
5603 if (!(left && right && c1 && c2))
5604 return REG_ESPACE;
5605 *inner = c2;
5606 }
5607 }
5608 break;
5609 }
5610
5611 case '|': /* `\|'. */
5612 if ((syntax & RE_LIMITED_OPS) || (syntax & RE_NO_BK_VBAR))
5613 goto normal_backslash;
5614 handle_alt:
5615 if (syntax & RE_LIMITED_OPS)
5616 goto normal_char;
5617
5618 {
5619 struct rexp_node * alt
5620 = rx_mk_r_alternate (&rxb->rx, *top_expression, 0);
5621 if (!alt)
5622 return REG_ESPACE;
5623 *top_expression = alt;
5624 last_expression = &alt->params.pair.right;
5625 {
5626 int sync_se = paramc;
5627
5628 params = (params
5629 ? ((struct re_se_params *)
5630 realloc (params,
5631 (paramc + 1) * sizeof (params[0])))
5632 : ((struct re_se_params *)
5633 malloc (sizeof (params[0]))));
5634 if (!params)
5635 return REG_ESPACE;
5636 ++paramc;
5637
5638 params[sync_se].se = re_se_tv;
5639 {
5640 struct rexp_node * sync
5641 = rx_mk_r_side_effect (&rxb->rx,
5642 (rx_side_effect)sync_se);
5643 struct rexp_node * conc
5644 = rx_mk_r_concat (&rxb->rx, sync, 0);
5645
5646 if (!sync || !conc)
5647 return REG_ESPACE;
5648
5649 *last_expression = conc;
5650 last_expression = &conc->params.pair.right;
5651 }
5652 }
5653 }
5654 break;
5655
5656
5657 case '{':
5658 /* If \{ is a literal. */
5659 if (!(syntax & RE_INTERVALS)
5660 /* If we're at `\{' and it's not the open-interval
5661 operator. */
5662 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
5663 || (p - 2 == pattern && p == pend))
5664 goto normal_backslash;
5665
5666 handle_interval:
5667 {
5668 /* If got here, then the syntax allows intervals. */
5669
5670 /* At least (most) this many matches must be made. */
5671 int lower_bound = -1, upper_bound = -1;
5672
5673 beg_interval = p - 1;
5674
5675 if (p == pend)
5676 {
5677 if (syntax & RE_NO_BK_BRACES)
5678 goto unfetch_interval;
5679 else
5680 return REG_EBRACE;
5681 }
5682
5683 GET_UNSIGNED_NUMBER (lower_bound);
5684
5685 if (c == ',')
5686 {
5687 GET_UNSIGNED_NUMBER (upper_bound);
5688 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
5689 }
5690 else
5691 /* Interval such as `{1}' => match exactly once. */
5692 upper_bound = lower_bound;
5693
5694 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
5695 || lower_bound > upper_bound)
5696 {
5697 if (syntax & RE_NO_BK_BRACES)
5698 goto unfetch_interval;
5699 else
5700 return REG_BADBR;
5701 }
5702
5703 if (!(syntax & RE_NO_BK_BRACES))
5704 {
5705 if (c != '\\') return REG_EBRACE;
5706 PATFETCH (c);
5707 }
5708
5709 if (c != '}')
5710 {
5711 if (syntax & RE_NO_BK_BRACES)
5712 goto unfetch_interval;
5713 else
5714 return REG_BADBR;
5715 }
5716
5717 /* We just parsed a valid interval. */
5718
5719 /* If it's invalid to have no preceding re. */
5720 if (pointless_if_repeated (*last_expression, params))
5721 {
5722 if (syntax & RE_CONTEXT_INVALID_OPS)
5723 return REG_BADRPT;
5724 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
5725 goto unfetch_interval;
5726 /* was: else laststart = b; */
5727 }
5728
5729 /* If the upper bound is zero, don't want to iterate
5730 * at all.
5731 */
5732 if (upper_bound == 0)
5733 {
5734 if (*last_expression)
5735 {
5736 rx_free_rexp (&rxb->rx, *last_expression);
5737 *last_expression = 0;
5738 }
5739 }
5740 else
5741 /* Otherwise, we have a nontrivial interval. */
5742 {
5743 int iter_se = paramc;
5744 int end_se = paramc + 1;
5745 params = (params
5746 ? ((struct re_se_params *)
5747 realloc (params,
5748 sizeof (*params) * (2 + paramc)))
5749 : ((struct re_se_params *)
5750 malloc (2 * sizeof (*params))));
5751 if (!params)
5752 return REG_ESPACE;
5753 paramc += 2;
5754 params [iter_se].se = re_se_iter;
5755 params [iter_se].op1 = lower_bound;
5756 params[iter_se].op2 = upper_bound;
5757
5758 params[end_se].se = re_se_end_iter;
5759 params[end_se].op1 = lower_bound;
5760 params[end_se].op2 = upper_bound;
5761 {
5762 struct rexp_node * push0
5763 = rx_mk_r_side_effect (&rxb->rx,
5764 (rx_side_effect)re_se_push0);
5765 struct rexp_node * start_one_iter
5766 = rx_mk_r_side_effect (&rxb->rx,
5767 (rx_side_effect)iter_se);
5768 struct rexp_node * phase1
5769 = rx_mk_r_concat (&rxb->rx, start_one_iter,
5770 *last_expression);
5771 struct rexp_node * pushback
5772 = rx_mk_r_side_effect (&rxb->rx,
5773 (rx_side_effect)re_se_pushback);
5774 rx_Bitset cs = rx_cset (&rxb->rx);
5775 struct rexp_node * lit_t
5776 = rx_mk_r_cset (&rxb->rx, cs);
5777 struct rexp_node * phase2
5778 = rx_mk_r_concat (&rxb->rx, pushback, lit_t);
5779 struct rexp_node * loop
5780 = rx_mk_r_2phase_star (&rxb->rx, phase1, phase2);
5781 struct rexp_node * push_n_loop
5782 = rx_mk_r_concat (&rxb->rx, push0, loop);
5783 struct rexp_node * final_test
5784 = rx_mk_r_side_effect (&rxb->rx,
5785 (rx_side_effect)end_se);
5786 struct rexp_node * full_exp
5787 = rx_mk_r_concat (&rxb->rx, push_n_loop, final_test);
5788
5789 if (!(push0 && start_one_iter && phase1
5790 && pushback && lit_t && phase2
5791 && loop && push_n_loop && final_test && full_exp))
5792 return REG_ESPACE;
5793
5794 RX_bitset_enjoin(cs, 't');
5795
5796 *last_expression = full_exp;
5797 }
5798 }
5799 beg_interval = 0;
5800 }
5801 break;
5802
5803 unfetch_interval:
5804 /* If an invalid interval, match the characters as literals. */
5805 p = beg_interval;
5806 beg_interval = 0;
5807
5808 /* normal_char and normal_backslash need `c'. */
5809 PATFETCH (c);
5810
5811 if (!(syntax & RE_NO_BK_BRACES))
5812 {
5813 if (p > pattern && p[-1] == '\\')
5814 goto normal_backslash;
5815 }
5816 goto normal_char;
5817
5818#ifdef emacs
5819 /* There is no way to specify the before_dot and after_dot
5820 operators. rms says this is ok. --karl */
5821 case '=':
5822 side = (rx_side_effect)rx_se_at_dot;
5823 goto add_side_effect;
5824 break;
5825
5826 case 's':
5827 case 'S':
5828 {
5829 rx_Bitset cs = rx_cset (&rxb->rx);
5830 struct rexp_node * set = rx_mk_r_cset (&rxb->rx, cs);
5831 if (!(cs && set))
5832 return REG_ESPACE;
5833 if (c == 'S')
5834 rx_bitset_universe (rxb->rx.local_cset_size, cs);
5835
5836 PATFETCH (c);
5837 {
5838 int x;
5839 enum syntaxcode code = syntax_spec_code [c];
5840 for (x = 0; x < 256; ++x)
5841 {
5842
5843 if (SYNTAX (x) == code)
5844 {
5845 rx_Bitset it =
5846 inverse_translation (rxb, validate_inv_tr,
5847 inverse_translate,
5848 translate, x);
5849 rx_bitset_xor (rxb->rx.local_cset_size, cs, it);
5850 }
5851 }
5852 }
5853 append = set;
5854 goto append_node;
5855 }
5856 break;
5857#endif /* emacs */
5858
5859
5860 case 'w':
5861 case 'W':
5862 {
5863 rx_Bitset cs = rx_cset (&rxb->rx);
5864 struct rexp_node * n = (cs ? rx_mk_r_cset (&rxb->rx, cs) : 0);
5865 if (!(cs && n))
5866 return REG_ESPACE;
5867 if (c == 'W')
5868 rx_bitset_universe (rxb->rx.local_cset_size ,cs);
5869 {
5870 int x;
5871 for (x = rxb->rx.local_cset_size - 1; x > 0; --x)
5872 if (SYNTAX(x) & Sword)
5873 RX_bitset_toggle (cs, x);
5874 }
5875 append = n;
5876 goto append_node;
5877 }
5878 break;
5879
5880/* With a little extra work, some of these side effects could be optimized
5881 * away (basicly by looking at what we already know about the surrounding
5882 * chars).
5883 */
5884 case '<':
5885 side = (rx_side_effect)re_se_wordbeg;
5886 goto add_side_effect;
5887 break;
5888
5889 case '>':
5890 side = (rx_side_effect)re_se_wordend;
5891 goto add_side_effect;
5892 break;
5893
5894 case 'b':
5895 side = (rx_side_effect)re_se_wordbound;
5896 goto add_side_effect;
5897 break;
5898
5899 case 'B':
5900 side = (rx_side_effect)re_se_notwordbound;
5901 goto add_side_effect;
5902 break;
5903
5904 case '`':
5905 side = (rx_side_effect)re_se_begbuf;
5906 goto add_side_effect;
5907 break;
5908
5909 case '\'':
5910 side = (rx_side_effect)re_se_endbuf;
5911 goto add_side_effect;
5912 break;
5913
5914 add_side_effect:
5915 {
5916 struct rexp_node * se
5917 = rx_mk_r_side_effect (&rxb->rx, side);
5918 if (!se)
5919 return REG_ESPACE;
5920 append = se;
5921 goto append_node;
5922 }
5923 break;
5924
5925 case '1': case '2': case '3': case '4': case '5':
5926 case '6': case '7': case '8': case '9':
5927 if (syntax & RE_NO_BK_REFS)
5928 goto normal_char;
5929
5930 c1 = c - '0';
5931
5932 if (c1 > regnum)
5933 return REG_ESUBREG;
5934
5935 /* Can't back reference to a subexpression if inside of it. */
5936 if (group_in_compile_stack (compile_stack, c1))
5937 return REG_ESUBREG;
5938
5939 {
5940 int backref_se = paramc;
5941 params = (params
5942 ? ((struct re_se_params *)
5943 realloc (params,
5944 sizeof (*params) * (1 + paramc)))
5945 : ((struct re_se_params *)
5946 malloc (sizeof (*params))));
5947 if (!params)
5948 return REG_ESPACE;
5949 ++paramc;
5950 params[backref_se].se = re_se_backref;
5951 params[backref_se].op1 = c1;
5952 side = (rx_side_effect)backref_se;
5953 goto add_side_effect;
5954 }
5955 break;
5956
5957 case '+':
5958 case '?':
5959 if (syntax & RE_BK_PLUS_QM)
5960 goto handle_plus;
5961 else
5962 goto normal_backslash;
5963
5964 default:
5965 normal_backslash:
5966 /* You might think it would be useful for \ to mean
5967 not to translate; but if we don't translate it
5968 it will never match anything. */
5969 c = TRANSLATE (c);
5970 goto normal_char;
5971 }
5972 break;
5973
5974
5975 default:
5976 /* Expects the character in `c'. */
5977 normal_char:
5978 {
5979 rx_Bitset cs = rx_cset(&rxb->rx);
5980 struct rexp_node * match = rx_mk_r_cset (&rxb->rx, cs);
5981 rx_Bitset it;
5982 if (!(cs && match))
5983 return REG_ESPACE;
5984 it = inverse_translation (rxb, validate_inv_tr,
5985 inverse_translate, translate, c);
5986 rx_bitset_union (CHAR_SET_SIZE, cs, it);
5987 append = match;
5988
5989 append_node:
5990 /* This genericly appends the rexp APPEND to *LAST_EXPRESSION
5991 * and then parses the next character normally.
5992 */
5993 if (*last_expression)
5994 {
5995 struct rexp_node * concat
5996 = rx_mk_r_concat (&rxb->rx, *last_expression, append);
5997 if (!concat)
5998 return REG_ESPACE;
5999 *last_expression = concat;
6000 last_expression = &concat->params.pair.right;
6001 }
6002 else
6003 *last_expression = append;
6004 }
6005 } /* switch (c) */
6006 } /* while p != pend */
6007
6008
6009 {
6010 int win_se = paramc;
6011 params = (params
6012 ? ((struct re_se_params *)
6013 realloc (params,
6014 sizeof (*params) * (1 + paramc)))
6015 : ((struct re_se_params *)
6016 malloc (sizeof (*params))));
6017 if (!params)
6018 return REG_ESPACE;
6019 ++paramc;
6020 params[win_se].se = re_se_win;
6021 {
6022 struct rexp_node * se
6023 = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)win_se);
6024 struct rexp_node * concat
6025 = rx_mk_r_concat (&rxb->rx, rexp, se);
6026 if (!(se && concat))
6027 return REG_ESPACE;
6028 rexp = concat;
6029 }
6030 }
6031
6032
6033 /* Through the pattern now. */
6034
6035 if (!COMPILE_STACK_EMPTY)
6036 return REG_EPAREN;
6037
6038 free (compile_stack.stack);
6039
6040 orig_rexp = rexp;
6041#ifdef RX_DEBUG
6042 if (rx_debug_compile)
6043 {
6044 dbug_rxb = rxb;
6045 fputs ("\n\nCompiling ", stdout);
6046 fwrite (pattern, 1, size, stdout);
6047 fputs (":\n", stdout);
6048 rxb->se_params = params;
6049 print_rexp (&rxb->rx, orig_rexp, 2, re_seprint, stdout);
6050 }
6051#endif
6052 {
6053 rx_Bitset cs = rx_cset(&rxb->rx);
6054 rx_Bitset cs2 = rx_cset(&rxb->rx);
6055#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6056 char * se_map = (char *) _alloca (paramc);
6057#else
6058 char * se_map = (char *) alloca (paramc);
6059#endif
6060 struct rexp_node * new_rexp = 0;
6061
6062
6063 bzero (se_map, paramc);
6064 find_backrefs (se_map, rexp, params);
6065 fewer_side_effects =
6066 remove_unecessary_side_effects (&rxb->rx, se_map,
6067 rx_copy_rexp (&rxb->rx, rexp), params);
6068
6069 speed_up_alt (&rxb->rx, rexp, 0);
6070 speed_up_alt (&rxb->rx, fewer_side_effects, 1);
6071
6072 {
6073 char * syntax_parens = rxb->syntax_parens;
6074 if (syntax_parens == (char *)0x1)
6075 rexp = remove_unecessary_side_effects
6076 (&rxb->rx, se_map, rexp, params);
6077 else if (syntax_parens)
6078 {
6079 int x;
6080 for (x = 0; x < paramc; ++x)
6081 if (( (params[x].se == re_se_lparen)
6082 || (params[x].se == re_se_rparen))
6083 && (!syntax_parens [params[x].op1]))
6084 se_map [x] = 1;
6085 rexp = remove_unecessary_side_effects
6086 (&rxb->rx, se_map, rexp, params);
6087 }
6088 }
6089
6090 /* At least one more optimization would be nice to have here but i ran out
6091 * of time. The idea would be to delay side effects.
6092 * For examle, `(abc)' is the same thing as `abc()' except that the
6093 * left paren is offset by 3 (which we know at compile time).
6094 * (In this comment, write that second pattern `abc(:3:)'
6095 * where `(:3:' is a syntactic unit.)
6096 *
6097 * Trickier: `(abc|defg)' is the same as `(abc(:3:|defg(:4:))'
6098 * (The paren nesting may be hard to follow -- that's an alternation
6099 * of `abc(:3:' and `defg(:4:' inside (purely syntactic) parens
6100 * followed by the closing paren from the original expression.)
6101 *
6102 * Neither the expression tree representation nor the the nfa make
6103 * this very easy to write. :(
6104 */
6105
6106 /* What we compile is different than what the parser returns.
6107 * Suppose the parser returns expression R.
6108 * Let R' be R with unnecessary register assignments removed
6109 * (see REMOVE_UNECESSARY_SIDE_EFFECTS, above).
6110 *
6111 * What we will compile is the expression:
6112 *
6113 * m{try}R{win}\|s{try}R'{win}
6114 *
6115 * {try} and {win} denote side effect epsilons (see EXPLORE_FUTURE).
6116 *
6117 * When trying a match, we insert an `m' at the beginning of the
6118 * string if the user wants registers to be filled, `s' if not.
6119 */
6120 new_rexp =
6121 rx_mk_r_alternate
6122 (&rxb->rx,
6123 rx_mk_r_concat (&rxb->rx, rx_mk_r_cset (&rxb->rx, cs2), rexp),
6124 rx_mk_r_concat (&rxb->rx,
6125 rx_mk_r_cset (&rxb->rx, cs), fewer_side_effects));
6126
6127 if (!(new_rexp && cs && cs2))
6128 return REG_ESPACE;
6129 RX_bitset_enjoin (cs2, '\0'); /* prefixed to the rexp used for matching. */
6130 RX_bitset_enjoin (cs, '\1'); /* prefixed to the rexp used for searching. */
6131 rexp = new_rexp;
6132 }
6133
6134#ifdef RX_DEBUG
6135 if (rx_debug_compile)
6136 {
6137 fputs ("\n...which is compiled as:\n", stdout);
6138 print_rexp (&rxb->rx, rexp, 2, re_seprint, stdout);
6139 }
6140#endif
6141 {
6142 struct rx_nfa_state *start = 0;
6143 struct rx_nfa_state *end = 0;
6144
6145 if (!rx_build_nfa (&rxb->rx, rexp, &start, &end))
6146 return REG_ESPACE; /* */
6147 else
6148 {
6149 void * mem = (void *)rxb->buffer;
6150 unsigned long size = rxb->allocated;
6151 int start_id;
6152 char * perm_mem;
6153 int iterator_size = paramc * sizeof (params[0]);
6154
6155 end->is_final = 1;
6156 start->is_start = 1;
6157 rx_name_nfa_states (&rxb->rx);
6158 start_id = start->id;
6159#ifdef RX_DEBUG
6160 if (rx_debug_compile)
6161 {
6162 fputs ("...giving the NFA: \n", stdout);
6163 dbug_rxb = rxb;
6164 print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
6165 }
6166#endif
6167 if (!rx_eclose_nfa (&rxb->rx))
6168 return REG_ESPACE;
6169 else
6170 {
6171 rx_delete_epsilon_transitions (&rxb->rx);
6172
6173 /* For compatability reasons, we need to shove the
6174 * compiled nfa into one chunk of malloced memory.
6175 */
6176 rxb->rx.reserved = ( sizeof (params[0]) * paramc
6177 + rx_sizeof_bitset (rxb->rx.local_cset_size));
6178#ifdef RX_DEBUG
6179 if (rx_debug_compile)
6180 {
6181 dbug_rxb = rxb;
6182 fputs ("...which cooks down (uncompactified) to: \n", stdout);
6183 print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
6184 }
6185#endif
6186 if (!rx_compactify_nfa (&rxb->rx, &mem, &size))
6187 return REG_ESPACE;
6188 rxb->buffer = mem;
6189 rxb->allocated = size;
6190 rxb->rx.buffer = mem;
6191 rxb->rx.allocated = size;
6192 perm_mem = ((char *)rxb->rx.buffer
6193 + rxb->rx.allocated - rxb->rx.reserved);
6194 rxb->se_params = ((struct re_se_params *)perm_mem);
6195 bcopy (params, rxb->se_params, iterator_size);
6196 perm_mem += iterator_size;
6197 rxb->fastset = (rx_Bitset) perm_mem;
6198 rxb->start = rx_id_to_nfa_state (&rxb->rx, start_id);
6199 }
6200 rx_bitset_null (rxb->rx.local_cset_size, rxb->fastset);
6201 rxb->can_match_empty = compute_fastset (rxb, orig_rexp);
6202 rxb->match_regs_on_stack =
6203 registers_on_stack (rxb, orig_rexp, 0, params);
6204 rxb->search_regs_on_stack =
6205 registers_on_stack (rxb, fewer_side_effects, 0, params);
6206 if (rxb->can_match_empty)
6207 rx_bitset_universe (rxb->rx.local_cset_size, rxb->fastset);
6208 rxb->is_anchored = is_anchored (orig_rexp, (rx_side_effect) re_se_hat);
6209 rxb->begbuf_only = is_anchored (orig_rexp,
6210 (rx_side_effect) re_se_begbuf);
6211 }
6212 rx_free_rexp (&rxb->rx, rexp);
6213 if (params)
6214 free (params);
6215#ifdef RX_DEBUG
6216 if (rx_debug_compile)
6217 {
6218 dbug_rxb = rxb;
6219 fputs ("...which cooks down to: \n", stdout);
6220 print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
6221 }
6222#endif
6223 }
6224 return REG_NOERROR;
6225}
6226
6227
6228
6229
6230/* This table gives an error message for each of the error codes listed
6231 in regex.h. Obviously the order here has to be same as there. */
6232
6233__const__ char * rx_error_msg[] =
6234{ 0, /* REG_NOERROR */
6235 "No match", /* REG_NOMATCH */
6236 "Invalid regular expression", /* REG_BADPAT */
6237 "Invalid collation character", /* REG_ECOLLATE */
6238 "Invalid character class name", /* REG_ECTYPE */
6239 "Trailing backslash", /* REG_EESCAPE */
6240 "Invalid back reference", /* REG_ESUBREG */
6241 "Unmatched [ or [^", /* REG_EBRACK */
6242 "Unmatched ( or \\(", /* REG_EPAREN */
6243 "Unmatched \\{", /* REG_EBRACE */
6244 "Invalid content of \\{\\}", /* REG_BADBR */
6245 "Invalid range end", /* REG_ERANGE */
6246 "Memory exhausted", /* REG_ESPACE */
6247 "Invalid preceding regular expression", /* REG_BADRPT */
6248 "Premature end of regular expression", /* REG_EEND */
6249 "Regular expression too big", /* REG_ESIZE */
6250 "Unmatched ) or \\)", /* REG_ERPAREN */
6251};
6252
6253
6254
6255
6256
6257char rx_slowmap [256] =
6258{
6259 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6260 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6261 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6262 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6263 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6264 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6265 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6266 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6267 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6268 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6269 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6270 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6271 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6272 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6273 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6274 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6275};
6276
6277#ifdef __STDC__
6278RX_DECL void
6279rx_blow_up_fastmap (struct re_pattern_buffer * rxb)
6280#else
6281RX_DECL void
6282rx_blow_up_fastmap (rxb)
6283 struct re_pattern_buffer * rxb;
6284#endif
6285{
6286 int x;
6287 for (x = 0; x < 256; ++x) /* &&&& 3.6 % */
6288 rxb->fastmap [x] = !!RX_bitset_member (rxb->fastset, x);
6289 rxb->fastmap_accurate = 1;
6290}
6291
6292
6293
6294
6295
6296#if !defined(REGEX_MALLOC) && !defined(__GNUC__)
6297#define RE_SEARCH_2_FN inner_re_search_2
6298#define RE_S2_QUAL static
6299#else
6300#define RE_SEARCH_2_FN re_search_2
6301#define RE_S2_QUAL
6302#endif
6303
6304struct re_search_2_closure
6305{
6306 __const__ char * string1;
6307 int size1;
6308 __const__ char * string2;
6309 int size2;
6310};
6311
6312
6313static __inline__ enum rx_get_burst_return
6314re_search_2_get_burst (pos, vclosure, stop)
6315 struct rx_string_position * pos;
6316 void * vclosure;
6317 int stop;
6318{
6319 struct re_search_2_closure * closure;
6320 closure = (struct re_search_2_closure *)vclosure;
6321 if (!closure->string2)
6322 {
6323 int inset;
6324
6325 inset = pos->pos - pos->string;
6326 if ((inset < -1) || (inset > closure->size1))
6327 return rx_get_burst_no_more;
6328 else
6329 {
6330 pos->pos = (__const__ unsigned char *) closure->string1 + inset;
6331 pos->string = (__const__ unsigned char *) closure->string1;
6332 pos->size = closure->size1;
6333 pos->end = ((__const__ unsigned char *)
6334 MIN(closure->string1 + closure->size1,
6335 closure->string1 + stop));
6336 pos->offset = 0;
6337 return ((pos->pos < pos->end)
6338 ? rx_get_burst_ok
6339 : rx_get_burst_no_more);
6340 }
6341 }
6342 else if (!closure->string1)
6343 {
6344 int inset;
6345
6346 inset = pos->pos - pos->string;
6347 pos->pos = (__const__ unsigned char *) closure->string2 + inset;
6348 pos->string = (__const__ unsigned char *) closure->string2;
6349 pos->size = closure->size2;
6350 pos->end = ((__const__ unsigned char *)
6351 MIN(closure->string2 + closure->size2,
6352 closure->string2 + stop));
6353 pos->offset = 0;
6354 return ((pos->pos < pos->end)
6355 ? rx_get_burst_ok
6356 : rx_get_burst_no_more);
6357 }
6358 else
6359 {
6360 int inset;
6361
6362 inset = pos->pos - pos->string + pos->offset;
6363 if (inset < closure->size1)
6364 {
6365 pos->pos = (__const__ unsigned char *) closure->string1 + inset;
6366 pos->string = (__const__ unsigned char *) closure->string1;
6367 pos->size = closure->size1;
6368 pos->end = ((__const__ unsigned char *)
6369 MIN(closure->string1 + closure->size1,
6370 closure->string1 + stop));
6371 pos->offset = 0;
6372 return rx_get_burst_ok;
6373 }
6374 else
6375 {
6376 pos->pos = ((__const__ unsigned char *)
6377 closure->string2 + inset - closure->size1);
6378 pos->string = (__const__ unsigned char *) closure->string2;
6379 pos->size = closure->size2;
6380 pos->end = ((__const__ unsigned char *)
6381 MIN(closure->string2 + closure->size2,
6382 closure->string2 + stop - closure->size1));
6383 pos->offset = closure->size1;
6384 return ((pos->pos < pos->end)
6385 ? rx_get_burst_ok
6386 : rx_get_burst_no_more);
6387 }
6388 }
6389}
6390
6391
6392static __inline__ enum rx_back_check_return
6393re_search_2_back_check (pos, lparen, rparen, translate, vclosure, stop)
6394 struct rx_string_position * pos;
6395 int lparen;
6396 int rparen;
6397 unsigned char * translate;
6398 void * vclosure;
6399 int stop;
6400{
6401 struct rx_string_position there;
6402 struct rx_string_position past;
6403
6404 there = *pos;
6405 there.pos = there.string + lparen - there.offset;
6406 re_search_2_get_burst (&there, vclosure, stop);
6407
6408 past = *pos;
6409 past.pos = past.string + rparen - there.offset;
6410 re_search_2_get_burst (&past, vclosure, stop);
6411
6412 ++pos->pos;
6413 re_search_2_get_burst (pos, vclosure, stop);
6414
6415 while ( (there.pos != past.pos)
6416 && (pos->pos != pos->end))
6417 if (TRANSLATE(*there.pos) != TRANSLATE(*pos->pos))
6418 return rx_back_check_fail;
6419 else
6420 {
6421 ++there.pos;
6422 ++pos->pos;
6423 if (there.pos == there.end)
6424 re_search_2_get_burst (&there, vclosure, stop);
6425 if (pos->pos == pos->end)
6426 re_search_2_get_burst (pos, vclosure, stop);
6427 }
6428
6429 if (there.pos != past.pos)
6430 return rx_back_check_fail;
6431 --pos->pos;
6432 re_search_2_get_burst (pos, vclosure, stop);
6433 return rx_back_check_pass;
6434}
6435
6436static __inline__ int
6437re_search_2_fetch_char (pos, offset, app_closure, stop)
6438 struct rx_string_position * pos;
6439 int offset;
6440 void * app_closure;
6441 int stop;
6442{
6443 struct re_search_2_closure * closure;
6444 closure = (struct re_search_2_closure *)app_closure;
6445 if (offset == 0)
6446 {
6447 if (pos->pos >= pos->string)
6448 return *pos->pos;
6449 else
6450 {
6451 if ( (pos->string == (__const__ unsigned char *) closure->string2)
6452 && (closure->string1)
6453 && (closure->size1))
6454 return closure->string1[closure->size1 - 1];
6455 else
6456 return 0; /* sure, why not. */
6457 }
6458 }
6459 if (pos->pos == pos->end)
6460 return *closure->string2;
6461 else
6462 return pos->pos[1];
6463}
6464
6465
6466#ifdef __STDC__
6467RE_S2_QUAL int
6468RE_SEARCH_2_FN (struct re_pattern_buffer *rxb,
6469 __const__ char * string1, int size1,
6470 __const__ char * string2, int size2,
6471 int startpos, int range,
6472 struct re_registers *regs,
6473 int stop)
6474#else
6475RE_S2_QUAL int
6476RE_SEARCH_2_FN (rxb,
6477 string1, size1, string2, size2, startpos, range, regs, stop)
6478 struct re_pattern_buffer *rxb;
6479 __const__ char * string1;
6480 int size1;
6481 __const__ char * string2;
6482 int size2;
6483 int startpos;
6484 int range;
6485 struct re_registers *regs;
6486 int stop;
6487#endif
6488{
6489 int answer;
6490 struct re_search_2_closure closure;
6491 closure.string1 = string1;
6492 closure.size1 = size1;
6493 closure.string2 = string2;
6494 closure.size2 = size2;
6495 answer = rx_search (rxb, startpos, range, stop, size1 + size2,
6496 re_search_2_get_burst,
6497 re_search_2_back_check,
6498 re_search_2_fetch_char,
6499 (void *)&closure,
6500 regs,
6501 0,
6502 0);
6503 switch (answer)
6504 {
6505 case rx_search_continuation:
6506 abort ();
6507 case rx_search_error:
6508 return -2;
6509 case rx_search_soft_fail:
6510 case rx_search_fail:
6511 return -1;
6512 default:
6513 return answer;
6514 }
6515}
6516
6517/* Export rx_search to callers outside this file. */
6518
6519int
6520re_rx_search (rxb, startpos, range, stop, total_size,
6521 get_burst, back_check, fetch_char,
6522 app_closure, regs, resume_state, save_state)
6523 struct re_pattern_buffer * rxb;
6524 int startpos;
6525 int range;
6526 int stop;
6527 int total_size;
6528 rx_get_burst_fn get_burst;
6529 rx_back_check_fn back_check;
6530 rx_fetch_char_fn fetch_char;
6531 void * app_closure;
6532 struct re_registers * regs;
6533 struct rx_search_state * resume_state;
6534 struct rx_search_state * save_state;
6535{
6536 return rx_search (rxb, startpos, range, stop, total_size,
6537 get_burst, back_check, fetch_char, app_closure,
6538 regs, resume_state, save_state);
6539}
6540
6541#if !defined(REGEX_MALLOC) && !defined(__GNUC__)
6542#ifdef __STDC__
6543int
6544re_search_2 (struct re_pattern_buffer *rxb,
6545 __const__ char * string1, int size1,
6546 __const__ char * string2, int size2,
6547 int startpos, int range,
6548 struct re_registers *regs,
6549 int stop)
6550#else
6551int
6552re_search_2 (rxb, string1, size1, string2, size2, startpos, range, regs, stop)
6553 struct re_pattern_buffer *rxb;
6554 __const__ char * string1;
6555 int size1;
6556 __const__ char * string2;
6557 int size2;
6558 int startpos;
6559 int range;
6560 struct re_registers *regs;
6561 int stop;
6562#endif
6563{
6564 int ret;
6565 ret = inner_re_search_2 (rxb, string1, size1, string2, size2, startpos,
6566 range, regs, stop);
6567#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6568 _alloca (0);
6569#else
6570 alloca (0);
6571#endif
6572 return ret;
6573}
6574#endif
6575
6576
6577/* Like re_search_2, above, but only one string is specified, and
6578 * doesn't let you say where to stop matching.
6579 */
6580
6581#ifdef __STDC__
6582int
6583re_search (struct re_pattern_buffer * rxb, __const__ char *string,
6584 int size, int startpos, int range,
6585 struct re_registers *regs)
6586#else
6587int
6588re_search (rxb, string, size, startpos, range, regs)
6589 struct re_pattern_buffer * rxb;
6590 __const__ char * string;
6591 int size;
6592 int startpos;
6593 int range;
6594 struct re_registers *regs;
6595#endif
6596{
6597 return re_search_2 (rxb, 0, 0, string, size, startpos, range, regs, size);
6598}
6599
6600#ifdef __STDC__
6601int
6602re_match_2 (struct re_pattern_buffer * rxb,
6603 __const__ char * string1, int size1,
6604 __const__ char * string2, int size2,
6605 int pos, struct re_registers *regs, int stop)
6606#else
6607int
6608re_match_2 (rxb, string1, size1, string2, size2, pos, regs, stop)
6609 struct re_pattern_buffer * rxb;
6610 __const__ char * string1;
6611 int size1;
6612 __const__ char * string2;
6613 int size2;
6614 int pos;
6615 struct re_registers *regs;
6616 int stop;
6617#endif
6618{
6619 struct re_registers some_regs;
6620 regoff_t start;
6621 regoff_t end;
6622 int srch;
6623 int save = rxb->regs_allocated;
6624 struct re_registers * regs_to_pass = regs;
6625
6626 if (!regs)
6627 {
6628 some_regs.start = &start;
6629 some_regs.end = &end;
6630 some_regs.num_regs = 1;
6631 regs_to_pass = &some_regs;
6632 rxb->regs_allocated = REGS_FIXED;
6633 }
6634
6635 srch = re_search_2 (rxb, string1, size1, string2, size2,
6636 pos, 1, regs_to_pass, stop);
6637 if (regs_to_pass != regs)
6638 rxb->regs_allocated = save;
6639 if (srch < 0)
6640 return srch;
6641 return regs_to_pass->end[0] - regs_to_pass->start[0];
6642}
6643
6644/* re_match is like re_match_2 except it takes only a single string. */
6645
6646#ifdef __STDC__
6647int
6648re_match (struct re_pattern_buffer * rxb,
6649 __const__ char * string,
6650 int size, int pos,
6651 struct re_registers *regs)
6652#else
6653int
6654re_match (rxb, string, size, pos, regs)
6655 struct re_pattern_buffer * rxb;
6656 __const__ char *string;
6657 int size;
6658 int pos;
6659 struct re_registers *regs;
6660#endif
6661{
6662 return re_match_2 (rxb, string, size, 0, 0, pos, regs, size);
6663}
6664
6665
6666
6667
6668/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
6669 also be assigned to arbitrarily: each pattern buffer stores its own
6670 syntax, so it can be changed between regex compilations. */
6671reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
6672
6673
6674/* Specify the precise syntax of regexps for compilation. This provides
6675 for compatibility for various utilities which historically have
6676 different, incompatible syntaxes.
6677
6678 The argument SYNTAX is a bit mask comprised of the various bits
6679 defined in regex.h. We return the old syntax. */
6680
6681#ifdef __STDC__
6682reg_syntax_t
6683re_set_syntax (reg_syntax_t syntax)
6684#else
6685reg_syntax_t
6686re_set_syntax (syntax)
6687 reg_syntax_t syntax;
6688#endif
6689{
6690 reg_syntax_t ret = re_syntax_options;
6691
6692 re_syntax_options = syntax;
6693 return ret;
6694}
6695
6696
6697
6698/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
6699 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
6700 this memory for recording register information. STARTS and ENDS
6701 must be allocated using the malloc library routine, and must each
6702 be at least NUM_REGS * sizeof (regoff_t) bytes long.
6703
6704 If NUM_REGS == 0, then subsequent matches should allocate their own
6705 register data.
6706
6707 Unless this function is called, the first search or match using
6708 PATTERN_BUFFER will allocate its own register data, without
6709 freeing the old data. */
6710
6711#ifdef __STDC__
6712void
6713re_set_registers (struct re_pattern_buffer *bufp,
6714 struct re_registers *regs,
6715 unsigned num_regs,
6716 regoff_t * starts, regoff_t * ends)
6717#else
6718void
6719re_set_registers (bufp, regs, num_regs, starts, ends)
6720 struct re_pattern_buffer *bufp;
6721 struct re_registers *regs;
6722 unsigned num_regs;
6723 regoff_t * starts;
6724 regoff_t * ends;
6725#endif
6726{
6727 if (num_regs)
6728 {
6729 bufp->regs_allocated = REGS_REALLOCATE;
6730 regs->num_regs = num_regs;
6731 regs->start = starts;
6732 regs->end = ends;
6733 }
6734 else
6735 {
6736 bufp->regs_allocated = REGS_UNALLOCATED;
6737 regs->num_regs = 0;
6738 regs->start = regs->end = (regoff_t) 0;
6739 }
6740}
6741
6742
6743
6744
6745
6746#ifdef __STDC__
6747static int
6748cplx_se_sublist_len (struct rx_se_list * list)
6749#else
6750static int
6751cplx_se_sublist_len (list)
6752 struct rx_se_list * list;
6753#endif
6754{
6755 int x = 0;
6756 while (list)
6757 {
6758 if ((long)list->car >= 0)
6759 ++x;
6760 list = list->cdr;
6761 }
6762 return x;
6763}
6764
6765
6766/* For rx->se_list_cmp */
6767
6768#ifdef __STDC__
6769static int
6770posix_se_list_order (struct rx * rx,
6771 struct rx_se_list * a, struct rx_se_list * b)
6772#else
6773static int
6774posix_se_list_order (rx, a, b)
6775 struct rx * rx;
6776 struct rx_se_list * a;
6777 struct rx_se_list * b;
6778#endif
6779{
6780 int al = cplx_se_sublist_len (a);
6781 int bl = cplx_se_sublist_len (b);
6782
6783 if (!al && !bl)
6784 return ((a == b)
6785 ? 0
6786 : ((a < b) ? -1 : 1));
6787
6788 else if (!al)
6789 return -1;
6790
6791 else if (!bl)
6792 return 1;
6793
6794 else
6795 {
6796#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6797 rx_side_effect * av = ((rx_side_effect *)
6798 _alloca (sizeof (rx_side_effect) * (al + 1)));
6799 rx_side_effect * bv = ((rx_side_effect *)
6800 _alloca (sizeof (rx_side_effect) * (bl + 1)));
6801#else
6802 rx_side_effect * av = ((rx_side_effect *)
6803 alloca (sizeof (rx_side_effect) * (al + 1)));
6804 rx_side_effect * bv = ((rx_side_effect *)
6805 alloca (sizeof (rx_side_effect) * (bl + 1)));
6806#endif
6807 struct rx_se_list * ap = a;
6808 struct rx_se_list * bp = b;
6809 int ai, bi;
6810
6811 for (ai = al - 1; ai >= 0; --ai)
6812 {
6813 while ((long)ap->car < 0)
6814 ap = ap->cdr;
6815 av[ai] = ap->car;
6816 ap = ap->cdr;
6817 }
6818 av[al] = (rx_side_effect)-2;
6819 for (bi = bl - 1; bi >= 0; --bi)
6820 {
6821 while ((long)bp->car < 0)
6822 bp = bp->cdr;
6823 bv[bi] = bp->car;
6824 bp = bp->cdr;
6825 }
6826 bv[bl] = (rx_side_effect)-1;
6827
6828 {
6829 int ret;
6830 int x = 0;
6831 while (av[x] == bv[x])
6832 ++x;
6833 ret = (((unsigned *)(av[x]) < (unsigned *)(bv[x])) ? -1 : 1);
6834 return ret;
6835 }
6836 }
6837}
6838
6839
6840
6841
6842
6843/* re_compile_pattern is the GNU regular expression compiler: it
6844 compiles PATTERN (of length SIZE) and puts the result in RXB.
6845 Returns 0 if the pattern was valid, otherwise an error string.
6846
6847 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6848 are set in RXB on entry.
6849
6850 We call rx_compile to do the actual compilation. */
6851
6852#ifdef __STDC__
6853__const__ char *
6854re_compile_pattern (__const__ char *pattern,
6855 int length,
6856 struct re_pattern_buffer * rxb)
6857#else
6858__const__ char *
6859re_compile_pattern (pattern, length, rxb)
6860 __const__ char *pattern;
6861 int length;
6862 struct re_pattern_buffer * rxb;
6863#endif
6864{
6865 reg_errcode_t ret;
6866
6867 /* GNU code is written to assume at least RE_NREGS registers will be set
6868 (and at least one extra will be -1). */
6869 rxb->regs_allocated = REGS_UNALLOCATED;
6870
6871 /* And GNU code determines whether or not to get register information
6872 by passing null for the REGS argument to re_match, etc., not by
6873 setting no_sub. */
6874 rxb->no_sub = 0;
6875
6876 rxb->rx.local_cset_size = 256;
6877
6878 /* Match anchors at newline. */
6879 rxb->newline_anchor = 1;
6880
6881 rxb->re_nsub = 0;
6882 rxb->start = 0;
6883 rxb->se_params = 0;
6884 rxb->rx.nodec = 0;
6885 rxb->rx.epsnodec = 0;
6886 rxb->rx.instruction_table = 0;
6887 rxb->rx.nfa_states = 0;
6888 rxb->rx.se_list_cmp = posix_se_list_order;
6889 rxb->rx.start_set = 0;
6890
6891 ret = rx_compile (pattern, length, re_syntax_options, rxb);
6892#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6893 _alloca (0);
6894#else
6895 alloca (0);
6896#endif
6897 return rx_error_msg[(int) ret];
6898}
6899
6900
6901
6902#ifdef __STDC__
6903int
6904re_compile_fastmap (struct re_pattern_buffer * rxb)
6905#else
6906int
6907re_compile_fastmap (rxb)
6908 struct re_pattern_buffer * rxb;
6909#endif
6910{
6911 rx_blow_up_fastmap (rxb);
6912 return 0;
6913}
6914
6915
6916
6917
6918
6919/* Entry points compatible with 4.2 BSD regex library. We don't define
6920 them if this is an Emacs or POSIX compilation. */
6921
6922#if (!defined (emacs) && !defined (_POSIX_SOURCE)) || defined(USE_BSD_REGEX)
6923
6924/* BSD has one and only one pattern buffer. */
6925static struct re_pattern_buffer rx_comp_buf;
6926
6927#ifdef __STDC__
6928char *
6929re_comp (__const__ char *s)
6930#else
6931char *
6932re_comp (s)
6933 __const__ char *s;
6934#endif
6935{
6936 reg_errcode_t ret;
6937
6938 if (!s || (*s == '\0'))
6939 {
6940 if (!rx_comp_buf.buffer)
6941 return "No previous regular expression";
6942 return 0;
6943 }
6944
6945 if (!rx_comp_buf.fastmap)
6946 {
6947 rx_comp_buf.fastmap = (char *) malloc (1 << CHARBITS);
6948 if (!rx_comp_buf.fastmap)
6949 return "Memory exhausted";
6950 }
6951
6952 /* Since `rx_exec' always passes NULL for the `regs' argument, we
6953 don't need to initialize the pattern buffer fields which affect it. */
6954
6955 /* Match anchors at newlines. */
6956 rx_comp_buf.newline_anchor = 1;
6957
6958 rx_comp_buf.re_nsub = 0;
6959 rx_comp_buf.start = 0;
6960 rx_comp_buf.se_params = 0;
6961 rx_comp_buf.rx.nodec = 0;
6962 rx_comp_buf.rx.epsnodec = 0;
6963 rx_comp_buf.rx.instruction_table = 0;
6964 rx_comp_buf.rx.nfa_states = 0;
6965 rx_comp_buf.rx.start = 0;
6966 rx_comp_buf.rx.se_list_cmp = posix_se_list_order;
6967 rx_comp_buf.rx.local_cset_size = 256;
6968
6969 ret = rx_compile (s, strlen (s), re_syntax_options, &rx_comp_buf);
6970#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6971 _alloca (0);
6972#else
6973 alloca (0);
6974#endif
6975
6976 /* Yes, we're discarding `__const__' here. */
6977 return (char *) rx_error_msg[(int) ret];
6978}
6979
6980
6981#ifdef __STDC__
6982int
6983re_exec (__const__ char *s)
6984#else
6985int
6986re_exec (s)
6987 __const__ char *s;
6988#endif
6989{
6990 __const__ int len = strlen (s);
6991 return
6992 0 <= re_search (&rx_comp_buf, s, len, 0, len, (struct re_registers *) 0);
6993}
6994#endif /* not emacs and not _POSIX_SOURCE */
6995
6996
6997
6998
6999/* POSIX.2 functions. Don't define these for Emacs. */
7000
7001#if !defined(emacs)
7002
7003/* regcomp takes a regular expression as a string and compiles it.
7004
7005 PREG is a regex_t *. We do not expect any fields to be initialized,
7006 since POSIX says we shouldn't. Thus, we set
7007
7008 `buffer' to the compiled pattern;
7009 `used' to the length of the compiled pattern;
7010 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
7011 REG_EXTENDED bit in CFLAGS is set; otherwise, to
7012 RE_SYNTAX_POSIX_BASIC;
7013 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
7014 `fastmap' and `fastmap_accurate' to zero;
7015 `re_nsub' to the number of subexpressions in PATTERN.
7016
7017 PATTERN is the address of the pattern string.
7018
7019 CFLAGS is a series of bits which affect compilation.
7020
7021 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
7022 use POSIX basic syntax.
7023
7024 If REG_NEWLINE is set, then . and [^...] don't match newline.
7025 Also, regexec will try a match beginning after every newline.
7026
7027 If REG_ICASE is set, then we considers upper- and lowercase
7028 versions of letters to be equivalent when matching.
7029
7030 If REG_NOSUB is set, then when PREG is passed to regexec, that
7031 routine will report only success or failure, and nothing about the
7032 registers.
7033
7034 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
7035 the return codes and their meanings.) */
7036
7037
7038#ifdef __STDC__
7039int
7040regcomp (regex_t * preg, __const__ char * pattern, int cflags)
7041#else
7042int
7043regcomp (preg, pattern, cflags)
7044 regex_t * preg;
7045 __const__ char * pattern;
7046 int cflags;
7047#endif
7048{
7049 reg_errcode_t ret;
7050 unsigned syntax
7051 = cflags & REG_EXTENDED ? RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
7052
7053 /* regex_compile will allocate the space for the compiled pattern. */
7054 preg->buffer = 0;
7055 preg->allocated = 0;
7056 preg->fastmap = malloc (256);
7057 if (!preg->fastmap)
7058 return REG_ESPACE;
7059 preg->fastmap_accurate = 0;
7060
7061 if (cflags & REG_ICASE)
7062 {
7063 unsigned i;
7064
7065 preg->translate = (unsigned char *) malloc (256);
7066 if (!preg->translate)
7067 return (int) REG_ESPACE;
7068
7069 /* Map uppercase characters to corresponding lowercase ones. */
7070 for (i = 0; i < CHAR_SET_SIZE; i++)
7071 preg->translate[i] = isupper (i) ? tolower (i) : i;
7072 }
7073 else
7074 preg->translate = 0;
7075
7076 /* If REG_NEWLINE is set, newlines are treated differently. */
7077 if (cflags & REG_NEWLINE)
7078 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
7079 syntax &= ~RE_DOT_NEWLINE;
7080 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
7081 /* It also changes the matching behavior. */
7082 preg->newline_anchor = 1;
7083 }
7084 else
7085 preg->newline_anchor = 0;
7086
7087 preg->no_sub = !!(cflags & REG_NOSUB);
7088
7089 /* POSIX says a null character in the pattern terminates it, so we
7090 can use strlen here in compiling the pattern. */
7091 preg->re_nsub = 0;
7092 preg->start = 0;
7093 preg->se_params = 0;
7094 preg->syntax_parens = 0;
7095 preg->rx.nodec = 0;
7096 preg->rx.epsnodec = 0;
7097 preg->rx.instruction_table = 0;
7098 preg->rx.nfa_states = 0;
7099 preg->rx.local_cset_size = 256;
7100 preg->rx.start = 0;
7101 preg->rx.se_list_cmp = posix_se_list_order;
7102 preg->rx.start_set = 0;
7103 ret = rx_compile (pattern, strlen (pattern), syntax, preg);
7104
7105#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
7106 _alloca (0);
7107#else
7108 alloca (0);
7109#endif
7110
7111 /* POSIX doesn't distinguish between an unmatched open-group and an
7112 unmatched close-group: both are REG_EPAREN. */
7113 if (ret == REG_ERPAREN) ret = REG_EPAREN;
7114
7115 return (int) ret;
7116}
7117
7118
7119/* regexec searches for a given pattern, specified by PREG, in the
7120 string STRING.
7121
7122 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
7123 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
7124 least NMATCH elements, and we set them to the offsets of the
7125 corresponding matched substrings.
7126
7127 EFLAGS specifies `execution flags' which affect matching: if
7128 REG_NOTBOL is set, then ^ does not match at the beginning of the
7129 string; if REG_NOTEOL is set, then $ does not match at the end.
7130
7131 We return 0 if we find a match and REG_NOMATCH if not. */
7132
7133#ifdef __STDC__
7134int
7135regexec (__const__ regex_t *preg, __const__ char *string,
7136 size_t nmatch, regmatch_t pmatch[],
7137 int eflags)
7138#else
7139int
7140regexec (preg, string, nmatch, pmatch, eflags)
7141 __const__ regex_t *preg;
7142 __const__ char *string;
7143 size_t nmatch;
7144 regmatch_t pmatch[];
7145 int eflags;
7146#endif
7147{
7148 int ret;
7149 struct re_registers regs;
7150 regex_t private_preg;
7151 int len = strlen (string);
7152 boolean want_reg_info = !preg->no_sub && nmatch > 0;
7153
7154 private_preg = *preg;
7155
7156 private_preg.not_bol = !!(eflags & REG_NOTBOL);
7157 private_preg.not_eol = !!(eflags & REG_NOTEOL);
7158
7159 /* The user has told us exactly how many registers to return
7160 * information about, via `nmatch'. We have to pass that on to the
7161 * matching routines.
7162 */
7163 private_preg.regs_allocated = REGS_FIXED;
7164
7165 if (want_reg_info)
7166 {
7167 regs.num_regs = nmatch;
7168 regs.start = (( regoff_t *) malloc ((nmatch) * sizeof ( regoff_t)));
7169 regs.end = (( regoff_t *) malloc ((nmatch) * sizeof ( regoff_t)));
7170 if (regs.start == 0 || regs.end == 0)
7171 return (int) REG_NOMATCH;
7172 }
7173
7174 /* Perform the searching operation. */
7175 ret = re_search (&private_preg,
7176 string, len,
7177 /* start: */ 0,
7178 /* range: */ len,
7179 want_reg_info ? &regs : (struct re_registers *) 0);
7180
7181 /* Copy the register information to the POSIX structure. */
7182 if (want_reg_info)
7183 {
7184 if (ret >= 0)
7185 {
7186 unsigned r;
7187
7188 for (r = 0; r < nmatch; r++)
7189 {
7190 pmatch[r].rm_so = regs.start[r];
7191 pmatch[r].rm_eo = regs.end[r];
7192 }
7193 }
7194
7195 /* If we needed the temporary register info, free the space now. */
7196 free (regs.start);
7197 free (regs.end);
7198 }
7199
7200 /* We want zero return to mean success, unlike `re_search'. */
7201 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
7202}
7203
7204
7205/* Returns a message corresponding to an error code, ERRCODE, returned
7206 from either regcomp or regexec. */
7207
7208#ifdef __STDC__
7209size_t
7210regerror (int rx_errcode, __const__ regex_t *preg,
7211 char *errbuf, size_t errbuf_size)
7212#else
7213size_t
7214regerror (rx_errcode, preg, errbuf, errbuf_size)
7215 int rx_errcode;
7216 __const__ regex_t *preg;
7217 char *errbuf;
7218 size_t errbuf_size;
7219#endif
7220{
7221 __const__ char *msg
7222 = rx_error_msg[rx_errcode] == 0 ? "Success" : rx_error_msg[rx_errcode];
7223 size_t msg_size = strlen (msg) + 1; /* Includes the 0. */
7224
7225 if (errbuf_size != 0)
7226 {
7227 if (msg_size > errbuf_size)
7228 {
7229 strncpy (errbuf, msg, errbuf_size - 1);
7230 errbuf[errbuf_size - 1] = 0;
7231 }
7232 else
7233 strcpy (errbuf, msg);
7234 }
7235
7236 return msg_size;
7237}
7238
7239
7240/* Free dynamically allocated space used by PREG. */
7241
7242#ifdef __STDC__
7243void
7244regfree (regex_t *preg)
7245#else
7246void
7247regfree (preg)
7248 regex_t *preg;
7249#endif
7250{
7251 if (preg->buffer != 0)
7252 free (preg->buffer);
7253 preg->buffer = 0;
7254 preg->allocated = 0;
7255
7256 if (preg->fastmap != 0)
7257 free (preg->fastmap);
7258 preg->fastmap = 0;
7259 preg->fastmap_accurate = 0;
7260
7261 if (preg->translate != 0)
7262 free (preg->translate);
7263 preg->translate = 0;
7264}
7265
7266#endif /* not emacs */
7267
7268
7269
7270
7271
Note: See TracBrowser for help on using the repository browser.