source: trunk/indexers/mg/lib/rx.c@ 13657

Last change on this file since 13657 was 13657, checked in by kjdon, 15 years ago

The previous log message should have been
Changed a variable named "errcode" to "rx_errcode", so this compiles with Visual Studio 2005.

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 174.9 KB
Line 
1
2/* Copyright (C) 1992, 1993, 1994, 1995 Free Software Foundation, Inc.
3
4This file is part of the librx library.
5
6Librx is free software; you can redistribute it and/or modify it under
7the terms of the GNU Library General Public License as published by
8the Free Software Foundation; either version 2, or (at your option)
9any later version.
10
11Librx is distributed in the hope that it will be useful, but WITHOUT
12ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
13FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
14for more details.
15
16You should have received a copy of the GNU Library General Public
17License along with this software; see the file COPYING.LIB. If not,
18write to the Free Software Foundation, 675 Mass Ave, Cambridge, MA
1902139, USA. */
20
21/* NOTE!!! AIX is so losing it requires this to be the first thing in the
22 * file.
23 * Do not put ANYTHING before it!
24 */
25#if !defined (__GNUC__) && defined (_AIX)
26 #pragma alloca
27#endif
28
29/* To make linux happy? */
30#ifndef _GNU_SOURCE
31#define _GNU_SOURCE
32#endif
33
34#if HAVE_CONFIG_H
35# ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
36# include <win32cfg.h>
37# include <malloc.h>
38# else
39# include <sysfuncs.h>
40# endif
41#endif
42
43
44
45char rx_version_string[] = "GNU Rx version 0.07.1";
46
47 /* ``Too hard!''
48 * -- anon.
49 */
50
51
52
53#include <stdio.h>
54#include <ctype.h>
55#ifndef isgraph
56#define isgraph(c) (isprint (c) && !isspace (c))
57#endif
58#ifndef isblank
59#define isblank(c) ((c) == ' ' || (c) == '\t')
60#endif
61
62#include <sys/types.h>
63
64#undef MAX
65#undef MIN
66#define MAX(a, b) ((a) > (b) ? (a) : (b))
67#define MIN(a, b) ((a) < (b) ? (a) : (b))
68
69typedef char boolean;
70#define false 0
71#define true 1
72
73#ifndef __GCC__
74#undef __inline__
75#define __inline__
76#endif
77
78/* Emacs already defines alloca, sometimes. */
79#ifndef alloca
80
81/* Make alloca work the best possible way. */
82#ifdef __GNUC__
83#define alloca __builtin_alloca
84#else /* not __GNUC__ */
85#if HAVE_ALLOCA_H
86#include <alloca.h>
87#else /* not __GNUC__ or HAVE_ALLOCA_H */
88#ifndef _AIX /* Already did AIX, up at the top. */
89char *alloca ();
90#endif /* not _AIX */
91#endif /* not HAVE_ALLOCA_H */
92#endif /* not __GNUC__ */
93
94#endif /* not alloca */
95
96/* Memory management and stuff for emacs. */
97
98#define CHARBITS 8
99#define remalloc(M, S) (M ? realloc (M, S) : malloc (S))
100
101
102/* Should we use malloc or alloca? If REGEX_MALLOC is not defined, we
103 * use `alloca' instead of `malloc' for the backtracking stack.
104 *
105 * Emacs will die miserably if we don't do this.
106 */
107
108#ifdef REGEX_MALLOC
109#define REGEX_ALLOCATE malloc
110#else /* not REGEX_MALLOC */
111#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
112#define REGEX_ALLOCATE _alloca
113#else
114#define REGEX_ALLOCATE alloca
115#endif /* __WIN32__ */
116#endif /* not REGEX_MALLOC */
117
118
119#ifdef RX_WANT_RX_DEFS
120#define RX_DECL extern
121#define RX_DEF_QUAL
122#else
123#define RX_WANT_RX_DEFS
124#define RX_DECL static
125#define RX_DEF_QUAL static
126#endif
127#include "rx.h"
128#undef RX_DECL
129#define RX_DECL RX_DEF_QUAL
130
131
132#ifndef emacs
133
134#ifdef SYNTAX_TABLE
135extern char *re_syntax_table;
136#else /* not SYNTAX_TABLE */
137
138#ifdef __STDC__
139static void
140init_syntax_once (void)
141#else
142static void
143init_syntax_once ()
144#endif
145{
146 register int c;
147 static int done = 0;
148
149 if (done)
150 return;
151
152 bzero (re_syntax_table, sizeof re_syntax_table);
153
154 for (c = 'a'; c <= 'z'; c++)
155 re_syntax_table[c] = Sword;
156
157 for (c = 'A'; c <= 'Z'; c++)
158 re_syntax_table[c] = Sword;
159
160 for (c = '0'; c <= '9'; c++)
161 re_syntax_table[c] = Sword;
162
163 re_syntax_table['_'] = Sword;
164
165 done = 1;
166}
167#endif /* not SYNTAX_TABLE */
168#endif /* not emacs */
169
170
171/* Compile with `-DRX_DEBUG' and use the following flags.
172 *
173 * Debugging flags:
174 * rx_debug - print information as a regexp is compiled
175 * rx_debug_trace - print information as a regexp is executed
176 */
177
178#ifdef RX_DEBUG
179
180int rx_debug_compile = 0;
181int rx_debug_trace = 0;
182static struct re_pattern_buffer * dbug_rxb = 0;
183
184#ifdef __STDC__
185typedef void (*side_effect_printer) (struct rx *, void *, FILE *);
186#else
187typedef void (*side_effect_printer) ();
188#endif
189
190#ifdef __STDC__
191static void print_cset (struct rx *rx, rx_Bitset cset, FILE * fp);
192#else
193static void print_cset ();
194#endif
195
196#ifdef __STDC__
197static void
198print_rexp (struct rx *rx,
199 struct rexp_node *node, int depth,
200 side_effect_printer seprint, FILE * fp)
201#else
202static void
203print_rexp (rx, node, depth, seprint, fp)
204 struct rx *rx;
205 struct rexp_node *node;
206 int depth;
207 side_effect_printer seprint;
208 FILE * fp;
209#endif
210{
211 if (!node)
212 return;
213 else
214 {
215 switch (node->type)
216 {
217 case r_cset:
218 {
219 fprintf (fp, "%*s", depth, "");
220 print_cset (rx, node->params.cset, fp);
221 fputc ('\n', fp);
222 break;
223 }
224
225 case r_opt:
226 case r_star:
227 fprintf (fp, "%*s%s\n", depth, "",
228 node->type == r_opt ? "opt" : "star");
229 print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
230 break;
231
232 case r_2phase_star:
233 fprintf (fp, "%*s2phase star\n", depth, "");
234 print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp);
235 print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
236 break;
237
238
239 case r_alternate:
240 case r_concat:
241 fprintf (fp, "%*s%s\n", depth, "",
242 node->type == r_alternate ? "alt" : "concat");
243 print_rexp (rx, node->params.pair.left, depth + 3, seprint, fp);
244 print_rexp (rx, node->params.pair.right, depth + 3, seprint, fp);
245 break;
246 case r_side_effect:
247 fprintf (fp, "%*sSide effect: ", depth, "");
248 seprint (rx, node->params.side_effect, fp);
249 fputc ('\n', fp);
250 }
251 }
252}
253
254#ifdef __STDC__
255static void
256print_nfa (struct rx * rx,
257 struct rx_nfa_state * n,
258 side_effect_printer seprint, FILE * fp)
259#else
260static void
261print_nfa (rx, n, seprint, fp)
262 struct rx * rx;
263 struct rx_nfa_state * n;
264 side_effect_printer seprint;
265 FILE * fp;
266#endif
267{
268 while (n)
269 {
270 struct rx_nfa_edge *e = n->edges;
271 struct rx_possible_future *ec = n->futures;
272 fprintf (fp, "node %d %s\n", n->id,
273 n->is_final ? "final" : (n->is_start ? "start" : ""));
274 while (e)
275 {
276 fprintf (fp, " edge to %d, ", e->dest->id);
277 switch (e->type)
278 {
279 case ne_epsilon:
280 fprintf (fp, "epsilon\n");
281 break;
282 case ne_side_effect:
283 fprintf (fp, "side effect ");
284 seprint (rx, e->params.side_effect, fp);
285 fputc ('\n', fp);
286 break;
287 case ne_cset:
288 fprintf (fp, "cset ");
289 print_cset (rx, e->params.cset, fp);
290 fputc ('\n', fp);
291 break;
292 }
293 e = e->next;
294 }
295
296 while (ec)
297 {
298 int x;
299 struct rx_nfa_state_set * s;
300 struct rx_se_list * l;
301 fprintf (fp, " eclosure to {");
302 for (s = ec->destset; s; s = s->cdr)
303 fprintf (fp, "%d ", s->car->id);
304 fprintf (fp, "} (");
305 for (l = ec->effects; l; l = l->cdr)
306 {
307 seprint (rx, l->car, fp);
308 fputc (' ', fp);
309 }
310 fprintf (fp, ")\n");
311 ec = ec->next;
312 }
313 n = n->next;
314 }
315}
316
317static char * efnames [] =
318{
319 "bogon",
320 "re_se_try",
321 "re_se_pushback",
322 "re_se_push0",
323 "re_se_pushpos",
324 "re_se_chkpos",
325 "re_se_poppos",
326 "re_se_at_dot",
327 "re_se_syntax",
328 "re_se_not_syntax",
329 "re_se_begbuf",
330 "re_se_hat",
331 "re_se_wordbeg",
332 "re_se_wordbound",
333 "re_se_notwordbound",
334 "re_se_wordend",
335 "re_se_endbuf",
336 "re_se_dollar",
337 "re_se_fail",
338};
339
340static char * efnames2[] =
341{
342 "re_se_win",
343 "re_se_lparen",
344 "re_se_rparen",
345 "re_se_backref",
346 "re_se_iter",
347 "re_se_end_iter",
348 "re_se_tv"
349};
350
351static char * inx_names[] =
352{
353 "rx_backtrack_point",
354 "rx_do_side_effects",
355 "rx_cache_miss",
356 "rx_next_char",
357 "rx_backtrack",
358 "rx_error_inx",
359 "rx_num_instructions"
360};
361
362
363#ifdef __STDC__
364static void
365re_seprint (struct rx * rx, void * effect, FILE * fp)
366#else
367static void
368re_seprint (rx, effect, fp)
369 struct rx * rx;
370 void * effect;
371 FILE * fp;
372#endif
373{
374 if ((int)effect < 0)
375 fputs (efnames[-(int)effect], fp);
376 else if (dbug_rxb)
377 {
378 struct re_se_params * p = &dbug_rxb->se_params[(int)effect];
379 fprintf (fp, "%s(%d,%d)", efnames2[p->se], p->op1, p->op2);
380 }
381 else
382 fprintf (fp, "[complex op # %d]", (int)effect);
383}
384
385
386/* These are so the regex.c regression tests will compile. */
387void
388print_compiled_pattern (rxb)
389 struct re_pattern_buffer * rxb;
390{
391}
392
393void
394print_fastmap (fm)
395 char * fm;
396{
397}
398
399#endif /* RX_DEBUG */
400
401
402
403
404/* This page: Bitsets. Completely unintersting. */
405
406#ifdef __STDC__
407RX_DECL int
408rx_bitset_is_equal (int size, rx_Bitset a, rx_Bitset b)
409#else
410RX_DECL int
411rx_bitset_is_equal (size, a, b)
412 int size;
413 rx_Bitset a;
414 rx_Bitset b;
415#endif
416{
417 int x;
418 RX_subset s = b[0];
419 b[0] = ~a[0];
420
421 for (x = rx_bitset_numb_subsets(size) - 1; a[x] == b[x]; --x)
422 ;
423
424 b[0] = s;
425 return !x && s == a[0];
426}
427
428#ifdef __STDC__
429RX_DECL int
430rx_bitset_is_subset (int size, rx_Bitset a, rx_Bitset b)
431#else
432RX_DECL int
433rx_bitset_is_subset (size, a, b)
434 int size;
435 rx_Bitset a;
436 rx_Bitset b;
437#endif
438{
439 int x = rx_bitset_numb_subsets(size) - 1;
440 while (x-- && (a[x] & b[x]) == a[x]);
441 return x == -1;
442}
443
444
445#ifdef __STDC__
446RX_DECL int
447rx_bitset_empty (int size, rx_Bitset set)
448#else
449RX_DECL int
450rx_bitset_empty (size, set)
451 int size;
452 rx_Bitset set;
453#endif
454{
455 int x;
456 RX_subset s = set[0];
457 set[0] = 1;
458 for (x = rx_bitset_numb_subsets(size) - 1; !set[x]; --x)
459 ;
460 set[0] = s;
461 return !s;
462}
463
464#ifdef __STDC__
465RX_DECL void
466rx_bitset_null (int size, rx_Bitset b)
467#else
468RX_DECL void
469rx_bitset_null (size, b)
470 int size;
471 rx_Bitset b;
472#endif
473{
474 bzero (b, rx_sizeof_bitset(size));
475}
476
477
478#ifdef __STDC__
479RX_DECL void
480rx_bitset_universe (int size, rx_Bitset b)
481#else
482RX_DECL void
483rx_bitset_universe (size, b)
484 int size;
485 rx_Bitset b;
486#endif
487{
488 int x = rx_bitset_numb_subsets (size);
489 while (x--)
490 *b++ = ~(RX_subset)0;
491}
492
493
494#ifdef __STDC__
495RX_DECL void
496rx_bitset_complement (int size, rx_Bitset b)
497#else
498RX_DECL void
499rx_bitset_complement (size, b)
500 int size;
501 rx_Bitset b;
502#endif
503{
504 int x = rx_bitset_numb_subsets (size);
505 while (x--)
506 {
507 *b = ~*b;
508 ++b;
509 }
510}
511
512
513#ifdef __STDC__
514RX_DECL void
515rx_bitset_assign (int size, rx_Bitset a, rx_Bitset b)
516#else
517RX_DECL void
518rx_bitset_assign (size, a, b)
519 int size;
520 rx_Bitset a;
521 rx_Bitset b;
522#endif
523{
524 int x;
525 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
526 a[x] = b[x];
527}
528
529
530#ifdef __STDC__
531RX_DECL void
532rx_bitset_union (int size, rx_Bitset a, rx_Bitset b)
533#else
534RX_DECL void
535rx_bitset_union (size, a, b)
536 int size;
537 rx_Bitset a;
538 rx_Bitset b;
539#endif
540{
541 int x;
542 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
543 a[x] |= b[x];
544}
545
546
547#ifdef __STDC__
548RX_DECL void
549rx_bitset_intersection (int size,
550 rx_Bitset a, rx_Bitset b)
551#else
552RX_DECL void
553rx_bitset_intersection (size, a, b)
554 int size;
555 rx_Bitset a;
556 rx_Bitset b;
557#endif
558{
559 int x;
560 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
561 a[x] &= b[x];
562}
563
564
565#ifdef __STDC__
566RX_DECL void
567rx_bitset_difference (int size, rx_Bitset a, rx_Bitset b)
568#else
569RX_DECL void
570rx_bitset_difference (size, a, b)
571 int size;
572 rx_Bitset a;
573 rx_Bitset b;
574#endif
575{
576 int x;
577 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
578 a[x] &= ~ b[x];
579}
580
581
582#ifdef __STDC__
583RX_DECL void
584rx_bitset_revdifference (int size,
585 rx_Bitset a, rx_Bitset b)
586#else
587RX_DECL void
588rx_bitset_revdifference (size, a, b)
589 int size;
590 rx_Bitset a;
591 rx_Bitset b;
592#endif
593{
594 int x;
595 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
596 a[x] = ~a[x] & b[x];
597}
598
599#ifdef __STDC__
600RX_DECL void
601rx_bitset_xor (int size, rx_Bitset a, rx_Bitset b)
602#else
603RX_DECL void
604rx_bitset_xor (size, a, b)
605 int size;
606 rx_Bitset a;
607 rx_Bitset b;
608#endif
609{
610 int x;
611 for (x = rx_bitset_numb_subsets(size) - 1; x >=0; --x)
612 a[x] ^= b[x];
613}
614
615
616#ifdef __STDC__
617RX_DECL unsigned long
618rx_bitset_hash (int size, rx_Bitset b)
619#else
620RX_DECL unsigned long
621rx_bitset_hash (size, b)
622 int size;
623 rx_Bitset b;
624#endif
625{
626 int x;
627 unsigned long hash = (unsigned long)rx_bitset_hash;
628
629 for (x = rx_bitset_numb_subsets(size) - 1; x >= 0; --x)
630 hash ^= rx_bitset_subset_val(b, x);
631
632 return hash;
633}
634
635
636RX_DECL RX_subset rx_subset_singletons [RX_subset_bits] =
637{
638 0x1,
639 0x2,
640 0x4,
641 0x8,
642 0x10,
643 0x20,
644 0x40,
645 0x80,
646 0x100,
647 0x200,
648 0x400,
649 0x800,
650 0x1000,
651 0x2000,
652 0x4000,
653 0x8000,
654 0x10000,
655 0x20000,
656 0x40000,
657 0x80000,
658 0x100000,
659 0x200000,
660 0x400000,
661 0x800000,
662 0x1000000,
663 0x2000000,
664 0x4000000,
665 0x8000000,
666 0x10000000,
667 0x20000000,
668 0x40000000,
669 0x80000000
670};
671
672#ifdef RX_DEBUG
673
674#ifdef __STDC__
675static void
676print_cset (struct rx *rx, rx_Bitset cset, FILE * fp)
677#else
678static void
679print_cset (rx, cset, fp)
680 struct rx *rx;
681 rx_Bitset cset;
682 FILE * fp;
683#endif
684{
685 int x;
686 fputc ('[', fp);
687 for (x = 0; x < rx->local_cset_size; ++x)
688 if (RX_bitset_member (cset, x))
689 {
690 if (isprint(x))
691 fputc (x, fp);
692 else
693 fprintf (fp, "\\0%o ", x);
694 }
695 fputc (']', fp);
696}
697
698#endif /* RX_DEBUG */
699
700
701
702
703static unsigned long rx_hash_masks[4] =
704{
705 0x12488421,
706 0x96699669,
707 0xbe7dd7eb,
708 0xffffffff
709};
710
711
712/* Hash tables */
713#ifdef __STDC__
714RX_DECL struct rx_hash_item *
715rx_hash_find (struct rx_hash * table,
716 unsigned long hash,
717 void * value,
718 struct rx_hash_rules * rules)
719#else
720RX_DECL struct rx_hash_item *
721rx_hash_find (table, hash, value, rules)
722 struct rx_hash * table;
723 unsigned long hash;
724 void * value;
725 struct rx_hash_rules * rules;
726#endif
727{
728 rx_hash_eq eq = rules->eq;
729 int maskc = 0;
730 long mask = rx_hash_masks [0];
731 int bucket = (hash & mask) % 13;
732
733 while (table->children [bucket])
734 {
735 table = table->children [bucket];
736 ++maskc;
737 mask = rx_hash_masks[maskc];
738 bucket = (hash & mask) % 13;
739 }
740
741 {
742 struct rx_hash_item * it = table->buckets[bucket];
743 while (it)
744 if (eq (it->data, value))
745 return it;
746 else
747 it = it->next_same_hash;
748 }
749
750 return 0;
751}
752
753
754#ifdef __STDC__
755RX_DECL struct rx_hash_item *
756rx_hash_store (struct rx_hash * table,
757 unsigned long hash,
758 void * value,
759 struct rx_hash_rules * rules)
760#else
761RX_DECL struct rx_hash_item *
762rx_hash_store (table, hash, value, rules)
763 struct rx_hash * table;
764 unsigned long hash;
765 void * value;
766 struct rx_hash_rules * rules;
767#endif
768{
769 rx_hash_eq eq = rules->eq;
770 int maskc = 0;
771 long mask = rx_hash_masks[0];
772 int bucket = (hash & mask) % 13;
773 int depth = 0;
774
775 while (table->children [bucket])
776 {
777 table = table->children [bucket];
778 ++maskc;
779 mask = rx_hash_masks[maskc];
780 bucket = (hash & mask) % 13;
781 ++depth;
782 }
783
784 {
785 struct rx_hash_item * it = table->buckets[bucket];
786 while (it)
787 if (eq (it->data, value))
788 return it;
789 else
790 it = it->next_same_hash;
791 }
792
793 {
794 if ( (depth < 3)
795 && (table->bucket_size [bucket] >= 4))
796 {
797 struct rx_hash * newtab = ((struct rx_hash *)
798 rules->hash_alloc (rules));
799 if (!newtab)
800 goto add_to_bucket;
801 bzero (newtab, sizeof (*newtab));
802 newtab->parent = table;
803 {
804 struct rx_hash_item * them = table->buckets[bucket];
805 unsigned long newmask = rx_hash_masks[maskc + 1];
806 while (them)
807 {
808 struct rx_hash_item * save = them->next_same_hash;
809 int new_buck = (them->hash & newmask) % 13;
810 them->next_same_hash = newtab->buckets[new_buck];
811 newtab->buckets[new_buck] = them;
812 them->table = newtab;
813 them = save;
814 ++newtab->bucket_size[new_buck];
815 ++newtab->refs;
816 }
817 table->refs = (table->refs - table->bucket_size[bucket] + 1);
818 table->bucket_size[bucket] = 0;
819 table->buckets[bucket] = 0;
820 table->children[bucket] = newtab;
821 table = newtab;
822 bucket = (hash & newmask) % 13;
823 }
824 }
825 }
826 add_to_bucket:
827 {
828 struct rx_hash_item * it = ((struct rx_hash_item *)
829 rules->hash_item_alloc (rules, value));
830 if (!it)
831 return 0;
832 it->hash = hash;
833 it->table = table;
834 /* DATA and BINDING are to be set in hash_item_alloc */
835 it->next_same_hash = table->buckets [bucket];
836 table->buckets[bucket] = it;
837 ++table->bucket_size [bucket];
838 ++table->refs;
839 return it;
840 }
841}
842
843
844#ifdef __STDC__
845RX_DECL void
846rx_hash_free (struct rx_hash_item * it, struct rx_hash_rules * rules)
847#else
848RX_DECL void
849rx_hash_free (it, rules)
850 struct rx_hash_item * it;
851 struct rx_hash_rules * rules;
852#endif
853{
854 if (it)
855 {
856 struct rx_hash * table = it->table;
857 unsigned long hash = it->hash;
858 int depth = (table->parent
859 ? (table->parent->parent
860 ? (table->parent->parent->parent
861 ? 3
862 : 2)
863 : 1)
864 : 0);
865 int bucket = (hash & rx_hash_masks [depth]) % 13;
866 struct rx_hash_item ** pos = &table->buckets [bucket];
867
868 while (*pos != it)
869 pos = &(*pos)->next_same_hash;
870 *pos = it->next_same_hash;
871 rules->free_hash_item (it, rules);
872 --table->bucket_size[bucket];
873 --table->refs;
874 while (!table->refs && depth)
875 {
876 struct rx_hash * save = table;
877 table = table->parent;
878 --depth;
879 bucket = (hash & rx_hash_masks [depth]) % 13;
880 --table->refs;
881 table->children[bucket] = 0;
882 rules->free_hash (save, rules);
883 }
884 }
885}
886
887#ifdef __STDC__
888RX_DECL void
889rx_free_hash_table (struct rx_hash * tab, rx_hash_freefn freefn,
890 struct rx_hash_rules * rules)
891#else
892RX_DECL void
893rx_free_hash_table (tab, freefn, rules)
894 struct rx_hash * tab;
895 rx_hash_freefn freefn;
896 struct rx_hash_rules * rules;
897#endif
898{
899 int x;
900
901 for (x = 0; x < 13; ++x)
902 if (tab->children[x])
903 {
904 rx_free_hash_table (tab->children[x], freefn, rules);
905 rules->free_hash (tab->children[x], rules);
906 }
907 else
908 {
909 struct rx_hash_item * them = tab->buckets[x];
910 while (them)
911 {
912 struct rx_hash_item * that = them;
913 them = that->next_same_hash;
914 freefn (that);
915 rules->free_hash_item (that, rules);
916 }
917 }
918}
919
920
921
922
923/* Utilities for manipulating bitset represntations of characters sets. */
924
925#ifdef __STDC__
926RX_DECL rx_Bitset
927rx_cset (struct rx *rx)
928#else
929RX_DECL rx_Bitset
930rx_cset (rx)
931 struct rx *rx;
932#endif
933{
934 rx_Bitset b = (rx_Bitset) malloc (rx_sizeof_bitset (rx->local_cset_size));
935 if (b)
936 rx_bitset_null (rx->local_cset_size, b);
937 return b;
938}
939
940
941#ifdef __STDC__
942RX_DECL rx_Bitset
943rx_copy_cset (struct rx *rx, rx_Bitset a)
944#else
945RX_DECL rx_Bitset
946rx_copy_cset (rx, a)
947 struct rx *rx;
948 rx_Bitset a;
949#endif
950{
951 rx_Bitset cs = rx_cset (rx);
952
953 if (cs)
954 rx_bitset_union (rx->local_cset_size, cs, a);
955
956 return cs;
957}
958
959
960#ifdef __STDC__
961RX_DECL void
962rx_free_cset (struct rx * rx, rx_Bitset c)
963#else
964RX_DECL void
965rx_free_cset (rx, c)
966 struct rx * rx;
967 rx_Bitset c;
968#endif
969{
970 if (c)
971 free ((char *)c);
972}
973
974
975
976/* Hash table memory allocation policy for the regexp compiler */
977
978#ifdef __STDC__
979static struct rx_hash *
980compiler_hash_alloc (struct rx_hash_rules * rules)
981#else
982static struct rx_hash *
983compiler_hash_alloc (rules)
984 struct rx_hash_rules * rules;
985#endif
986{
987 return (struct rx_hash *)malloc (sizeof (struct rx_hash));
988}
989
990
991#ifdef __STDC__
992static struct rx_hash_item *
993compiler_hash_item_alloc (struct rx_hash_rules * rules, void * value)
994#else
995static struct rx_hash_item *
996compiler_hash_item_alloc (rules, value)
997 struct rx_hash_rules * rules;
998 void * value;
999#endif
1000{
1001 struct rx_hash_item * it;
1002 it = (struct rx_hash_item *)malloc (sizeof (*it));
1003 if (it)
1004 {
1005 it->data = value;
1006 it->binding = 0;
1007 }
1008 return it;
1009}
1010
1011
1012#ifdef __STDC__
1013static void
1014compiler_free_hash (struct rx_hash * tab,
1015 struct rx_hash_rules * rules)
1016#else
1017static void
1018compiler_free_hash (tab, rules)
1019 struct rx_hash * tab;
1020 struct rx_hash_rules * rules;
1021#endif
1022{
1023 free ((char *)tab);
1024}
1025
1026
1027#ifdef __STDC__
1028static void
1029compiler_free_hash_item (struct rx_hash_item * item,
1030 struct rx_hash_rules * rules)
1031#else
1032static void
1033compiler_free_hash_item (item, rules)
1034 struct rx_hash_item * item;
1035 struct rx_hash_rules * rules;
1036#endif
1037{
1038 free ((char *)item);
1039}
1040
1041
1042
1043/* This page: REXP_NODE (expression tree) structures. */
1044
1045#ifdef __STDC__
1046RX_DECL struct rexp_node *
1047rexp_node (struct rx *rx,
1048 enum rexp_node_type type)
1049#else
1050RX_DECL struct rexp_node *
1051rexp_node (rx, type)
1052 struct rx *rx;
1053 enum rexp_node_type type;
1054#endif
1055{
1056 struct rexp_node *n;
1057
1058 n = (struct rexp_node *)malloc (sizeof (*n));
1059 bzero (n, sizeof (*n));
1060 if (n)
1061 n->type = type;
1062 return n;
1063}
1064
1065
1066/* free_rexp_node assumes that the bitset passed to rx_mk_r_cset
1067 * can be freed using rx_free_cset.
1068 */
1069#ifdef __STDC__
1070RX_DECL struct rexp_node *
1071rx_mk_r_cset (struct rx * rx,
1072 rx_Bitset b)
1073#else
1074RX_DECL struct rexp_node *
1075rx_mk_r_cset (rx, b)
1076 struct rx * rx;
1077 rx_Bitset b;
1078#endif
1079{
1080 struct rexp_node * n = rexp_node (rx, r_cset);
1081 if (n)
1082 n->params.cset = b;
1083 return n;
1084}
1085
1086
1087#ifdef __STDC__
1088RX_DECL struct rexp_node *
1089rx_mk_r_concat (struct rx * rx,
1090 struct rexp_node * a,
1091 struct rexp_node * b)
1092#else
1093RX_DECL struct rexp_node *
1094rx_mk_r_concat (rx, a, b)
1095 struct rx * rx;
1096 struct rexp_node * a;
1097 struct rexp_node * b;
1098#endif
1099{
1100 struct rexp_node * n = rexp_node (rx, r_concat);
1101 if (n)
1102 {
1103 n->params.pair.left = a;
1104 n->params.pair.right = b;
1105 }
1106 return n;
1107}
1108
1109
1110#ifdef __STDC__
1111RX_DECL struct rexp_node *
1112rx_mk_r_alternate (struct rx * rx,
1113 struct rexp_node * a,
1114 struct rexp_node * b)
1115#else
1116RX_DECL struct rexp_node *
1117rx_mk_r_alternate (rx, a, b)
1118 struct rx * rx;
1119 struct rexp_node * a;
1120 struct rexp_node * b;
1121#endif
1122{
1123 struct rexp_node * n = rexp_node (rx, r_alternate);
1124 if (n)
1125 {
1126 n->params.pair.left = a;
1127 n->params.pair.right = b;
1128 }
1129 return n;
1130}
1131
1132
1133#ifdef __STDC__
1134RX_DECL struct rexp_node *
1135rx_mk_r_opt (struct rx * rx,
1136 struct rexp_node * a)
1137#else
1138RX_DECL struct rexp_node *
1139rx_mk_r_opt (rx, a)
1140 struct rx * rx;
1141 struct rexp_node * a;
1142#endif
1143{
1144 struct rexp_node * n = rexp_node (rx, r_opt);
1145 if (n)
1146 {
1147 n->params.pair.left = a;
1148 n->params.pair.right = 0;
1149 }
1150 return n;
1151}
1152
1153
1154#ifdef __STDC__
1155RX_DECL struct rexp_node *
1156rx_mk_r_star (struct rx * rx,
1157 struct rexp_node * a)
1158#else
1159RX_DECL struct rexp_node *
1160rx_mk_r_star (rx, a)
1161 struct rx * rx;
1162 struct rexp_node * a;
1163#endif
1164{
1165 struct rexp_node * n = rexp_node (rx, r_star);
1166 if (n)
1167 {
1168 n->params.pair.left = a;
1169 n->params.pair.right = 0;
1170 }
1171 return n;
1172}
1173
1174
1175#ifdef __STDC__
1176RX_DECL struct rexp_node *
1177rx_mk_r_2phase_star (struct rx * rx,
1178 struct rexp_node * a,
1179 struct rexp_node * b)
1180#else
1181RX_DECL struct rexp_node *
1182rx_mk_r_2phase_star (rx, a, b)
1183 struct rx * rx;
1184 struct rexp_node * a;
1185 struct rexp_node * b;
1186#endif
1187{
1188 struct rexp_node * n = rexp_node (rx, r_2phase_star);
1189 if (n)
1190 {
1191 n->params.pair.left = a;
1192 n->params.pair.right = b;
1193 }
1194 return n;
1195}
1196
1197
1198#ifdef __STDC__
1199RX_DECL struct rexp_node *
1200rx_mk_r_side_effect (struct rx * rx,
1201 rx_side_effect a)
1202#else
1203RX_DECL struct rexp_node *
1204rx_mk_r_side_effect (rx, a)
1205 struct rx * rx;
1206 rx_side_effect a;
1207#endif
1208{
1209 struct rexp_node * n = rexp_node (rx, r_side_effect);
1210 if (n)
1211 {
1212 n->params.side_effect = a;
1213 n->params.pair.right = 0;
1214 }
1215 return n;
1216}
1217
1218
1219#ifdef __STDC__
1220RX_DECL struct rexp_node *
1221rx_mk_r_data (struct rx * rx,
1222 void * a)
1223#else
1224RX_DECL struct rexp_node *
1225rx_mk_r_data (rx, a)
1226 struct rx * rx;
1227 void * a;
1228#endif
1229{
1230 struct rexp_node * n = rexp_node (rx, r_data);
1231 if (n)
1232 {
1233 n->params.pair.left = a;
1234 n->params.pair.right = 0;
1235 }
1236 return n;
1237}
1238
1239
1240#ifdef __STDC__
1241RX_DECL void
1242rx_free_rexp (struct rx * rx, struct rexp_node * node)
1243#else
1244RX_DECL void
1245rx_free_rexp (rx, node)
1246 struct rx * rx;
1247 struct rexp_node * node;
1248#endif
1249{
1250 if (node)
1251 {
1252 switch (node->type)
1253 {
1254 case r_cset:
1255 if (node->params.cset)
1256 rx_free_cset (rx, node->params.cset);
1257
1258 case r_side_effect:
1259 break;
1260
1261 case r_concat:
1262 case r_alternate:
1263 case r_2phase_star:
1264 case r_opt:
1265 case r_star:
1266 rx_free_rexp (rx, node->params.pair.left);
1267 rx_free_rexp (rx, node->params.pair.right);
1268 break;
1269
1270 case r_data:
1271 /* This shouldn't occur. */
1272 break;
1273 }
1274 free ((char *)node);
1275 }
1276}
1277
1278
1279#ifdef __STDC__
1280RX_DECL struct rexp_node *
1281rx_copy_rexp (struct rx *rx,
1282 struct rexp_node *node)
1283#else
1284RX_DECL struct rexp_node *
1285rx_copy_rexp (rx, node)
1286 struct rx *rx;
1287 struct rexp_node *node;
1288#endif
1289{
1290 if (!node)
1291 return 0;
1292 else
1293 {
1294 struct rexp_node *n = rexp_node (rx, node->type);
1295 if (!n)
1296 return 0;
1297 switch (node->type)
1298 {
1299 case r_cset:
1300 n->params.cset = rx_copy_cset (rx, node->params.cset);
1301 if (!n->params.cset)
1302 {
1303 rx_free_rexp (rx, n);
1304 return 0;
1305 }
1306 break;
1307
1308 case r_side_effect:
1309 n->params.side_effect = node->params.side_effect;
1310 break;
1311
1312 case r_concat:
1313 case r_alternate:
1314 case r_opt:
1315 case r_2phase_star:
1316 case r_star:
1317 n->params.pair.left =
1318 rx_copy_rexp (rx, node->params.pair.left);
1319 n->params.pair.right =
1320 rx_copy_rexp (rx, node->params.pair.right);
1321 if ( (node->params.pair.left && !n->params.pair.left)
1322 || (node->params.pair.right && !n->params.pair.right))
1323 {
1324 rx_free_rexp (rx, n);
1325 return 0;
1326 }
1327 break;
1328 case r_data:
1329 /* shouldn't happen */
1330 break;
1331 }
1332 return n;
1333 }
1334}
1335
1336
1337
1338
1339/* This page: functions to build and destroy graphs that describe nfa's */
1340
1341/* Constructs a new nfa node. */
1342#ifdef __STDC__
1343RX_DECL struct rx_nfa_state *
1344rx_nfa_state (struct rx *rx)
1345#else
1346RX_DECL struct rx_nfa_state *
1347rx_nfa_state (rx)
1348 struct rx *rx;
1349#endif
1350{
1351 struct rx_nfa_state * n = (struct rx_nfa_state *)malloc (sizeof (*n));
1352 if (!n)
1353 return 0;
1354 bzero (n, sizeof (*n));
1355 n->next = rx->nfa_states;
1356 rx->nfa_states = n;
1357 return n;
1358}
1359
1360
1361#ifdef __STDC__
1362RX_DECL void
1363rx_free_nfa_state (struct rx_nfa_state * n)
1364#else
1365RX_DECL void
1366rx_free_nfa_state (n)
1367 struct rx_nfa_state * n;
1368#endif
1369{
1370 free ((char *)n);
1371}
1372
1373
1374/* This looks up an nfa node, given a numeric id. Numeric id's are
1375 * assigned after the nfa has been built.
1376 */
1377#ifdef __STDC__
1378RX_DECL struct rx_nfa_state *
1379rx_id_to_nfa_state (struct rx * rx,
1380 int id)
1381#else
1382RX_DECL struct rx_nfa_state *
1383rx_id_to_nfa_state (rx, id)
1384 struct rx * rx;
1385 int id;
1386#endif
1387{
1388 struct rx_nfa_state * n;
1389 for (n = rx->nfa_states; n; n = n->next)
1390 if (n->id == id)
1391 return n;
1392 return 0;
1393}
1394
1395
1396/* This adds an edge between two nodes, but doesn't initialize the
1397 * edge label.
1398 */
1399
1400#ifdef __STDC__
1401RX_DECL struct rx_nfa_edge *
1402rx_nfa_edge (struct rx *rx,
1403 enum rx_nfa_etype type,
1404 struct rx_nfa_state *start,
1405 struct rx_nfa_state *dest)
1406#else
1407RX_DECL struct rx_nfa_edge *
1408rx_nfa_edge (rx, type, start, dest)
1409 struct rx *rx;
1410 enum rx_nfa_etype type;
1411 struct rx_nfa_state *start;
1412 struct rx_nfa_state *dest;
1413#endif
1414{
1415 struct rx_nfa_edge *e;
1416 e = (struct rx_nfa_edge *)malloc (sizeof (*e));
1417 if (!e)
1418 return 0;
1419 e->next = start->edges;
1420 start->edges = e;
1421 e->type = type;
1422 e->dest = dest;
1423 return e;
1424}
1425
1426
1427#ifdef __STDC__
1428RX_DECL void
1429rx_free_nfa_edge (struct rx_nfa_edge * e)
1430#else
1431RX_DECL void
1432rx_free_nfa_edge (e)
1433 struct rx_nfa_edge * e;
1434#endif
1435{
1436 free ((char *)e);
1437}
1438
1439
1440/* This constructs a POSSIBLE_FUTURE, which is a kind epsilon-closure
1441 * of an NFA. These are added to an nfa automaticly by eclose_nfa.
1442 */
1443
1444#ifdef __STDC__
1445static struct rx_possible_future *
1446rx_possible_future (struct rx * rx,
1447 struct rx_se_list * effects)
1448#else
1449static struct rx_possible_future *
1450rx_possible_future (rx, effects)
1451 struct rx * rx;
1452 struct rx_se_list * effects;
1453#endif
1454{
1455 struct rx_possible_future *ec;
1456 ec = (struct rx_possible_future *) malloc (sizeof (*ec));
1457 if (!ec)
1458 return 0;
1459 ec->destset = 0;
1460 ec->next = 0;
1461 ec->effects = effects;
1462 return ec;
1463}
1464
1465
1466#ifdef __STDC__
1467static void
1468rx_free_possible_future (struct rx_possible_future * pf)
1469#else
1470static void
1471rx_free_possible_future (pf)
1472 struct rx_possible_future * pf;
1473#endif
1474{
1475 free ((char *)pf);
1476}
1477
1478
1479#ifdef __STDC__
1480RX_DECL void
1481rx_free_nfa (struct rx *rx)
1482#else
1483RX_DECL void
1484rx_free_nfa (rx)
1485 struct rx *rx;
1486#endif
1487{
1488 while (rx->nfa_states)
1489 {
1490 while (rx->nfa_states->edges)
1491 {
1492 switch (rx->nfa_states->edges->type)
1493 {
1494 case ne_cset:
1495 rx_free_cset (rx, rx->nfa_states->edges->params.cset);
1496 break;
1497 default:
1498 break;
1499 }
1500 {
1501 struct rx_nfa_edge * e;
1502 e = rx->nfa_states->edges;
1503 rx->nfa_states->edges = rx->nfa_states->edges->next;
1504 rx_free_nfa_edge (e);
1505 }
1506 } /* while (rx->nfa_states->edges) */
1507 {
1508 /* Iterate over the partial epsilon closures of rx->nfa_states */
1509 struct rx_possible_future * pf = rx->nfa_states->futures;
1510 while (pf)
1511 {
1512 struct rx_possible_future * pft = pf;
1513 pf = pf->next;
1514 rx_free_possible_future (pft);
1515 }
1516 }
1517 {
1518 struct rx_nfa_state *n;
1519 n = rx->nfa_states;
1520 rx->nfa_states = rx->nfa_states->next;
1521 rx_free_nfa_state (n);
1522 }
1523 }
1524}
1525
1526
1527
1528
1529/* This page: translating a pattern expression into an nfa and doing the
1530 * static part of the nfa->super-nfa translation.
1531 */
1532
1533/* This is the thompson regexp->nfa algorithm.
1534 * It is modified to allow for `side-effect epsilons.' Those are
1535 * edges that are taken whenever a similar epsilon edge would be,
1536 * but which imply that some side effect occurs when the edge
1537 * is taken.
1538 *
1539 * Side effects are used to model parts of the pattern langauge
1540 * that are not regular (in the formal sense).
1541 */
1542
1543#ifdef __STDC__
1544RX_DECL int
1545rx_build_nfa (struct rx *rx,
1546 struct rexp_node *rexp,
1547 struct rx_nfa_state **start,
1548 struct rx_nfa_state **end)
1549#else
1550RX_DECL int
1551rx_build_nfa (rx, rexp, start, end)
1552 struct rx *rx;
1553 struct rexp_node *rexp;
1554 struct rx_nfa_state **start;
1555 struct rx_nfa_state **end;
1556#endif
1557{
1558 struct rx_nfa_edge *edge;
1559
1560 /* Start & end nodes may have been allocated by the caller. */
1561 *start = *start ? *start : rx_nfa_state (rx);
1562
1563 if (!*start)
1564 return 0;
1565
1566 if (!rexp)
1567 {
1568 *end = *start;
1569 return 1;
1570 }
1571
1572 *end = *end ? *end : rx_nfa_state (rx);
1573
1574 if (!*end)
1575 {
1576 rx_free_nfa_state (*start);
1577 return 0;
1578 }
1579
1580 switch (rexp->type)
1581 {
1582 case r_data:
1583 return 0;
1584
1585 case r_cset:
1586 edge = rx_nfa_edge (rx, ne_cset, *start, *end);
1587 if (!edge)
1588 return 0;
1589 edge->params.cset = rx_copy_cset (rx, rexp->params.cset);
1590 if (!edge->params.cset)
1591 {
1592 rx_free_nfa_edge (edge);
1593 return 0;
1594 }
1595 return 1;
1596
1597 case r_opt:
1598 return (rx_build_nfa (rx, rexp->params.pair.left, start, end)
1599 && rx_nfa_edge (rx, ne_epsilon, *start, *end));
1600
1601 case r_star:
1602 {
1603 struct rx_nfa_state * star_start = 0;
1604 struct rx_nfa_state * star_end = 0;
1605 return (rx_build_nfa (rx, rexp->params.pair.left,
1606 &star_start, &star_end)
1607 && star_start
1608 && star_end
1609 && rx_nfa_edge (rx, ne_epsilon, star_start, star_end)
1610 && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
1611 && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
1612
1613 && rx_nfa_edge (rx, ne_epsilon, star_end, star_start));
1614 }
1615
1616 case r_2phase_star:
1617 {
1618 struct rx_nfa_state * star_start = 0;
1619 struct rx_nfa_state * star_end = 0;
1620 struct rx_nfa_state * loop_exp_start = 0;
1621 struct rx_nfa_state * loop_exp_end = 0;
1622
1623 return (rx_build_nfa (rx, rexp->params.pair.left,
1624 &star_start, &star_end)
1625 && rx_build_nfa (rx, rexp->params.pair.right,
1626 &loop_exp_start, &loop_exp_end)
1627 && star_start
1628 && star_end
1629 && loop_exp_end
1630 && loop_exp_start
1631 && rx_nfa_edge (rx, ne_epsilon, star_start, *end)
1632 && rx_nfa_edge (rx, ne_epsilon, *start, star_start)
1633 && rx_nfa_edge (rx, ne_epsilon, star_end, *end)
1634
1635 && rx_nfa_edge (rx, ne_epsilon, star_end, loop_exp_start)
1636 && rx_nfa_edge (rx, ne_epsilon, loop_exp_end, star_start));
1637 }
1638
1639
1640 case r_concat:
1641 {
1642 struct rx_nfa_state *shared = 0;
1643 return
1644 (rx_build_nfa (rx, rexp->params.pair.left, start, &shared)
1645 && rx_build_nfa (rx, rexp->params.pair.right, &shared, end));
1646 }
1647
1648 case r_alternate:
1649 {
1650 struct rx_nfa_state *ls = 0;
1651 struct rx_nfa_state *le = 0;
1652 struct rx_nfa_state *rs = 0;
1653 struct rx_nfa_state *re = 0;
1654 return (rx_build_nfa (rx, rexp->params.pair.left, &ls, &le)
1655 && rx_build_nfa (rx, rexp->params.pair.right, &rs, &re)
1656 && rx_nfa_edge (rx, ne_epsilon, *start, ls)
1657 && rx_nfa_edge (rx, ne_epsilon, *start, rs)
1658 && rx_nfa_edge (rx, ne_epsilon, le, *end)
1659 && rx_nfa_edge (rx, ne_epsilon, re, *end));
1660 }
1661
1662 case r_side_effect:
1663 edge = rx_nfa_edge (rx, ne_side_effect, *start, *end);
1664 if (!edge)
1665 return 0;
1666 edge->params.side_effect = rexp->params.side_effect;
1667 return 1;
1668 }
1669
1670 /* this should never happen */
1671 return 0;
1672}
1673
1674
1675/* RX_NAME_NFA_STATES identifies all nodes with outgoing non-epsilon
1676 * transitions. Only these nodes can occur in super-states.
1677 * All nodes are given an integer id.
1678 * The id is non-negative if the node has non-epsilon out-transitions, negative
1679 * otherwise (this is because we want the non-negative ids to be used as
1680 * array indexes in a few places).
1681 */
1682
1683#ifdef __STDC__
1684RX_DECL void
1685rx_name_nfa_states (struct rx *rx)
1686#else
1687RX_DECL void
1688rx_name_nfa_states (rx)
1689 struct rx *rx;
1690#endif
1691{
1692 struct rx_nfa_state *n = rx->nfa_states;
1693
1694 rx->nodec = 0;
1695 rx->epsnodec = -1;
1696
1697 while (n)
1698 {
1699 struct rx_nfa_edge *e = n->edges;
1700
1701 if (n->is_start)
1702 n->eclosure_needed = 1;
1703
1704 while (e)
1705 {
1706 switch (e->type)
1707 {
1708 case ne_epsilon:
1709 case ne_side_effect:
1710 break;
1711
1712 case ne_cset:
1713 n->id = rx->nodec++;
1714 {
1715 struct rx_nfa_edge *from_n = n->edges;
1716 while (from_n)
1717 {
1718 from_n->dest->eclosure_needed = 1;
1719 from_n = from_n->next;
1720 }
1721 }
1722 goto cont;
1723 }
1724 e = e->next;
1725 }
1726 n->id = rx->epsnodec--;
1727 cont:
1728 n = n->next;
1729 }
1730 rx->epsnodec = -rx->epsnodec;
1731}
1732
1733
1734
1735/* This page: data structures for the static part of the nfa->supernfa
1736 * translation.
1737 *
1738 * There are side effect lists -- lists of side effects occuring
1739 * along an uninterrupted, acyclic path of side-effect epsilon edges.
1740 * Such paths are collapsed to single edges in the course of computing
1741 * epsilon closures. Such single edges are labled with a list of all
1742 * the side effects entailed in crossing them. Like lists of side
1743 * effects are made == by the constructors below.
1744 *
1745 * There are also nfa state sets. These are used to hold a list of all
1746 * states reachable from a starting state for a given type of transition
1747 * and side effect list. These are also hash-consed.
1748 */
1749
1750/* The next several functions compare, construct, etc. lists of side
1751 * effects. See ECLOSE_NFA (below) for details.
1752 */
1753
1754/* Ordering of rx_se_list
1755 * (-1, 0, 1 return value convention).
1756 */
1757
1758#ifdef __STDC__
1759static int
1760se_list_cmp (void * va, void * vb)
1761#else
1762static int
1763se_list_cmp (va, vb)
1764 void * va;
1765 void * vb;
1766#endif
1767{
1768 struct rx_se_list * a = (struct rx_se_list *)va;
1769 struct rx_se_list * b = (struct rx_se_list *)vb;
1770
1771 return ((va == vb)
1772 ? 0
1773 : (!va
1774 ? -1
1775 : (!vb
1776 ? 1
1777 : ((long)a->car < (long)b->car
1778 ? 1
1779 : ((long)a->car > (long)b->car
1780 ? -1
1781 : se_list_cmp ((void *)a->cdr, (void *)b->cdr))))));
1782}
1783
1784
1785#ifdef __STDC__
1786static int
1787se_list_equal (void * va, void * vb)
1788#else
1789static int
1790se_list_equal (va, vb)
1791 void * va;
1792 void * vb;
1793#endif
1794{
1795 return !(se_list_cmp (va, vb));
1796}
1797
1798static struct rx_hash_rules se_list_hash_rules =
1799{
1800 se_list_equal,
1801 compiler_hash_alloc,
1802 compiler_free_hash,
1803 compiler_hash_item_alloc,
1804 compiler_free_hash_item
1805};
1806
1807
1808#ifdef __STDC__
1809static struct rx_se_list *
1810side_effect_cons (struct rx * rx,
1811 void * se, struct rx_se_list * list)
1812#else
1813static struct rx_se_list *
1814side_effect_cons (rx, se, list)
1815 struct rx * rx;
1816 void * se;
1817 struct rx_se_list * list;
1818#endif
1819{
1820 struct rx_se_list * l;
1821 l = ((struct rx_se_list *) malloc (sizeof (*l)));
1822 if (!l)
1823 return 0;
1824 l->car = se;
1825 l->cdr = list;
1826 return l;
1827}
1828
1829
1830#ifdef __STDC__
1831static struct rx_se_list *
1832hash_cons_se_prog (struct rx * rx,
1833 struct rx_hash * memo,
1834 void * car, struct rx_se_list * cdr)
1835#else
1836static struct rx_se_list *
1837hash_cons_se_prog (rx, memo, car, cdr)
1838 struct rx * rx;
1839 struct rx_hash * memo;
1840 void * car;
1841 struct rx_se_list * cdr;
1842#endif
1843{
1844 long hash = (long)car ^ (long)cdr;
1845 struct rx_se_list template;
1846
1847 template.car = car;
1848 template.cdr = cdr;
1849 {
1850 struct rx_hash_item * it = rx_hash_store (memo, hash,
1851 (void *)&template,
1852 &se_list_hash_rules);
1853 if (!it)
1854 return 0;
1855 if (it->data == (void *)&template)
1856 {
1857 struct rx_se_list * consed;
1858 consed = (struct rx_se_list *) malloc (sizeof (*consed));
1859 *consed = template;
1860 it->data = (void *)consed;
1861 }
1862 return (struct rx_se_list *)it->data;
1863 }
1864}
1865
1866
1867#ifdef __STDC__
1868static struct rx_se_list *
1869hash_se_prog (struct rx * rx, struct rx_hash * memo, struct rx_se_list * prog)
1870#else
1871static struct rx_se_list *
1872hash_se_prog (rx, memo, prog)
1873 struct rx * rx;
1874 struct rx_hash * memo;
1875 struct rx_se_list * prog;
1876#endif
1877{
1878 struct rx_se_list * answer = 0;
1879 while (prog)
1880 {
1881 answer = hash_cons_se_prog (rx, memo, prog->car, answer);
1882 if (!answer)
1883 return 0;
1884 prog = prog->cdr;
1885 }
1886 return answer;
1887}
1888
1889#ifdef __STDC__
1890static int
1891nfa_set_cmp (void * va, void * vb)
1892#else
1893static int
1894nfa_set_cmp (va, vb)
1895 void * va;
1896 void * vb;
1897#endif
1898{
1899 struct rx_nfa_state_set * a = (struct rx_nfa_state_set *)va;
1900 struct rx_nfa_state_set * b = (struct rx_nfa_state_set *)vb;
1901
1902 return ((va == vb)
1903 ? 0
1904 : (!va
1905 ? -1
1906 : (!vb
1907 ? 1
1908 : (a->car->id < b->car->id
1909 ? 1
1910 : (a->car->id > b->car->id
1911 ? -1
1912 : nfa_set_cmp ((void *)a->cdr, (void *)b->cdr))))));
1913}
1914
1915#ifdef __STDC__
1916static int
1917nfa_set_equal (void * va, void * vb)
1918#else
1919static int
1920nfa_set_equal (va, vb)
1921 void * va;
1922 void * vb;
1923#endif
1924{
1925 return !nfa_set_cmp (va, vb);
1926}
1927
1928static struct rx_hash_rules nfa_set_hash_rules =
1929{
1930 nfa_set_equal,
1931 compiler_hash_alloc,
1932 compiler_free_hash,
1933 compiler_hash_item_alloc,
1934 compiler_free_hash_item
1935};
1936
1937
1938#ifdef __STDC__
1939static struct rx_nfa_state_set *
1940nfa_set_cons (struct rx * rx,
1941 struct rx_hash * memo, struct rx_nfa_state * state,
1942 struct rx_nfa_state_set * set)
1943#else
1944static struct rx_nfa_state_set *
1945nfa_set_cons (rx, memo, state, set)
1946 struct rx * rx;
1947 struct rx_hash * memo;
1948 struct rx_nfa_state * state;
1949 struct rx_nfa_state_set * set;
1950#endif
1951{
1952 struct rx_nfa_state_set template;
1953 struct rx_hash_item * node;
1954 template.car = state;
1955 template.cdr = set;
1956 node = rx_hash_store (memo,
1957 (((long)state) >> 8) ^ (long)set,
1958 &template, &nfa_set_hash_rules);
1959 if (!node)
1960 return 0;
1961 if (node->data == &template)
1962 {
1963 struct rx_nfa_state_set * l;
1964 l = (struct rx_nfa_state_set *) malloc (sizeof (*l));
1965 node->data = (void *) l;
1966 if (!l)
1967 return 0;
1968 *l = template;
1969 }
1970 return (struct rx_nfa_state_set *)node->data;
1971}
1972
1973
1974#ifdef __STDC__
1975static struct rx_nfa_state_set *
1976nfa_set_enjoin (struct rx * rx,
1977 struct rx_hash * memo, struct rx_nfa_state * state,
1978 struct rx_nfa_state_set * set)
1979#else
1980static struct rx_nfa_state_set *
1981nfa_set_enjoin (rx, memo, state, set)
1982 struct rx * rx;
1983 struct rx_hash * memo;
1984 struct rx_nfa_state * state;
1985 struct rx_nfa_state_set * set;
1986#endif
1987{
1988 if (!set || state->id < set->car->id)
1989 return nfa_set_cons (rx, memo, state, set);
1990 if (state->id == set->car->id)
1991 return set;
1992 else
1993 {
1994 struct rx_nfa_state_set * newcdr
1995 = nfa_set_enjoin (rx, memo, state, set->cdr);
1996 if (newcdr != set->cdr)
1997 set = nfa_set_cons (rx, memo, set->car, newcdr);
1998 return set;
1999 }
2000}
2001
2002
2003
2004
2005/* This page: computing epsilon closures. The closures aren't total.
2006 * Each node's closures are partitioned according to the side effects entailed
2007 * along the epsilon edges. Return true on success.
2008 */
2009
2010struct eclose_frame
2011{
2012 struct rx_se_list *prog_backwards;
2013};
2014
2015
2016#ifdef __STDC__
2017static int
2018eclose_node (struct rx *rx, struct rx_nfa_state *outnode,
2019 struct rx_nfa_state *node, struct eclose_frame *frame)
2020#else
2021static int
2022eclose_node (rx, outnode, node, frame)
2023 struct rx *rx;
2024 struct rx_nfa_state *outnode;
2025 struct rx_nfa_state *node;
2026 struct eclose_frame *frame;
2027#endif
2028{
2029 struct rx_nfa_edge *e = node->edges;
2030
2031 /* For each node, we follow all epsilon paths to build the closure.
2032 * The closure omits nodes that have only epsilon edges.
2033 * The closure is split into partial closures -- all the states in
2034 * a partial closure are reached by crossing the same list of
2035 * of side effects (though not necessarily the same path).
2036 */
2037 if (node->mark)
2038 return 1;
2039 node->mark = 1;
2040
2041 if (node->id >= 0 || node->is_final)
2042 {
2043 struct rx_possible_future **ec;
2044 struct rx_se_list * prog_in_order
2045 = ((struct rx_se_list *)hash_se_prog (rx,
2046 &rx->se_list_memo,
2047 frame->prog_backwards));
2048 int cmp;
2049
2050 ec = &outnode->futures;
2051
2052 while (*ec)
2053 {
2054 cmp = se_list_cmp ((void *)(*ec)->effects, (void *)prog_in_order);
2055 if (cmp <= 0)
2056 break;
2057 ec = &(*ec)->next;
2058 }
2059 if (!*ec || (cmp < 0))
2060 {
2061 struct rx_possible_future * saved = *ec;
2062 *ec = rx_possible_future (rx, prog_in_order);
2063 (*ec)->next = saved;
2064 if (!*ec)
2065 return 0;
2066 }
2067 if (node->id >= 0)
2068 {
2069 (*ec)->destset = nfa_set_enjoin (rx, &rx->set_list_memo,
2070 node, (*ec)->destset);
2071 if (!(*ec)->destset)
2072 return 0;
2073 }
2074 }
2075
2076 while (e)
2077 {
2078 switch (e->type)
2079 {
2080 case ne_epsilon:
2081 if (!eclose_node (rx, outnode, e->dest, frame))
2082 return 0;
2083 break;
2084 case ne_side_effect:
2085 {
2086 frame->prog_backwards = side_effect_cons (rx,
2087 e->params.side_effect,
2088 frame->prog_backwards);
2089 if (!frame->prog_backwards)
2090 return 0;
2091 if (!eclose_node (rx, outnode, e->dest, frame))
2092 return 0;
2093 {
2094 struct rx_se_list * dying = frame->prog_backwards;
2095 frame->prog_backwards = frame->prog_backwards->cdr;
2096 free ((char *)dying);
2097 }
2098 break;
2099 }
2100 default:
2101 break;
2102 }
2103 e = e->next;
2104 }
2105 node->mark = 0;
2106 return 1;
2107}
2108
2109
2110#ifdef __STDC__
2111RX_DECL int
2112rx_eclose_nfa (struct rx *rx)
2113#else
2114RX_DECL int
2115rx_eclose_nfa (rx)
2116 struct rx *rx;
2117#endif
2118{
2119 struct rx_nfa_state *n = rx->nfa_states;
2120 struct eclose_frame frame;
2121 static int rx_id = 0;
2122
2123 frame.prog_backwards = 0;
2124 rx->rx_id = rx_id++;
2125 bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
2126 bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
2127 while (n)
2128 {
2129 n->futures = 0;
2130 if (n->eclosure_needed && !eclose_node (rx, n, n, &frame))
2131 return 0;
2132 /* clear_marks (rx); */
2133 n = n->next;
2134 }
2135 return 1;
2136}
2137
2138
2139/* This deletes epsilon edges from an NFA. After running eclose_node,
2140 * we have no more need for these edges. They are removed to simplify
2141 * further operations on the NFA.
2142 */
2143
2144#ifdef __STDC__
2145RX_DECL void
2146rx_delete_epsilon_transitions (struct rx *rx)
2147#else
2148RX_DECL void
2149rx_delete_epsilon_transitions (rx)
2150 struct rx *rx;
2151#endif
2152{
2153 struct rx_nfa_state *n = rx->nfa_states;
2154 struct rx_nfa_edge **e;
2155
2156 while (n)
2157 {
2158 e = &n->edges;
2159 while (*e)
2160 {
2161 struct rx_nfa_edge *t;
2162 switch ((*e)->type)
2163 {
2164 case ne_epsilon:
2165 case ne_side_effect:
2166 t = *e;
2167 *e = t->next;
2168 rx_free_nfa_edge (t);
2169 break;
2170
2171 default:
2172 e = &(*e)->next;
2173 break;
2174 }
2175 }
2176 n = n->next;
2177 }
2178}
2179
2180
2181
2182/* This page: storing the nfa in a contiguous region of memory for
2183 * subsequent conversion to a super-nfa.
2184 */
2185
2186/* This is for qsort on an array of nfa_states. The order
2187 * is based on state ids and goes
2188 * [0...MAX][MIN..-1] where (MAX>=0) and (MIN<0)
2189 * This way, positive ids double as array indices.
2190 */
2191
2192#ifdef __STDC__
2193static int
2194nfacmp (void * va, void * vb)
2195#else
2196static int
2197nfacmp (va, vb)
2198 void * va;
2199 void * vb;
2200#endif
2201{
2202 struct rx_nfa_state **a = (struct rx_nfa_state **)va;
2203 struct rx_nfa_state **b = (struct rx_nfa_state **)vb;
2204 return (*a == *b /* &&&& 3.18 */
2205 ? 0
2206 : (((*a)->id < 0) == ((*b)->id < 0)
2207 ? (((*a)->id < (*b)->id) ? -1 : 1)
2208 : (((*a)->id < 0)
2209 ? 1 : -1)));
2210}
2211
2212#ifdef __STDC__
2213static int
2214count_hash_nodes (struct rx_hash * st)
2215#else
2216static int
2217count_hash_nodes (st)
2218 struct rx_hash * st;
2219#endif
2220{
2221 int x;
2222 int count = 0;
2223 for (x = 0; x < 13; ++x)
2224 count += ((st->children[x])
2225 ? count_hash_nodes (st->children[x])
2226 : st->bucket_size[x]);
2227
2228 return count;
2229}
2230
2231
2232#ifdef __STDC__
2233static void
2234se_memo_freer (struct rx_hash_item * node)
2235#else
2236static void
2237se_memo_freer (node)
2238 struct rx_hash_item * node;
2239#endif
2240{
2241 free ((char *)node->data);
2242}
2243
2244
2245#ifdef __STDC__
2246static void
2247nfa_set_freer (struct rx_hash_item * node)
2248#else
2249static void
2250nfa_set_freer (node)
2251 struct rx_hash_item * node;
2252#endif
2253{
2254 free ((char *)node->data);
2255}
2256
2257
2258/* This copies an entire NFA into a single malloced block of memory.
2259 * Mostly this is for compatability with regex.c, though it is convenient
2260 * to have the nfa nodes in an array.
2261 */
2262
2263#ifdef __STDC__
2264RX_DECL int
2265rx_compactify_nfa (struct rx *rx,
2266 void **mem, unsigned long *size)
2267#else
2268RX_DECL int
2269rx_compactify_nfa (rx, mem, size)
2270 struct rx *rx;
2271 void **mem;
2272 unsigned long *size;
2273#endif
2274{
2275 int total_nodec;
2276 struct rx_nfa_state *n;
2277 int edgec = 0;
2278 int eclosec = 0;
2279 int se_list_consc = count_hash_nodes (&rx->se_list_memo);
2280 int nfa_setc = count_hash_nodes (&rx->set_list_memo);
2281 unsigned long total_size;
2282
2283 /* This takes place in two stages. First, the total size of the
2284 * nfa is computed, then structures are copied.
2285 */
2286 n = rx->nfa_states;
2287 total_nodec = 0;
2288 while (n)
2289 {
2290 struct rx_nfa_edge *e = n->edges;
2291 struct rx_possible_future *ec = n->futures;
2292 ++total_nodec;
2293 while (e)
2294 {
2295 ++edgec;
2296 e = e->next;
2297 }
2298 while (ec)
2299 {
2300 ++eclosec;
2301 ec = ec->next;
2302 }
2303 n = n->next;
2304 }
2305
2306 total_size = (total_nodec * sizeof (struct rx_nfa_state)
2307 + edgec * rx_sizeof_bitset (rx->local_cset_size)
2308 + edgec * sizeof (struct rx_nfa_edge)
2309 + nfa_setc * sizeof (struct rx_nfa_state_set)
2310 + eclosec * sizeof (struct rx_possible_future)
2311 + se_list_consc * sizeof (struct rx_se_list)
2312 + rx->reserved);
2313
2314 if (total_size > *size)
2315 {
2316 *mem = remalloc (*mem, total_size);
2317 if (*mem)
2318 *size = total_size;
2319 else
2320 return 0;
2321 }
2322 /* Now we've allocated the memory; this copies the NFA. */
2323 {
2324 static struct rx_nfa_state **scratch = 0;
2325 static int scratch_alloc = 0;
2326 struct rx_nfa_state *state_base = (struct rx_nfa_state *) * mem;
2327 struct rx_nfa_state *new_state = state_base;
2328 struct rx_nfa_edge *new_edge =
2329 (struct rx_nfa_edge *)
2330 ((char *) state_base + total_nodec * sizeof (struct rx_nfa_state));
2331 struct rx_se_list * new_se_list =
2332 (struct rx_se_list *)
2333 ((char *)new_edge + edgec * sizeof (struct rx_nfa_edge));
2334 struct rx_possible_future *new_close =
2335 ((struct rx_possible_future *)
2336 ((char *) new_se_list
2337 + se_list_consc * sizeof (struct rx_se_list)));
2338 struct rx_nfa_state_set * new_nfa_set =
2339 ((struct rx_nfa_state_set *)
2340 ((char *)new_close + eclosec * sizeof (struct rx_possible_future)));
2341 char *new_bitset =
2342 ((char *) new_nfa_set + nfa_setc * sizeof (struct rx_nfa_state_set));
2343 int x;
2344 struct rx_nfa_state *n;
2345
2346 if (scratch_alloc < total_nodec)
2347 {
2348 scratch = ((struct rx_nfa_state **)
2349 remalloc (scratch, total_nodec * sizeof (*scratch)));
2350 if (scratch)
2351 scratch_alloc = total_nodec;
2352 else
2353 {
2354 scratch_alloc = 0;
2355 return 0;
2356 }
2357 }
2358
2359 for (x = 0, n = rx->nfa_states; n; n = n->next)
2360 scratch[x++] = n;
2361
2362 qsort (scratch, total_nodec,
2363 sizeof (struct rx_nfa_state *), (int (*)())nfacmp);
2364
2365 for (x = 0; x < total_nodec; ++x)
2366 {
2367 struct rx_possible_future *eclose = scratch[x]->futures;
2368 struct rx_nfa_edge *edge = scratch[x]->edges;
2369 struct rx_nfa_state *cn = new_state++;
2370 cn->futures = 0;
2371 cn->edges = 0;
2372 cn->next = (x == total_nodec - 1) ? 0 : (cn + 1);
2373 cn->id = scratch[x]->id;
2374 cn->is_final = scratch[x]->is_final;
2375 cn->is_start = scratch[x]->is_start;
2376 cn->mark = 0;
2377 while (edge)
2378 {
2379 int indx = (edge->dest->id < 0
2380 ? (total_nodec + edge->dest->id)
2381 : edge->dest->id);
2382 struct rx_nfa_edge *e = new_edge++;
2383 rx_Bitset cset = (rx_Bitset) new_bitset;
2384 new_bitset += rx_sizeof_bitset (rx->local_cset_size);
2385 rx_bitset_null (rx->local_cset_size, cset);
2386 rx_bitset_union (rx->local_cset_size, cset, edge->params.cset);
2387 e->next = cn->edges;
2388 cn->edges = e;
2389 e->type = edge->type;
2390 e->dest = state_base + indx;
2391 e->params.cset = cset;
2392 edge = edge->next;
2393 }
2394 while (eclose)
2395 {
2396 struct rx_possible_future *ec = new_close++;
2397 struct rx_hash_item * sp;
2398 struct rx_se_list ** sepos;
2399 struct rx_se_list * sesrc;
2400 struct rx_nfa_state_set * destlst;
2401 struct rx_nfa_state_set ** destpos;
2402 ec->next = cn->futures;
2403 cn->futures = ec;
2404 for (sepos = &ec->effects, sesrc = eclose->effects;
2405 sesrc;
2406 sesrc = sesrc->cdr, sepos = &(*sepos)->cdr)
2407 {
2408 sp = rx_hash_find (&rx->se_list_memo,
2409 (long)sesrc->car ^ (long)sesrc->cdr,
2410 sesrc, &se_list_hash_rules);
2411 if (sp->binding)
2412 {
2413 sesrc = (struct rx_se_list *)sp->binding;
2414 break;
2415 }
2416 *new_se_list = *sesrc;
2417 sp->binding = (void *)new_se_list;
2418 *sepos = new_se_list;
2419 ++new_se_list;
2420 }
2421 *sepos = sesrc;
2422 for (destpos = &ec->destset, destlst = eclose->destset;
2423 destlst;
2424 destpos = &(*destpos)->cdr, destlst = destlst->cdr)
2425 {
2426 sp = rx_hash_find (&rx->set_list_memo,
2427 ((((long)destlst->car) >> 8)
2428 ^ (long)destlst->cdr),
2429 destlst, &nfa_set_hash_rules);
2430 if (sp->binding)
2431 {
2432 destlst = (struct rx_nfa_state_set *)sp->binding;
2433 break;
2434 }
2435 *new_nfa_set = *destlst;
2436 new_nfa_set->car = state_base + destlst->car->id;
2437 sp->binding = (void *)new_nfa_set;
2438 *destpos = new_nfa_set;
2439 ++new_nfa_set;
2440 }
2441 *destpos = destlst;
2442 eclose = eclose->next;
2443 }
2444 }
2445 }
2446 rx_free_hash_table (&rx->se_list_memo, se_memo_freer, &se_list_hash_rules);
2447 bzero (&rx->se_list_memo, sizeof (rx->se_list_memo));
2448 rx_free_hash_table (&rx->set_list_memo, nfa_set_freer, &nfa_set_hash_rules);
2449 bzero (&rx->set_list_memo, sizeof (rx->set_list_memo));
2450
2451 rx_free_nfa (rx);
2452 rx->nfa_states = (struct rx_nfa_state *)*mem;
2453 return 1;
2454}
2455
2456
2457
2458/* The functions in the next several pages define the lazy-NFA-conversion used
2459 * by matchers. The input to this construction is an NFA such as
2460 * is built by compactify_nfa (rx.c). The output is the superNFA.
2461 */
2462
2463/* Match engines can use arbitrary values for opcodes. So, the parse tree
2464 * is built using instructions names (enum rx_opcode), but the superstate
2465 * nfa is populated with mystery opcodes (void *).
2466 *
2467 * For convenience, here is an id table. The opcodes are == to their inxs
2468 *
2469 * The lables in re_search_2 would make good values for instructions.
2470 */
2471
2472void * rx_id_instruction_table[rx_num_instructions] =
2473{
2474 (void *) rx_backtrack_point,
2475 (void *) rx_do_side_effects,
2476 (void *) rx_cache_miss,
2477 (void *) rx_next_char,
2478 (void *) rx_backtrack,
2479 (void *) rx_error_inx
2480};
2481
2482
2483
2484
2485/* Memory mgt. for superstate graphs. */
2486
2487#ifdef __STDC__
2488static char *
2489rx_cache_malloc (struct rx_cache * cache, int bytes)
2490#else
2491static char *
2492rx_cache_malloc (cache, bytes)
2493 struct rx_cache * cache;
2494 int bytes;
2495#endif
2496{
2497 while (cache->bytes_left < bytes)
2498 {
2499 if (cache->memory_pos)
2500 cache->memory_pos = cache->memory_pos->next;
2501 if (!cache->memory_pos)
2502 {
2503 cache->morecore (cache);
2504 if (!cache->memory_pos)
2505 return 0;
2506 }
2507 cache->bytes_left = cache->memory_pos->bytes;
2508 cache->memory_addr = ((char *)cache->memory_pos
2509 + sizeof (struct rx_blocklist));
2510 }
2511 cache->bytes_left -= bytes;
2512 {
2513 char * addr = cache->memory_addr;
2514 cache->memory_addr += bytes;
2515 return addr;
2516 }
2517}
2518
2519#ifdef __STDC__
2520static void
2521rx_cache_free (struct rx_cache * cache,
2522 struct rx_freelist ** freelist, char * mem)
2523#else
2524static void
2525rx_cache_free (cache, freelist, mem)
2526 struct rx_cache * cache;
2527 struct rx_freelist ** freelist;
2528 char * mem;
2529#endif
2530{
2531 struct rx_freelist * it = (struct rx_freelist *)mem;
2532 it->next = *freelist;
2533 *freelist = it;
2534}
2535
2536
2537/* The partially instantiated superstate graph has a transition
2538 * table at every node. There is one entry for every character.
2539 * This fills in the transition for a set.
2540 */
2541#ifdef __STDC__
2542static void
2543install_transition (struct rx_superstate *super,
2544 struct rx_inx *answer, rx_Bitset trcset)
2545#else
2546static void
2547install_transition (super, answer, trcset)
2548 struct rx_superstate *super;
2549 struct rx_inx *answer;
2550 rx_Bitset trcset;
2551#endif
2552{
2553 struct rx_inx * transitions = super->transitions;
2554 int chr;
2555 for (chr = 0; chr < 256; )
2556 if (!*trcset)
2557 {
2558 ++trcset;
2559 chr += 32;
2560 }
2561 else
2562 {
2563 RX_subset sub = *trcset;
2564 RX_subset mask = 1;
2565 int bound = chr + 32;
2566 while (chr < bound)
2567 {
2568 if (sub & mask)
2569 transitions [chr] = *answer;
2570 ++chr;
2571 mask <<= 1;
2572 }
2573 ++trcset;
2574 }
2575}
2576
2577
2578#ifdef __STDC__
2579static int
2580qlen (struct rx_superstate * q)
2581#else
2582static int
2583qlen (q)
2584 struct rx_superstate * q;
2585#endif
2586{
2587 int count = 1;
2588 struct rx_superstate * it;
2589 if (!q)
2590 return 0;
2591 for (it = q->next_recyclable; it != q; it = it->next_recyclable)
2592 ++count;
2593 return count;
2594}
2595
2596#ifdef __STDC__
2597static void
2598check_cache (struct rx_cache * cache)
2599#else
2600static void
2601check_cache (cache)
2602 struct rx_cache * cache;
2603#endif
2604{
2605 struct rx_cache * you_fucked_up = 0;
2606 int total = cache->superstates;
2607 int semi = cache->semifree_superstates;
2608 if (semi != qlen (cache->semifree_superstate))
2609 check_cache (you_fucked_up);
2610 if ((total - semi) != qlen (cache->lru_superstate))
2611 check_cache (you_fucked_up);
2612}
2613
2614/* When a superstate is old and neglected, it can enter a
2615 * semi-free state. A semi-free state is slated to die.
2616 * Incoming transitions to a semi-free state are re-written
2617 * to cause an (interpreted) fault when they are taken.
2618 * The fault handler revives the semi-free state, patches
2619 * incoming transitions back to normal, and continues.
2620 *
2621 * The idea is basicly to free in two stages, aborting
2622 * between the two if the state turns out to be useful again.
2623 * When a free is aborted, the rescued superstate is placed
2624 * in the most-favored slot to maximize the time until it
2625 * is next semi-freed.
2626 */
2627
2628#ifdef __STDC__
2629static void
2630semifree_superstate (struct rx_cache * cache)
2631#else
2632static void
2633semifree_superstate (cache)
2634 struct rx_cache * cache;
2635#endif
2636{
2637 int disqualified = cache->semifree_superstates;
2638 if (disqualified == cache->superstates)
2639 return;
2640 while (cache->lru_superstate->locks)
2641 {
2642 cache->lru_superstate = cache->lru_superstate->next_recyclable;
2643 ++disqualified;
2644 if (disqualified == cache->superstates)
2645 return;
2646 }
2647 {
2648 struct rx_superstate * it = cache->lru_superstate;
2649 it->next_recyclable->prev_recyclable = it->prev_recyclable;
2650 it->prev_recyclable->next_recyclable = it->next_recyclable;
2651 cache->lru_superstate = (it == it->next_recyclable
2652 ? 0
2653 : it->next_recyclable);
2654 if (!cache->semifree_superstate)
2655 {
2656 cache->semifree_superstate = it;
2657 it->next_recyclable = it;
2658 it->prev_recyclable = it;
2659 }
2660 else
2661 {
2662 it->prev_recyclable = cache->semifree_superstate->prev_recyclable;
2663 it->next_recyclable = cache->semifree_superstate;
2664 it->prev_recyclable->next_recyclable = it;
2665 it->next_recyclable->prev_recyclable = it;
2666 }
2667 {
2668 struct rx_distinct_future *df;
2669 it->is_semifree = 1;
2670 ++cache->semifree_superstates;
2671 df = it->transition_refs;
2672 if (df)
2673 {
2674 df->prev_same_dest->next_same_dest = 0;
2675 for (df = it->transition_refs; df; df = df->next_same_dest)
2676 {
2677 df->future_frame.inx = cache->instruction_table[rx_cache_miss];
2678 df->future_frame.data = 0;
2679 df->future_frame.data_2 = (void *) df;
2680 /* If there are any NEXT-CHAR instruction frames that
2681 * refer to this state, we convert them to CACHE-MISS frames.
2682 */
2683 if (!df->effects
2684 && (df->edge->options->next_same_super_edge[0]
2685 == df->edge->options))
2686 install_transition (df->present, &df->future_frame,
2687 df->edge->cset);
2688 }
2689 df = it->transition_refs;
2690 df->prev_same_dest->next_same_dest = df;
2691 }
2692 }
2693 }
2694}
2695
2696
2697#ifdef __STDC__
2698static void
2699refresh_semifree_superstate (struct rx_cache * cache,
2700 struct rx_superstate * super)
2701#else
2702static void
2703refresh_semifree_superstate (cache, super)
2704 struct rx_cache * cache;
2705 struct rx_superstate * super;
2706#endif
2707{
2708 struct rx_distinct_future *df;
2709
2710 if (super->transition_refs)
2711 {
2712 super->transition_refs->prev_same_dest->next_same_dest = 0;
2713 for (df = super->transition_refs; df; df = df->next_same_dest)
2714 {
2715 df->future_frame.inx = cache->instruction_table[rx_next_char];
2716 df->future_frame.data = (void *) super->transitions;
2717 /* CACHE-MISS instruction frames that refer to this state,
2718 * must be converted to NEXT-CHAR frames.
2719 */
2720 if (!df->effects
2721 && (df->edge->options->next_same_super_edge[0]
2722 == df->edge->options))
2723 install_transition (df->present, &df->future_frame,
2724 df->edge->cset);
2725 }
2726 super->transition_refs->prev_same_dest->next_same_dest
2727 = super->transition_refs;
2728 }
2729 if (cache->semifree_superstate == super)
2730 cache->semifree_superstate = (super->prev_recyclable == super
2731 ? 0
2732 : super->prev_recyclable);
2733 super->next_recyclable->prev_recyclable = super->prev_recyclable;
2734 super->prev_recyclable->next_recyclable = super->next_recyclable;
2735
2736 if (!cache->lru_superstate)
2737 (cache->lru_superstate
2738 = super->next_recyclable
2739 = super->prev_recyclable
2740 = super);
2741 else
2742 {
2743 super->next_recyclable = cache->lru_superstate;
2744 super->prev_recyclable = cache->lru_superstate->prev_recyclable;
2745 super->next_recyclable->prev_recyclable = super;
2746 super->prev_recyclable->next_recyclable = super;
2747 }
2748 super->is_semifree = 0;
2749 --cache->semifree_superstates;
2750}
2751
2752#ifdef __STDC__
2753static void
2754rx_refresh_this_superstate (struct rx_cache * cache, struct rx_superstate * superstate)
2755#else
2756static void
2757rx_refresh_this_superstate (cache, superstate)
2758 struct rx_cache * cache;
2759 struct rx_superstate * superstate;
2760#endif
2761{
2762 if (superstate->is_semifree)
2763 refresh_semifree_superstate (cache, superstate);
2764 else if (cache->lru_superstate == superstate)
2765 cache->lru_superstate = superstate->next_recyclable;
2766 else if (superstate != cache->lru_superstate->prev_recyclable)
2767 {
2768 superstate->next_recyclable->prev_recyclable
2769 = superstate->prev_recyclable;
2770 superstate->prev_recyclable->next_recyclable
2771 = superstate->next_recyclable;
2772 superstate->next_recyclable = cache->lru_superstate;
2773 superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
2774 superstate->next_recyclable->prev_recyclable = superstate;
2775 superstate->prev_recyclable->next_recyclable = superstate;
2776 }
2777}
2778
2779#ifdef __STDC__
2780static void
2781release_superset_low (struct rx_cache * cache,
2782 struct rx_superset *set)
2783#else
2784static void
2785release_superset_low (cache, set)
2786 struct rx_cache * cache;
2787 struct rx_superset *set;
2788#endif
2789{
2790 if (!--set->refs)
2791 {
2792 if (set->cdr)
2793 release_superset_low (cache, set->cdr);
2794
2795 set->starts_for = 0;
2796
2797 rx_hash_free
2798 (rx_hash_find
2799 (&cache->superset_table,
2800 (unsigned long)set->car ^ set->id ^ (unsigned long)set->cdr,
2801 (void *)set,
2802 &cache->superset_hash_rules),
2803 &cache->superset_hash_rules);
2804 rx_cache_free (cache, &cache->free_supersets, (char *)set);
2805 }
2806}
2807
2808#ifdef __STDC__
2809RX_DECL void
2810rx_release_superset (struct rx *rx,
2811 struct rx_superset *set)
2812#else
2813RX_DECL void
2814rx_release_superset (rx, set)
2815 struct rx *rx;
2816 struct rx_superset *set;
2817#endif
2818{
2819 release_superset_low (rx->cache, set);
2820}
2821
2822/* This tries to add a new superstate to the superstate freelist.
2823 * It might, as a result, free some edge pieces or hash tables.
2824 * If nothing can be freed because too many locks are being held, fail.
2825 */
2826
2827#ifdef __STDC__
2828static int
2829rx_really_free_superstate (struct rx_cache * cache)
2830#else
2831static int
2832rx_really_free_superstate (cache)
2833 struct rx_cache * cache;
2834#endif
2835{
2836 int locked_superstates = 0;
2837 struct rx_superstate * it;
2838
2839 if (!cache->superstates)
2840 return 0;
2841
2842 {
2843 /* This is a total guess. The idea is that we should expect as
2844 * many misses as we've recently experienced. I.e., cache->misses
2845 * should be the same as cache->semifree_superstates.
2846 */
2847 while ((cache->hits + cache->misses) > cache->superstates_allowed)
2848 {
2849 cache->hits >>= 1;
2850 cache->misses >>= 1;
2851 }
2852 if ( ((cache->hits + cache->misses) * cache->semifree_superstates)
2853 < (cache->superstates * cache->misses))
2854 {
2855 semifree_superstate (cache);
2856 semifree_superstate (cache);
2857 }
2858 }
2859
2860 while (cache->semifree_superstate && cache->semifree_superstate->locks)
2861 {
2862 refresh_semifree_superstate (cache, cache->semifree_superstate);
2863 ++locked_superstates;
2864 if (locked_superstates == cache->superstates)
2865 return 0;
2866 }
2867
2868 if (cache->semifree_superstate)
2869 {
2870 it = cache->semifree_superstate;
2871 it->next_recyclable->prev_recyclable = it->prev_recyclable;
2872 it->prev_recyclable->next_recyclable = it->next_recyclable;
2873 cache->semifree_superstate = ((it == it->next_recyclable)
2874 ? 0
2875 : it->next_recyclable);
2876 --cache->semifree_superstates;
2877 }
2878 else
2879 {
2880 while (cache->lru_superstate->locks)
2881 {
2882 cache->lru_superstate = cache->lru_superstate->next_recyclable;
2883 ++locked_superstates;
2884 if (locked_superstates == cache->superstates)
2885 return 0;
2886 }
2887 it = cache->lru_superstate;
2888 it->next_recyclable->prev_recyclable = it->prev_recyclable;
2889 it->prev_recyclable->next_recyclable = it->next_recyclable;
2890 cache->lru_superstate = ((it == it->next_recyclable)
2891 ? 0
2892 : it->next_recyclable);
2893 }
2894
2895 if (it->transition_refs)
2896 {
2897 struct rx_distinct_future *df;
2898 for (df = it->transition_refs,
2899 df->prev_same_dest->next_same_dest = 0;
2900 df;
2901 df = df->next_same_dest)
2902 {
2903 df->future_frame.inx = cache->instruction_table[rx_cache_miss];
2904 df->future_frame.data = 0;
2905 df->future_frame.data_2 = (void *) df;
2906 df->future = 0;
2907 }
2908 it->transition_refs->prev_same_dest->next_same_dest =
2909 it->transition_refs;
2910 }
2911 {
2912 struct rx_super_edge *tc = it->edges;
2913 while (tc)
2914 {
2915 struct rx_distinct_future * df;
2916 struct rx_super_edge *tct = tc->next;
2917 df = tc->options;
2918 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
2919 while (df)
2920 {
2921 struct rx_distinct_future *dft = df;
2922 df = df->next_same_super_edge[0];
2923
2924
2925 if (dft->future && dft->future->transition_refs == dft)
2926 {
2927 dft->future->transition_refs = dft->next_same_dest;
2928 if (dft->future->transition_refs == dft)
2929 dft->future->transition_refs = 0;
2930 }
2931 dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
2932 dft->prev_same_dest->next_same_dest = dft->next_same_dest;
2933 rx_cache_free (cache, &cache->free_discernable_futures,
2934 (char *)dft);
2935 }
2936 rx_cache_free (cache, &cache->free_transition_classes, (char *)tc);
2937 tc = tct;
2938 }
2939 }
2940
2941 if (it->contents->superstate == it)
2942 it->contents->superstate = 0;
2943 release_superset_low (cache, it->contents);
2944 rx_cache_free (cache, &cache->free_superstates, (char *)it);
2945 --cache->superstates;
2946 return 1;
2947}
2948
2949#ifdef __STDC__
2950static char *
2951rx_cache_get (struct rx_cache * cache,
2952 struct rx_freelist ** freelist)
2953#else
2954static char *
2955rx_cache_get (cache, freelist)
2956 struct rx_cache * cache;
2957 struct rx_freelist ** freelist;
2958#endif
2959{
2960 while (!*freelist && rx_really_free_superstate (cache))
2961 ;
2962 if (!*freelist)
2963 return 0;
2964 {
2965 struct rx_freelist * it = *freelist;
2966 *freelist = it->next;
2967 return (char *)it;
2968 }
2969}
2970
2971#ifdef __STDC__
2972static char *
2973rx_cache_malloc_or_get (struct rx_cache * cache,
2974 struct rx_freelist ** freelist, int bytes)
2975#else
2976static char *
2977rx_cache_malloc_or_get (cache, freelist, bytes)
2978 struct rx_cache * cache;
2979 struct rx_freelist ** freelist;
2980 int bytes;
2981#endif
2982{
2983 if (!*freelist)
2984 {
2985 char * answer = rx_cache_malloc (cache, bytes);
2986 if (answer)
2987 return answer;
2988 }
2989
2990 return rx_cache_get (cache, freelist);
2991}
2992
2993#ifdef __STDC__
2994static char *
2995rx_cache_get_superstate (struct rx_cache * cache)
2996#else
2997static char *
2998rx_cache_get_superstate (cache)
2999 struct rx_cache * cache;
3000#endif
3001{
3002 char * answer;
3003 int bytes = ( sizeof (struct rx_superstate)
3004 + cache->local_cset_size * sizeof (struct rx_inx));
3005 if (!cache->free_superstates
3006 && (cache->superstates < cache->superstates_allowed))
3007 {
3008 answer = rx_cache_malloc (cache, bytes);
3009 if (answer)
3010 {
3011 ++cache->superstates;
3012 return answer;
3013 }
3014 }
3015 answer = rx_cache_get (cache, &cache->free_superstates);
3016 if (!answer)
3017 {
3018 answer = rx_cache_malloc (cache, bytes);
3019 if (answer)
3020 ++cache->superstates_allowed;
3021 }
3022 ++cache->superstates;
3023 return answer;
3024}
3025
3026
3027
3028
3029#ifdef __STDC__
3030static int
3031supersetcmp (void * va, void * vb)
3032#else
3033static int
3034supersetcmp (va, vb)
3035 void * va;
3036 void * vb;
3037#endif
3038{
3039 struct rx_superset * a = (struct rx_superset *)va;
3040 struct rx_superset * b = (struct rx_superset *)vb;
3041 return ( (a == b)
3042 || (a && b && (a->car == b->car) && (a->cdr == b->cdr)));
3043}
3044
3045#ifdef __STDC__
3046static struct rx_hash_item *
3047superset_allocator (struct rx_hash_rules * rules, void * val)
3048#else
3049static struct rx_hash_item *
3050superset_allocator (rules, val)
3051 struct rx_hash_rules * rules;
3052 void * val;
3053#endif
3054{
3055 struct rx_cache * cache
3056 = ((struct rx_cache *)
3057 ((char *)rules
3058 - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
3059 struct rx_superset * template = (struct rx_superset *)val;
3060 struct rx_superset * newset
3061 = ((struct rx_superset *)
3062 rx_cache_malloc_or_get (cache,
3063 &cache->free_supersets,
3064 sizeof (*template)));
3065 if (!newset)
3066 return 0;
3067 newset->refs = 0;
3068 newset->car = template->car;
3069 newset->id = template->car->id;
3070 newset->cdr = template->cdr;
3071 newset->superstate = 0;
3072 rx_protect_superset (rx, template->cdr);
3073 newset->hash_item.data = (void *)newset;
3074 newset->hash_item.binding = 0;
3075 return &newset->hash_item;
3076}
3077
3078#ifdef __STDC__
3079static struct rx_hash *
3080super_hash_allocator (struct rx_hash_rules * rules)
3081#else
3082static struct rx_hash *
3083super_hash_allocator (rules)
3084 struct rx_hash_rules * rules;
3085#endif
3086{
3087 struct rx_cache * cache
3088 = ((struct rx_cache *)
3089 ((char *)rules
3090 - (unsigned long)(&((struct rx_cache *)0)->superset_hash_rules)));
3091 return ((struct rx_hash *)
3092 rx_cache_malloc_or_get (cache,
3093 &cache->free_hash, sizeof (struct rx_hash)));
3094}
3095
3096
3097#ifdef __STDC__
3098static void
3099super_hash_liberator (struct rx_hash * hash, struct rx_hash_rules * rules)
3100#else
3101static void
3102super_hash_liberator (hash, rules)
3103 struct rx_hash * hash;
3104 struct rx_hash_rules * rules;
3105#endif
3106{
3107 struct rx_cache * cache
3108 = ((struct rx_cache *)
3109 (char *)rules - (long)(&((struct rx_cache *)0)->superset_hash_rules));
3110 rx_cache_free (cache, &cache->free_hash, (char *)hash);
3111}
3112
3113#ifdef __STDC__
3114static void
3115superset_hash_item_liberator (struct rx_hash_item * it,
3116 struct rx_hash_rules * rules)
3117#else
3118static void
3119superset_hash_item_liberator (it, rules) /* Well, it does ya know. */
3120 struct rx_hash_item * it;
3121 struct rx_hash_rules * rules;
3122#endif
3123{
3124}
3125
3126int rx_cache_bound = 128;
3127static int rx_default_cache_got = 0;
3128
3129#ifdef __STDC__
3130static int
3131bytes_for_cache_size (int supers, int cset_size)
3132#else
3133static int
3134bytes_for_cache_size (supers, cset_size)
3135 int supers;
3136 int cset_size;
3137#endif
3138{
3139 /* What the hell is this? !!!*/
3140 return (int)
3141 ((float)supers *
3142 ( (1.03 * (float) ( rx_sizeof_bitset (cset_size)
3143 + sizeof (struct rx_super_edge)))
3144 + (1.80 * (float) sizeof (struct rx_possible_future))
3145 + (float) ( sizeof (struct rx_superstate)
3146 + cset_size * sizeof (struct rx_inx))));
3147}
3148
3149#ifdef __STDC__
3150static void
3151rx_morecore (struct rx_cache * cache)
3152#else
3153static void
3154rx_morecore (cache)
3155 struct rx_cache * cache;
3156#endif
3157{
3158 if (rx_default_cache_got >= rx_cache_bound)
3159 return;
3160
3161 rx_default_cache_got += 16;
3162 cache->superstates_allowed = rx_cache_bound;
3163 {
3164 struct rx_blocklist ** pos = &cache->memory;
3165 int size = bytes_for_cache_size (16, cache->local_cset_size);
3166 while (*pos)
3167 pos = &(*pos)->next;
3168 *pos = ((struct rx_blocklist *)
3169 malloc (size + sizeof (struct rx_blocklist)));
3170 if (!*pos)
3171 return;
3172
3173 (*pos)->next = 0;
3174 (*pos)->bytes = size;
3175 cache->memory_pos = *pos;
3176 cache->memory_addr = (char *)*pos + sizeof (**pos);
3177 cache->bytes_left = size;
3178 }
3179}
3180
3181static struct rx_cache default_cache =
3182{
3183 {
3184 supersetcmp,
3185 super_hash_allocator,
3186 super_hash_liberator,
3187 superset_allocator,
3188 superset_hash_item_liberator,
3189 },
3190 0,
3191 0,
3192 0,
3193 0,
3194 rx_morecore,
3195
3196 0,
3197 0,
3198 0,
3199 0,
3200 0,
3201
3202 0,
3203 0,
3204
3205 0,
3206
3207 0,
3208 0,
3209 0,
3210 0,
3211 128,
3212
3213 256,
3214 rx_id_instruction_table,
3215
3216 {
3217 0,
3218 0,
3219 {0},
3220 {0},
3221 {0}
3222 }
3223};
3224
3225/* This adds an element to a superstate set. These sets are lists, such
3226 * that lists with == elements are ==. The empty set is returned by
3227 * superset_cons (rx, 0, 0) and is NOT equivelent to
3228 * (struct rx_superset)0.
3229 */
3230
3231#ifdef __STDC__
3232RX_DECL struct rx_superset *
3233rx_superset_cons (struct rx * rx,
3234 struct rx_nfa_state *car, struct rx_superset *cdr)
3235#else
3236RX_DECL struct rx_superset *
3237rx_superset_cons (rx, car, cdr)
3238 struct rx * rx;
3239 struct rx_nfa_state *car;
3240 struct rx_superset *cdr;
3241#endif
3242{
3243 struct rx_cache * cache = rx->cache;
3244 if (!car && !cdr)
3245 {
3246 if (!cache->empty_superset)
3247 {
3248 cache->empty_superset
3249 = ((struct rx_superset *)
3250 rx_cache_malloc_or_get (cache, &cache->free_supersets,
3251 sizeof (struct rx_superset)));
3252 if (!cache->empty_superset)
3253 return 0;
3254 bzero (cache->empty_superset, sizeof (struct rx_superset));
3255 cache->empty_superset->refs = 1000;
3256 }
3257 return cache->empty_superset;
3258 }
3259 {
3260 struct rx_superset template;
3261 struct rx_hash_item * hit;
3262 template.car = car;
3263 template.cdr = cdr;
3264 template.id = car->id;
3265 hit = rx_hash_store (&cache->superset_table,
3266 (unsigned long)car ^ car->id ^ (unsigned long)cdr,
3267 (void *)&template,
3268 &cache->superset_hash_rules);
3269 return (hit
3270 ? (struct rx_superset *)hit->data
3271 : 0);
3272 }
3273}
3274
3275/* This computes a union of two NFA state sets. The sets do not have the
3276 * same representation though. One is a RX_SUPERSET structure (part
3277 * of the superstate NFA) and the other is an NFA_STATE_SET (part of the NFA).
3278 */
3279
3280#ifdef __STDC__
3281RX_DECL struct rx_superset *
3282rx_superstate_eclosure_union
3283 (struct rx * rx, struct rx_superset *set, struct rx_nfa_state_set *ecl)
3284#else
3285RX_DECL struct rx_superset *
3286rx_superstate_eclosure_union (rx, set, ecl)
3287 struct rx * rx;
3288 struct rx_superset *set;
3289 struct rx_nfa_state_set *ecl;
3290#endif
3291{
3292 if (!ecl)
3293 return set;
3294
3295 if (!set->car)
3296 return rx_superset_cons (rx, ecl->car,
3297 rx_superstate_eclosure_union (rx, set, ecl->cdr));
3298 if (set->car == ecl->car)
3299 return rx_superstate_eclosure_union (rx, set, ecl->cdr);
3300
3301 {
3302 struct rx_superset * tail;
3303 struct rx_nfa_state * first;
3304
3305 if (set->car > ecl->car)
3306 {
3307 tail = rx_superstate_eclosure_union (rx, set->cdr, ecl);
3308 first = set->car;
3309 }
3310 else
3311 {
3312 tail = rx_superstate_eclosure_union (rx, set, ecl->cdr);
3313 first = ecl->car;
3314 }
3315 if (!tail)
3316 return 0;
3317 else
3318 {
3319 struct rx_superset * answer;
3320 answer = rx_superset_cons (rx, first, tail);
3321 if (!answer)
3322 {
3323 rx_protect_superset (rx, tail);
3324 rx_release_superset (rx, tail);
3325 return 0;
3326 }
3327 else
3328 return answer;
3329 }
3330 }
3331}
3332
3333
3334
3335
3336
3337/*
3338 * This makes sure that a list of rx_distinct_futures contains
3339 * a future for each possible set of side effects in the eclosure
3340 * of a given state. This is some of the work of filling in a
3341 * superstate transition.
3342 */
3343
3344#ifdef __STDC__
3345static struct rx_distinct_future *
3346include_futures (struct rx *rx,
3347 struct rx_distinct_future *df, struct rx_nfa_state
3348 *state, struct rx_superstate *superstate)
3349#else
3350static struct rx_distinct_future *
3351include_futures (rx, df, state, superstate)
3352 struct rx *rx;
3353 struct rx_distinct_future *df;
3354 struct rx_nfa_state *state;
3355 struct rx_superstate *superstate;
3356#endif
3357{
3358 struct rx_possible_future *future;
3359 struct rx_cache * cache = rx->cache;
3360 for (future = state->futures; future; future = future->next)
3361 {
3362 struct rx_distinct_future *dfp;
3363 struct rx_distinct_future *insert_before = 0;
3364 if (df)
3365 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
3366 for (dfp = df; dfp; dfp = dfp->next_same_super_edge[0])
3367 if (dfp->effects == future->effects)
3368 break;
3369 else
3370 {
3371 int order = rx->se_list_cmp (rx, dfp->effects, future->effects);
3372 if (order > 0)
3373 {
3374 insert_before = dfp;
3375 dfp = 0;
3376 break;
3377 }
3378 }
3379 if (df)
3380 df->next_same_super_edge[1]->next_same_super_edge[0] = df;
3381 if (!dfp)
3382 {
3383 dfp
3384 = ((struct rx_distinct_future *)
3385 rx_cache_malloc_or_get (cache, &cache->free_discernable_futures,
3386 sizeof (struct rx_distinct_future)));
3387 if (!dfp)
3388 return 0;
3389 if (!df)
3390 {
3391 df = insert_before = dfp;
3392 df->next_same_super_edge[0] = df->next_same_super_edge[1] = df;
3393 }
3394 else if (!insert_before)
3395 insert_before = df;
3396 else if (insert_before == df)
3397 df = dfp;
3398
3399 dfp->next_same_super_edge[0] = insert_before;
3400 dfp->next_same_super_edge[1]
3401 = insert_before->next_same_super_edge[1];
3402 dfp->next_same_super_edge[1]->next_same_super_edge[0] = dfp;
3403 dfp->next_same_super_edge[0]->next_same_super_edge[1] = dfp;
3404 dfp->next_same_dest = dfp->prev_same_dest = dfp;
3405 dfp->future = 0;
3406 dfp->present = superstate;
3407 dfp->future_frame.inx = rx->instruction_table[rx_cache_miss];
3408 dfp->future_frame.data = 0;
3409 dfp->future_frame.data_2 = (void *) dfp;
3410 dfp->side_effects_frame.inx
3411 = rx->instruction_table[rx_do_side_effects];
3412 dfp->side_effects_frame.data = 0;
3413 dfp->side_effects_frame.data_2 = (void *) dfp;
3414 dfp->effects = future->effects;
3415 }
3416 }
3417 return df;
3418}
3419
3420
3421
3422
3423/* This constructs a new superstate from its state set. The only
3424 * complexity here is memory management.
3425 */
3426#ifdef __STDC__
3427RX_DECL struct rx_superstate *
3428rx_superstate (struct rx *rx,
3429 struct rx_superset *set)
3430#else
3431RX_DECL struct rx_superstate *
3432rx_superstate (rx, set)
3433 struct rx *rx;
3434 struct rx_superset *set;
3435#endif
3436{
3437 struct rx_cache * cache = rx->cache;
3438 struct rx_superstate * superstate = 0;
3439
3440 /* Does the superstate already exist in the cache? */
3441 if (set->superstate)
3442 {
3443 if (set->superstate->rx_id != rx->rx_id)
3444 {
3445 /* Aha. It is in the cache, but belongs to a superstate
3446 * that refers to an NFA that no longer exists.
3447 * (We know it no longer exists because it was evidently
3448 * stored in the same region of memory as the current nfa
3449 * yet it has a different id.)
3450 */
3451 superstate = set->superstate;
3452 if (!superstate->is_semifree)
3453 {
3454 if (cache->lru_superstate == superstate)
3455 {
3456 cache->lru_superstate = superstate->next_recyclable;
3457 if (cache->lru_superstate == superstate)
3458 cache->lru_superstate = 0;
3459 }
3460 {
3461 superstate->next_recyclable->prev_recyclable
3462 = superstate->prev_recyclable;
3463 superstate->prev_recyclable->next_recyclable
3464 = superstate->next_recyclable;
3465 if (!cache->semifree_superstate)
3466 {
3467 (cache->semifree_superstate
3468 = superstate->next_recyclable
3469 = superstate->prev_recyclable
3470 = superstate);
3471 }
3472 else
3473 {
3474 superstate->next_recyclable = cache->semifree_superstate;
3475 superstate->prev_recyclable
3476 = cache->semifree_superstate->prev_recyclable;
3477 superstate->next_recyclable->prev_recyclable
3478 = superstate;
3479 superstate->prev_recyclable->next_recyclable
3480 = superstate;
3481 cache->semifree_superstate = superstate;
3482 }
3483 ++cache->semifree_superstates;
3484 }
3485 }
3486 set->superstate = 0;
3487 goto handle_cache_miss;
3488 }
3489 ++cache->hits;
3490 superstate = set->superstate;
3491
3492 rx_refresh_this_superstate (cache, superstate);
3493 return superstate;
3494 }
3495
3496 handle_cache_miss:
3497
3498 /* This point reached only for cache misses. */
3499 ++cache->misses;
3500#if RX_DEBUG
3501 if (rx_debug_trace > 1)
3502 {
3503 struct rx_superset * setp = set;
3504 fprintf (stderr, "Building a superstet %d(%d): ", rx->rx_id, set);
3505 while (setp)
3506 {
3507 fprintf (stderr, "%d ", setp->id);
3508 setp = setp->cdr;
3509 }
3510 fprintf (stderr, "(%d)\n", set);
3511 }
3512#endif
3513 superstate = (struct rx_superstate *)rx_cache_get_superstate (cache);
3514 if (!superstate)
3515 return 0;
3516
3517 if (!cache->lru_superstate)
3518 (cache->lru_superstate
3519 = superstate->next_recyclable
3520 = superstate->prev_recyclable
3521 = superstate);
3522 else
3523 {
3524 superstate->next_recyclable = cache->lru_superstate;
3525 superstate->prev_recyclable = cache->lru_superstate->prev_recyclable;
3526 ( superstate->prev_recyclable->next_recyclable
3527 = superstate->next_recyclable->prev_recyclable
3528 = superstate);
3529 }
3530 superstate->rx_id = rx->rx_id;
3531 superstate->transition_refs = 0;
3532 superstate->locks = 0;
3533 superstate->is_semifree = 0;
3534 set->superstate = superstate;
3535 superstate->contents = set;
3536 rx_protect_superset (rx, set);
3537 superstate->edges = 0;
3538 {
3539 int x;
3540 /* None of the transitions from this superstate are known yet. */
3541 for (x = 0; x < rx->local_cset_size; ++x) /* &&&&& 3.8 % */
3542 {
3543 struct rx_inx * ifr = &superstate->transitions[x];
3544 ifr->inx = rx->instruction_table [rx_cache_miss];
3545 ifr->data = ifr->data_2 = 0;
3546 }
3547 }
3548 return superstate;
3549}
3550
3551
3552
3553/* This computes the destination set of one edge of the superstate NFA.
3554 * Note that a RX_DISTINCT_FUTURE is a superstate edge.
3555 * Returns 0 on an allocation failure.
3556 */
3557
3558#ifdef __STDC__
3559static int
3560solve_destination (struct rx *rx, struct rx_distinct_future *df)
3561#else
3562static int
3563solve_destination (rx, df)
3564 struct rx *rx;
3565 struct rx_distinct_future *df;
3566#endif
3567{
3568 struct rx_super_edge *tc = df->edge;
3569 struct rx_superset *nfa_state;
3570 struct rx_superset *nil_set = rx_superset_cons (rx, 0, 0);
3571 struct rx_superset *solution = nil_set;
3572 struct rx_superstate *dest;
3573
3574 rx_protect_superset (rx, solution);
3575 /* Iterate over all NFA states in the state set of this superstate. */
3576 for (nfa_state = df->present->contents;
3577 nfa_state->car;
3578 nfa_state = nfa_state->cdr)
3579 {
3580 struct rx_nfa_edge *e;
3581 /* Iterate over all edges of each NFA state. */
3582 for (e = nfa_state->car->edges; e; e = e->next)
3583 /* If we find an edge that is labeled with
3584 * the characters we are solving for.....
3585 */
3586 if (rx_bitset_is_subset (rx->local_cset_size,
3587 tc->cset, e->params.cset))
3588 {
3589 struct rx_nfa_state *n = e->dest;
3590 struct rx_possible_future *pf;
3591 /* ....search the partial epsilon closures of the destination
3592 * of that edge for a path that involves the same set of
3593 * side effects we are solving for.
3594 * If we find such a RX_POSSIBLE_FUTURE, we add members to the
3595 * stateset we are computing.
3596 */
3597 for (pf = n->futures; pf; pf = pf->next)
3598 if (pf->effects == df->effects)
3599 {
3600 struct rx_superset * old_sol;
3601 old_sol = solution;
3602 solution = rx_superstate_eclosure_union (rx, solution,
3603 pf->destset);
3604 if (!solution)
3605 return 0;
3606 rx_protect_superset (rx, solution);
3607 rx_release_superset (rx, old_sol);
3608 }
3609 }
3610 }
3611 /* It is possible that the RX_DISTINCT_FUTURE we are working on has
3612 * the empty set of NFA states as its definition. In that case, this
3613 * is a failure point.
3614 */
3615 if (solution == nil_set)
3616 {
3617 df->future_frame.inx = (void *) rx_backtrack;
3618 df->future_frame.data = 0;
3619 df->future_frame.data_2 = 0;
3620 return 1;
3621 }
3622 dest = rx_superstate (rx, solution);
3623 rx_release_superset (rx, solution);
3624 if (!dest)
3625 return 0;
3626
3627 {
3628 struct rx_distinct_future *dft;
3629 dft = df;
3630 df->prev_same_dest->next_same_dest = 0;
3631 while (dft)
3632 {
3633 dft->future = dest;
3634 dft->future_frame.inx = rx->instruction_table[rx_next_char];
3635 dft->future_frame.data = (void *) dest->transitions;
3636 dft = dft->next_same_dest;
3637 }
3638 df->prev_same_dest->next_same_dest = df;
3639 }
3640 if (!dest->transition_refs)
3641 dest->transition_refs = df;
3642 else
3643 {
3644 struct rx_distinct_future *dft = dest->transition_refs->next_same_dest;
3645 dest->transition_refs->next_same_dest = df->next_same_dest;
3646 df->next_same_dest->prev_same_dest = dest->transition_refs;
3647 df->next_same_dest = dft;
3648 dft->prev_same_dest = df;
3649 }
3650 return 1;
3651}
3652
3653
3654/* This takes a superstate and a character, and computes some edges
3655 * from the superstate NFA. In particular, this computes all edges
3656 * that lead from SUPERSTATE given CHR. This function also
3657 * computes the set of characters that share this edge set.
3658 * This returns 0 on allocation error.
3659 * The character set and list of edges are returned through
3660 * the paramters CSETOUT and DFOUT.
3661} */
3662
3663#ifdef __STDC__
3664static int
3665compute_super_edge (struct rx *rx, struct rx_distinct_future **dfout,
3666 rx_Bitset csetout, struct rx_superstate *superstate,
3667 unsigned char chr)
3668#else
3669static int
3670compute_super_edge (rx, dfout, csetout, superstate, chr)
3671 struct rx *rx;
3672 struct rx_distinct_future **dfout;
3673 rx_Bitset csetout;
3674 struct rx_superstate *superstate;
3675 unsigned char chr;
3676#endif
3677{
3678 struct rx_superset *stateset = superstate->contents;
3679
3680 /* To compute the set of characters that share edges with CHR,
3681 * we start with the full character set, and subtract.
3682 */
3683 rx_bitset_universe (rx->local_cset_size, csetout);
3684 *dfout = 0;
3685
3686 /* Iterate over the NFA states in the superstate state-set. */
3687 while (stateset->car)
3688 {
3689 struct rx_nfa_edge *e;
3690 for (e = stateset->car->edges; e; e = e->next)
3691 if (RX_bitset_member (e->params.cset, chr))
3692 {
3693 /* If we find an NFA edge that applies, we make sure there
3694 * are corresponding edges in the superstate NFA.
3695 */
3696 {
3697 struct rx_distinct_future * saved;
3698 saved = *dfout;
3699 *dfout = include_futures (rx, *dfout, e->dest, superstate);
3700 if (!*dfout)
3701 {
3702 struct rx_distinct_future * df;
3703 df = saved;
3704 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
3705 while (df)
3706 {
3707 struct rx_distinct_future *dft;
3708 dft = df;
3709 df = df->next_same_super_edge[0];
3710
3711 if (dft->future && dft->future->transition_refs == dft)
3712 {
3713 dft->future->transition_refs = dft->next_same_dest;
3714 if (dft->future->transition_refs == dft)
3715 dft->future->transition_refs = 0;
3716 }
3717 dft->next_same_dest->prev_same_dest = dft->prev_same_dest;
3718 dft->prev_same_dest->next_same_dest = dft->next_same_dest;
3719 rx_cache_free (rx->cache,
3720 &rx->cache->free_discernable_futures,
3721 (char *)dft);
3722 }
3723 return 0;
3724 }
3725 }
3726 /* We also trim the character set a bit. */
3727 rx_bitset_intersection (rx->local_cset_size,
3728 csetout, e->params.cset);
3729 }
3730 else
3731 /* An edge that doesn't apply at least tells us some characters
3732 * that don't share the same edge set as CHR.
3733 */
3734 rx_bitset_difference (rx->local_cset_size, csetout, e->params.cset);
3735 stateset = stateset->cdr;
3736 }
3737 return 1;
3738}
3739
3740
3741
3742/* This is a constructor for RX_SUPER_EDGE structures. These are
3743 * wrappers for lists of superstate NFA edges that share character sets labels.
3744 * If a transition class contains more than one rx_distinct_future (superstate
3745 * edge), then it represents a non-determinism in the superstate NFA.
3746 */
3747
3748#ifdef __STDC__
3749static struct rx_super_edge *
3750rx_super_edge (struct rx *rx,
3751 struct rx_superstate *super, rx_Bitset cset,
3752 struct rx_distinct_future *df)
3753#else
3754static struct rx_super_edge *
3755rx_super_edge (rx, super, cset, df)
3756 struct rx *rx;
3757 struct rx_superstate *super;
3758 rx_Bitset cset;
3759 struct rx_distinct_future *df;
3760#endif
3761{
3762 struct rx_super_edge *tc =
3763 (struct rx_super_edge *)rx_cache_malloc_or_get
3764 (rx->cache, &rx->cache->free_transition_classes,
3765 sizeof (struct rx_super_edge) + rx_sizeof_bitset (rx->local_cset_size));
3766
3767 if (!tc)
3768 return 0;
3769 tc->next = super->edges;
3770 super->edges = tc;
3771 tc->rx_backtrack_frame.inx = rx->instruction_table[rx_backtrack_point];
3772 tc->rx_backtrack_frame.data = 0;
3773 tc->rx_backtrack_frame.data_2 = (void *) tc;
3774 tc->options = df;
3775 tc->cset = (rx_Bitset) ((char *) tc + sizeof (*tc));
3776 rx_bitset_assign (rx->local_cset_size, tc->cset, cset);
3777 if (df)
3778 {
3779 struct rx_distinct_future * dfp = df;
3780 df->next_same_super_edge[1]->next_same_super_edge[0] = 0;
3781 while (dfp)
3782 {
3783 dfp->edge = tc;
3784 dfp = dfp->next_same_super_edge[0];
3785 }
3786 df->next_same_super_edge[1]->next_same_super_edge[0] = df;
3787 }
3788 return tc;
3789}
3790
3791
3792/* There are three kinds of cache miss. The first occurs when a
3793 * transition is taken that has never been computed during the
3794 * lifetime of the source superstate. That cache miss is handled by
3795 * calling COMPUTE_SUPER_EDGE. The second kind of cache miss
3796 * occurs when the destination superstate of a transition doesn't
3797 * exist. SOLVE_DESTINATION is used to construct the destination superstate.
3798 * Finally, the third kind of cache miss occurs when the destination
3799 * superstate of a transition is in a `semi-free state'. That case is
3800 * handled by UNFREE_SUPERSTATE.
3801 *
3802 * The function of HANDLE_CACHE_MISS is to figure out which of these
3803 * cases applies.
3804 */
3805
3806#ifdef __STDC__
3807static void
3808install_partial_transition (struct rx_superstate *super,
3809 struct rx_inx *answer,
3810 RX_subset set, int offset)
3811#else
3812static void
3813install_partial_transition (super, answer, set, offset)
3814 struct rx_superstate *super;
3815 struct rx_inx *answer;
3816 RX_subset set;
3817 int offset;
3818#endif
3819{
3820 int start = offset;
3821 int end = start + 32;
3822 RX_subset pos = 1;
3823 struct rx_inx * transitions = super->transitions;
3824
3825 while (start < end)
3826 {
3827 if (set & pos)
3828 transitions[start] = *answer;
3829 pos <<= 1;
3830 ++start;
3831 }
3832}
3833
3834
3835#ifdef __STDC__
3836RX_DECL struct rx_inx *
3837rx_handle_cache_miss
3838 (struct rx *rx, struct rx_superstate *super, unsigned char chr, void *data)
3839#else
3840RX_DECL struct rx_inx *
3841rx_handle_cache_miss (rx, super, chr, data)
3842 struct rx *rx;
3843 struct rx_superstate *super;
3844 unsigned char chr;
3845 void *data;
3846#endif
3847{
3848 int offset = chr / RX_subset_bits;
3849 struct rx_distinct_future *df = data;
3850
3851 if (!df) /* must be the shared_cache_miss_frame */
3852 {
3853 /* Perhaps this is just a transition waiting to be filled. */
3854 struct rx_super_edge *tc;
3855 RX_subset mask = rx_subset_singletons [chr % RX_subset_bits];
3856
3857 for (tc = super->edges; tc; tc = tc->next)
3858 if (tc->cset[offset] & mask)
3859 {
3860 struct rx_inx * answer;
3861 df = tc->options;
3862 answer = ((tc->options->next_same_super_edge[0] != tc->options)
3863 ? &tc->rx_backtrack_frame
3864 : (df->effects
3865 ? &df->side_effects_frame
3866 : &df->future_frame));
3867 install_partial_transition (super, answer,
3868 tc->cset [offset], offset * 32);
3869 return answer;
3870 }
3871 /* Otherwise, it's a flushed or newly encountered edge. */
3872 {
3873 char cset_space[1024]; /* this limit is far from unreasonable */
3874 rx_Bitset trcset;
3875 struct rx_inx *answer;
3876
3877 if (rx_sizeof_bitset (rx->local_cset_size) > sizeof (cset_space))
3878 return 0; /* If the arbitrary limit is hit, always fail */
3879 /* cleanly. */
3880 trcset = (rx_Bitset)cset_space;
3881 rx_lock_superstate (rx, super);
3882 if (!compute_super_edge (rx, &df, trcset, super, chr))
3883 {
3884 rx_unlock_superstate (rx, super);
3885 return 0;
3886 }
3887 if (!df) /* We just computed the fail transition. */
3888 {
3889 static struct rx_inx
3890 shared_fail_frame = { 0, 0, (void *)rx_backtrack, 0 };
3891 answer = &shared_fail_frame;
3892 }
3893 else
3894 {
3895 tc = rx_super_edge (rx, super, trcset, df);
3896 if (!tc)
3897 {
3898 rx_unlock_superstate (rx, super);
3899 return 0;
3900 }
3901 answer = ((tc->options->next_same_super_edge[0] != tc->options)
3902 ? &tc->rx_backtrack_frame
3903 : (df->effects
3904 ? &df->side_effects_frame
3905 : &df->future_frame));
3906 }
3907 install_partial_transition (super, answer,
3908 trcset[offset], offset * 32);
3909 rx_unlock_superstate (rx, super);
3910 return answer;
3911 }
3912 }
3913 else if (df->future) /* A cache miss on an edge with a future? Must be
3914 * a semi-free destination. */
3915 {
3916 if (df->future->is_semifree)
3917 refresh_semifree_superstate (rx->cache, df->future);
3918 return &df->future_frame;
3919 }
3920 else
3921 /* no future superstate on an existing edge */
3922 {
3923 rx_lock_superstate (rx, super);
3924 if (!solve_destination (rx, df))
3925 {
3926 rx_unlock_superstate (rx, super);
3927 return 0;
3928 }
3929 if (!df->effects
3930 && (df->edge->options->next_same_super_edge[0] == df->edge->options))
3931 install_partial_transition (super, &df->future_frame,
3932 df->edge->cset[offset], offset * 32);
3933 rx_unlock_superstate (rx, super);
3934 return &df->future_frame;
3935 }
3936}
3937
3938
3939
3940
3941
3942/* The rest of the code provides a regex.c compatable interface. */
3943
3944
3945__const__ char *re_error_msg[] =
3946{
3947 0, /* REG_NOUT */
3948 "No match", /* REG_NOMATCH */
3949 "Invalid regular expression", /* REG_BADPAT */
3950 "Invalid collation character", /* REG_ECOLLATE */
3951 "Invalid character class name", /* REG_ECTYPE */
3952 "Trailing backslash", /* REG_EESCAPE */
3953 "Invalid back reference", /* REG_ESUBREG */
3954 "Unmatched [ or [^", /* REG_EBRACK */
3955 "Unmatched ( or \\(", /* REG_EPAREN */
3956 "Unmatched \\{", /* REG_EBRACE */
3957 "Invalid content of \\{\\}", /* REG_BADBR */
3958 "Invalid range end", /* REG_ERANGE */
3959 "Memory exhausted", /* REG_ESPACE */
3960 "Invalid preceding regular expression", /* REG_BADRPT */
3961 "Premature end of regular expression", /* REG_EEND */
3962 "Regular expression too big", /* REG_ESIZE */
3963 "Unmatched ) or \\)", /* REG_ERPAREN */
3964};
3965
3966
3967
3968
3969/*
3970 * Macros used while compiling patterns.
3971 *
3972 * By convention, PEND points just past the end of the uncompiled pattern,
3973 * P points to the read position in the pattern. `translate' is the name
3974 * of the translation table (`TRANSLATE' is the name of a macro that looks
3975 * things up in `translate').
3976 */
3977
3978
3979/*
3980 * Fetch the next character in the uncompiled pattern---translating it
3981 * if necessary. *Also cast from a signed character in the constant
3982 * string passed to us by the user to an unsigned char that we can use
3983 * as an array index (in, e.g., `translate').
3984 */
3985#define PATFETCH(c) \
3986 do {if (p == pend) return REG_EEND; \
3987 c = (unsigned char) *p++; \
3988 c = translate[c]; \
3989 } while (0)
3990
3991/*
3992 * Fetch the next character in the uncompiled pattern, with no
3993 * translation.
3994 */
3995#define PATFETCH_RAW(c) \
3996 do {if (p == pend) return REG_EEND; \
3997 c = (unsigned char) *p++; \
3998 } while (0)
3999
4000/* Go backwards one character in the pattern. */
4001#define PATUNFETCH p--
4002
4003
4004#define TRANSLATE(d) translate[(unsigned char) (d)]
4005
4006typedef unsigned regnum_t;
4007
4008/* Since offsets can go either forwards or backwards, this type needs to
4009 * be able to hold values from -(MAX_BUF_SIZE - 1) to MAX_BUF_SIZE - 1.
4010 */
4011typedef int pattern_offset_t;
4012
4013typedef struct
4014{
4015 struct rexp_node ** top_expression; /* was begalt */
4016 struct rexp_node ** last_expression; /* was laststart */
4017 pattern_offset_t inner_group_offset;
4018 regnum_t regnum;
4019} compile_stack_elt_t;
4020
4021typedef struct
4022{
4023 compile_stack_elt_t *stack;
4024 unsigned size;
4025 unsigned avail; /* Offset of next open position. */
4026} compile_stack_type;
4027
4028
4029#define INIT_COMPILE_STACK_SIZE 32
4030
4031#define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
4032#define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
4033
4034/* The next available element. */
4035#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
4036
4037
4038/* Set the bit for character C in a list. */
4039#define SET_LIST_BIT(c) \
4040 (b[((unsigned char) (c)) / CHARBITS] \
4041 |= 1 << (((unsigned char) c) % CHARBITS))
4042
4043/* Get the next unsigned number in the uncompiled pattern. */
4044#define GET_UNSIGNED_NUMBER(num) \
4045 { if (p != pend) \
4046 { \
4047 PATFETCH (c); \
4048 while (isdigit (c)) \
4049 { \
4050 if (num < 0) \
4051 num = 0; \
4052 num = num * 10 + c - '0'; \
4053 if (p == pend) \
4054 break; \
4055 PATFETCH (c); \
4056 } \
4057 } \
4058 }
4059
4060#define CHAR_CLASS_MAX_LENGTH 6 /* Namely, `xdigit'. */
4061
4062#define IS_CHAR_CLASS(string) \
4063 (!strcmp (string, "alpha") || !strcmp (string, "upper") \
4064 || !strcmp (string, "lower") || !strcmp (string, "digit") \
4065 || !strcmp (string, "alnum") || !strcmp (string, "xdigit") \
4066 || !strcmp (string, "space") || !strcmp (string, "print") \
4067 || !strcmp (string, "punct") || !strcmp (string, "graph") \
4068 || !strcmp (string, "cntrl") || !strcmp (string, "blank"))
4069
4070
4071
4072/* These predicates are used in regex_compile. */
4073
4074/* P points to just after a ^ in PATTERN. Return true if that ^ comes
4075 * after an alternative or a begin-subexpression. We assume there is at
4076 * least one character before the ^.
4077 */
4078
4079#ifdef __STDC__
4080static boolean
4081at_begline_loc_p (__const__ char *pattern, __const__ char * p, reg_syntax_t syntax)
4082#else
4083static boolean
4084at_begline_loc_p (pattern, p, syntax)
4085 __const__ char *pattern;
4086 __const__ char * p;
4087 reg_syntax_t syntax;
4088#endif
4089{
4090 __const__ char *prev = p - 2;
4091 boolean prev_prev_backslash = ((prev > pattern) && (prev[-1] == '\\'));
4092
4093 return
4094
4095 (/* After a subexpression? */
4096 ((*prev == '(') && ((syntax & RE_NO_BK_PARENS) || prev_prev_backslash))
4097 ||
4098 /* After an alternative? */
4099 ((*prev == '|') && ((syntax & RE_NO_BK_VBAR) || prev_prev_backslash))
4100 );
4101}
4102
4103/* The dual of at_begline_loc_p. This one is for $. We assume there is
4104 * at least one character after the $, i.e., `P < PEND'.
4105 */
4106
4107#ifdef __STDC__
4108static boolean
4109at_endline_loc_p (__const__ char *p, __const__ char *pend, int syntax)
4110#else
4111static boolean
4112at_endline_loc_p (p, pend, syntax)
4113 __const__ char *p;
4114 __const__ char *pend;
4115 int syntax;
4116#endif
4117{
4118 __const__ char *next = p;
4119 boolean next_backslash = (*next == '\\');
4120 __const__ char *next_next = (p + 1 < pend) ? (p + 1) : 0;
4121
4122 return
4123 (
4124 /* Before a subexpression? */
4125 ((syntax & RE_NO_BK_PARENS)
4126 ? (*next == ')')
4127 : (next_backslash && next_next && (*next_next == ')')))
4128 ||
4129 /* Before an alternative? */
4130 ((syntax & RE_NO_BK_VBAR)
4131 ? (*next == '|')
4132 : (next_backslash && next_next && (*next_next == '|')))
4133 );
4134}
4135
4136
4137
4138unsigned char rx_id_translation[256] =
4139{
4140 0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
4141 10, 11, 12, 13, 14, 15, 16, 17, 18, 19,
4142 20, 21, 22, 23, 24, 25, 26, 27, 28, 29,
4143 30, 31, 32, 33, 34, 35, 36, 37, 38, 39,
4144 40, 41, 42, 43, 44, 45, 46, 47, 48, 49,
4145 50, 51, 52, 53, 54, 55, 56, 57, 58, 59,
4146 60, 61, 62, 63, 64, 65, 66, 67, 68, 69,
4147 70, 71, 72, 73, 74, 75, 76, 77, 78, 79,
4148 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
4149 90, 91, 92, 93, 94, 95, 96, 97, 98, 99,
4150
4151 100, 101, 102, 103, 104, 105, 106, 107, 108, 109,
4152 110, 111, 112, 113, 114, 115, 116, 117, 118, 119,
4153 120, 121, 122, 123, 124, 125, 126, 127, 128, 129,
4154 130, 131, 132, 133, 134, 135, 136, 137, 138, 139,
4155 140, 141, 142, 143, 144, 145, 146, 147, 148, 149,
4156 150, 151, 152, 153, 154, 155, 156, 157, 158, 159,
4157 160, 161, 162, 163, 164, 165, 166, 167, 168, 169,
4158 170, 171, 172, 173, 174, 175, 176, 177, 178, 179,
4159 180, 181, 182, 183, 184, 185, 186, 187, 188, 189,
4160 190, 191, 192, 193, 194, 195, 196, 197, 198, 199,
4161
4162 200, 201, 202, 203, 204, 205, 206, 207, 208, 209,
4163 210, 211, 212, 213, 214, 215, 216, 217, 218, 219,
4164 220, 221, 222, 223, 224, 225, 226, 227, 228, 229,
4165 230, 231, 232, 233, 234, 235, 236, 237, 238, 239,
4166 240, 241, 242, 243, 244, 245, 246, 247, 248, 249,
4167 250, 251, 252, 253, 254, 255
4168};
4169
4170/* The compiler keeps an inverted translation table.
4171 * This looks up/inititalize elements.
4172 * VALID is an array of booleans that validate CACHE.
4173 */
4174
4175#ifdef __STDC__
4176static rx_Bitset
4177inverse_translation (struct re_pattern_buffer * rxb,
4178 char * valid, rx_Bitset cache,
4179 unsigned char * translate, int c)
4180#else
4181static rx_Bitset
4182inverse_translation (rxb, valid, cache, translate, c)
4183 struct re_pattern_buffer * rxb;
4184 char * valid;
4185 rx_Bitset cache;
4186 unsigned char * translate;
4187 int c;
4188#endif
4189{
4190 rx_Bitset cs
4191 = cache + c * rx_bitset_numb_subsets (rxb->rx.local_cset_size);
4192
4193 if (!valid[c])
4194 {
4195 int x;
4196 int c_tr = TRANSLATE(c);
4197 rx_bitset_null (rxb->rx.local_cset_size, cs);
4198 for (x = 0; x < 256; ++x) /* &&&& 13.37 */
4199 if (TRANSLATE(x) == c_tr)
4200 RX_bitset_enjoin (cs, x);
4201 valid[c] = 1;
4202 }
4203 return cs;
4204}
4205
4206
4207
4208
4209
4210/* More subroutine declarations and macros for regex_compile. */
4211
4212/* Returns true if REGNUM is in one of COMPILE_STACK's elements and
4213 false if it's not. */
4214
4215#ifdef __STDC__
4216static boolean
4217group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
4218#else
4219static boolean
4220group_in_compile_stack (compile_stack, regnum)
4221 compile_stack_type compile_stack;
4222 regnum_t regnum;
4223#endif
4224{
4225 int this_element;
4226
4227 for (this_element = compile_stack.avail - 1;
4228 this_element >= 0;
4229 this_element--)
4230 if (compile_stack.stack[this_element].regnum == regnum)
4231 return true;
4232
4233 return false;
4234}
4235
4236
4237/*
4238 * Read the ending character of a range (in a bracket expression) from the
4239 * uncompiled pattern *P_PTR (which ends at PEND). We assume the
4240 * starting character is in `P[-2]'. (`P[-1]' is the character `-'.)
4241 * Then we set the translation of all bits between the starting and
4242 * ending characters (inclusive) in the compiled pattern B.
4243 *
4244 * Return an error code.
4245 *
4246 * We use these short variable names so we can use the same macros as
4247 * `regex_compile' itself.
4248 */
4249
4250#ifdef __STDC__
4251static reg_errcode_t
4252compile_range (struct re_pattern_buffer * rxb, rx_Bitset cs,
4253 __const__ char ** p_ptr, __const__ char * pend,
4254 unsigned char * translate, reg_syntax_t syntax,
4255 rx_Bitset inv_tr, char * valid_inv_tr)
4256#else
4257static reg_errcode_t
4258compile_range (rxb, cs, p_ptr, pend, translate, syntax, inv_tr, valid_inv_tr)
4259 struct re_pattern_buffer * rxb;
4260 rx_Bitset cs;
4261 __const__ char ** p_ptr;
4262 __const__ char * pend;
4263 unsigned char * translate;
4264 reg_syntax_t syntax;
4265 rx_Bitset inv_tr;
4266 char * valid_inv_tr;
4267#endif
4268{
4269 unsigned this_char;
4270
4271 __const__ char *p = *p_ptr;
4272
4273 unsigned char range_end;
4274 unsigned char range_start = TRANSLATE(p[-2]);
4275
4276 if (p == pend)
4277 return REG_ERANGE;
4278
4279 PATFETCH (range_end);
4280
4281 (*p_ptr)++;
4282
4283 if (range_start > range_end)
4284 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
4285
4286 for (this_char = range_start; this_char <= range_end; this_char++)
4287 {
4288 rx_Bitset it =
4289 inverse_translation (rxb, valid_inv_tr, inv_tr, translate, this_char);
4290 rx_bitset_union (rxb->rx.local_cset_size, cs, it);
4291 }
4292
4293 return REG_NOERROR;
4294}
4295
4296
4297
4298/* This searches a regexp for backreference side effects.
4299 * It fills in the array OUT with 1 at the index of every register pair
4300 * referenced by a backreference.
4301 *
4302 * This is used to help optimize patterns for searching. The information is
4303 * useful because, if the caller doesn't want register values, backreferenced
4304 * registers are the only registers for which we need rx_backtrack.
4305 */
4306
4307#ifdef __STDC__
4308static void
4309find_backrefs (char * out, struct rexp_node * rexp,
4310 struct re_se_params * params)
4311#else
4312static void
4313find_backrefs (out, rexp, params)
4314 char * out;
4315 struct rexp_node * rexp;
4316 struct re_se_params * params;
4317#endif
4318{
4319 if (rexp)
4320 switch (rexp->type)
4321 {
4322 case r_cset:
4323 case r_data:
4324 return;
4325 case r_alternate:
4326 case r_concat:
4327 case r_opt:
4328 case r_star:
4329 case r_2phase_star:
4330 find_backrefs (out, rexp->params.pair.left, params);
4331 find_backrefs (out, rexp->params.pair.right, params);
4332 return;
4333 case r_side_effect:
4334 if ( ((long)rexp->params.side_effect >= 0)
4335 && (params [(long)rexp->params.side_effect].se == re_se_backref))
4336 out[ params [(long)rexp->params.side_effect].op1] = 1;
4337 return;
4338 }
4339}
4340
4341
4342
4343
4344/* Returns 0 unless the pattern can match the empty string. */
4345
4346#ifdef __STDC__
4347static int
4348compute_fastset (struct re_pattern_buffer * rxb, struct rexp_node * rexp)
4349#else
4350static int
4351compute_fastset (rxb, rexp)
4352 struct re_pattern_buffer * rxb;
4353 struct rexp_node * rexp;
4354#endif
4355{
4356 if (!rexp)
4357 return 1;
4358 switch (rexp->type)
4359 {
4360 case r_data:
4361 return 1;
4362 case r_cset:
4363 {
4364 rx_bitset_union (rxb->rx.local_cset_size,
4365 rxb->fastset, rexp->params.cset);
4366 }
4367 return 0;
4368 case r_concat:
4369 return (compute_fastset (rxb, rexp->params.pair.left)
4370 && compute_fastset (rxb, rexp->params.pair.right));
4371 case r_2phase_star:
4372 compute_fastset (rxb, rexp->params.pair.left);
4373 /* compute_fastset (rxb, rexp->params.pair.right); nope... */
4374 return 1;
4375 case r_alternate:
4376 return !!(compute_fastset (rxb, rexp->params.pair.left)
4377 + compute_fastset (rxb, rexp->params.pair.right));
4378 case r_opt:
4379 case r_star:
4380 compute_fastset (rxb, rexp->params.pair.left);
4381 return 1;
4382 case r_side_effect:
4383 return 1;
4384 }
4385
4386 /* this should never happen */
4387 return 0;
4388}
4389
4390
4391
4392/* returns
4393 * 1 -- yes, definately anchored by the given side effect.
4394 * 2 -- maybe anchored, maybe the empty string.
4395 * 0 -- definately not anchored
4396 * There is simply no other possibility.
4397 */
4398
4399#ifdef __STDC__
4400static int
4401is_anchored (struct rexp_node * rexp, rx_side_effect se)
4402#else
4403static int
4404is_anchored (rexp, se)
4405 struct rexp_node * rexp;
4406 rx_side_effect se;
4407#endif
4408{
4409 if (!rexp)
4410 return 2;
4411 switch (rexp->type)
4412 {
4413 case r_cset:
4414 case r_data:
4415 return 0;
4416 case r_concat:
4417 case r_2phase_star:
4418 {
4419 int l = is_anchored (rexp->params.pair.left, se);
4420 return (l == 2 ? is_anchored (rexp->params.pair.right, se) : l);
4421 }
4422 case r_alternate:
4423 {
4424 int l = is_anchored (rexp->params.pair.left, se);
4425 int r = l ? is_anchored (rexp->params.pair.right, se) : 0;
4426
4427 if (l == r)
4428 return l;
4429 else if ((l == 0) || (r == 0))
4430 return 0;
4431 else
4432 return 2;
4433 }
4434 case r_opt:
4435 case r_star:
4436 return is_anchored (rexp->params.pair.left, se) ? 2 : 0;
4437
4438 case r_side_effect:
4439 return ((rexp->params.side_effect == se)
4440 ? 1 : 2);
4441 }
4442
4443 /* this should never happen */
4444 return 0;
4445}
4446
4447
4448
4449/* This removes register assignments that aren't required by backreferencing.
4450 * This can speed up explore_future, especially if it eliminates
4451 * non-determinism in the superstate NFA.
4452 *
4453 * NEEDED is an array of characters, presumably filled in by FIND_BACKREFS.
4454 * The non-zero elements of the array indicate which register assignments
4455 * can NOT be removed from the expression.
4456 */
4457
4458#ifdef __STDC__
4459static struct rexp_node *
4460remove_unecessary_side_effects (struct rx * rx, char * needed,
4461 struct rexp_node * rexp,
4462 struct re_se_params * params)
4463#else
4464static struct rexp_node *
4465remove_unecessary_side_effects (rx, needed, rexp, params)
4466 struct rx * rx;
4467 char * needed;
4468 struct rexp_node * rexp;
4469 struct re_se_params * params;
4470#endif
4471{
4472 struct rexp_node * l;
4473 struct rexp_node * r;
4474 if (!rexp)
4475 return 0;
4476 else
4477 switch (rexp->type)
4478 {
4479 case r_cset:
4480 case r_data:
4481 return rexp;
4482 case r_alternate:
4483 case r_concat:
4484 case r_2phase_star:
4485 l = remove_unecessary_side_effects (rx, needed,
4486 rexp->params.pair.left, params);
4487 r = remove_unecessary_side_effects (rx, needed,
4488 rexp->params.pair.right, params);
4489 if ((l && r) || (rexp->type != r_concat))
4490 {
4491 rexp->params.pair.left = l;
4492 rexp->params.pair.right = r;
4493 return rexp;
4494 }
4495 else
4496 {
4497 rexp->params.pair.left = rexp->params.pair.right = 0;
4498 rx_free_rexp (rx, rexp);
4499 return l ? l : r;
4500 }
4501 case r_opt:
4502 case r_star:
4503 l = remove_unecessary_side_effects (rx, needed,
4504 rexp->params.pair.left, params);
4505 if (l)
4506 {
4507 rexp->params.pair.left = l;
4508 return rexp;
4509 }
4510 else
4511 {
4512 rexp->params.pair.left = 0;
4513 rx_free_rexp (rx, rexp);
4514 return 0;
4515 }
4516 case r_side_effect:
4517 {
4518 int se = (long)rexp->params.side_effect;
4519 if ( (se >= 0)
4520 && ( ((enum re_side_effects)params[se].se == re_se_lparen)
4521 || ((enum re_side_effects)params[se].se == re_se_rparen))
4522 && (params [se].op1 > 0)
4523 && (!needed [params [se].op1]))
4524 {
4525 rx_free_rexp (rx, rexp);
4526 return 0;
4527 }
4528 else
4529 return rexp;
4530 }
4531 }
4532
4533 /* this should never happen */
4534 return 0;
4535}
4536
4537
4538
4539
4540#ifdef __STDC__
4541static int
4542pointless_if_repeated (struct rexp_node * node, struct re_se_params * params)
4543#else
4544static int
4545pointless_if_repeated (node, params)
4546 struct rexp_node * node;
4547 struct re_se_params * params;
4548#endif
4549{
4550 if (!node)
4551 return 1;
4552 switch (node->type)
4553 {
4554 case r_cset:
4555 return 0;
4556 case r_alternate:
4557 case r_concat:
4558 case r_2phase_star:
4559 return (pointless_if_repeated (node->params.pair.left, params)
4560 && pointless_if_repeated (node->params.pair.right, params));
4561 case r_opt:
4562 case r_star:
4563 return pointless_if_repeated (node->params.pair.left, params);
4564 case r_side_effect:
4565 switch ((enum re_side_effects) (((long)node->params.side_effect < 0)
4566 ? (enum re_side_effects) (long) node->params.side_effect
4567 : (enum re_side_effects)params[(long)node->params.side_effect].se))
4568 {
4569 case re_se_try:
4570 case re_se_at_dot:
4571 case re_se_begbuf:
4572 case re_se_hat:
4573 case re_se_wordbeg:
4574 case re_se_wordbound:
4575 case re_se_notwordbound:
4576 case re_se_wordend:
4577 case re_se_endbuf:
4578 case re_se_dollar:
4579 case re_se_fail:
4580 case re_se_win:
4581 return 1;
4582 case re_se_lparen:
4583 case re_se_rparen:
4584 case re_se_iter:
4585 case re_se_end_iter:
4586 case re_se_syntax:
4587 case re_se_not_syntax:
4588 case re_se_backref:
4589 return 0;
4590 }
4591 case r_data:
4592 default:
4593 return 0;
4594 }
4595}
4596
4597
4598
4599
4600#ifdef __STDC__
4601static int
4602registers_on_stack (struct re_pattern_buffer * rxb,
4603 struct rexp_node * rexp, int in_danger,
4604 struct re_se_params * params)
4605#else
4606static int
4607registers_on_stack (rxb, rexp, in_danger, params)
4608 struct re_pattern_buffer * rxb;
4609 struct rexp_node * rexp;
4610 int in_danger;
4611 struct re_se_params * params;
4612#endif
4613{
4614 if (!rexp)
4615 return 0;
4616 else
4617 switch (rexp->type)
4618 {
4619 case r_cset:
4620 case r_data:
4621 return 0;
4622 case r_alternate:
4623 case r_concat:
4624 return ( registers_on_stack (rxb, rexp->params.pair.left,
4625 in_danger, params)
4626 || (registers_on_stack
4627 (rxb, rexp->params.pair.right,
4628 in_danger, params)));
4629 case r_opt:
4630 return registers_on_stack (rxb, rexp->params.pair.left, 0, params);
4631 case r_star:
4632 return registers_on_stack (rxb, rexp->params.pair.left, 1, params);
4633 case r_2phase_star:
4634 return
4635 ( registers_on_stack (rxb, rexp->params.pair.left, 1, params)
4636 || registers_on_stack (rxb, rexp->params.pair.right, 1, params));
4637 case r_side_effect:
4638 {
4639 int se = (long)rexp->params.side_effect;
4640 if ( in_danger
4641 && (se >= 0)
4642 && (params [se].op1 > 0)
4643 && ( ((enum re_side_effects)params[se].se == re_se_lparen)
4644 || ((enum re_side_effects)params[se].se == re_se_rparen)))
4645 return 1;
4646 else
4647 return 0;
4648 }
4649 }
4650
4651 /* this should never happen */
4652 return 0;
4653}
4654
4655
4656
4657
4658static char idempotent_complex_se[] =
4659{
4660#define RX_WANT_SE_DEFS 1
4661#undef RX_DEF_SE
4662#undef RX_DEF_CPLX_SE
4663#define RX_DEF_SE(IDEM, NAME, VALUE)
4664#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE) IDEM,
4665#include "rx.h"
4666#undef RX_DEF_SE
4667#undef RX_DEF_CPLX_SE
4668#undef RX_WANT_SE_DEFS
4669 23
4670};
4671
4672static char idempotent_se[] =
4673{
4674 13,
4675#define RX_WANT_SE_DEFS 1
4676#undef RX_DEF_SE
4677#undef RX_DEF_CPLX_SE
4678#define RX_DEF_SE(IDEM, NAME, VALUE) IDEM,
4679#define RX_DEF_CPLX_SE(IDEM, NAME, VALUE)
4680#include "rx.h"
4681#undef RX_DEF_SE
4682#undef RX_DEF_CPLX_SE
4683#undef RX_WANT_SE_DEFS
4684 42
4685};
4686
4687
4688
4689
4690
4691#ifdef __STDC__
4692static int
4693has_any_se (struct rx * rx,
4694 struct rexp_node * rexp)
4695#else
4696static int
4697has_any_se (rx, rexp)
4698 struct rx * rx;
4699 struct rexp_node * rexp;
4700#endif
4701{
4702 if (!rexp)
4703 return 0;
4704
4705 switch (rexp->type)
4706 {
4707 case r_cset:
4708 case r_data:
4709 return 0;
4710
4711 case r_side_effect:
4712 return 1;
4713
4714 case r_2phase_star:
4715 case r_concat:
4716 case r_alternate:
4717 return
4718 ( has_any_se (rx, rexp->params.pair.left)
4719 || has_any_se (rx, rexp->params.pair.right));
4720
4721 case r_opt:
4722 case r_star:
4723 return has_any_se (rx, rexp->params.pair.left);
4724 }
4725
4726 /* this should never happen */
4727 return 0;
4728}
4729
4730
4731
4732
4733/* This must be called AFTER `convert_hard_loops' for a given REXP. */
4734#ifdef __STDC__
4735static int
4736has_non_idempotent_epsilon_path (struct rx * rx,
4737 struct rexp_node * rexp,
4738 struct re_se_params * params)
4739#else
4740static int
4741has_non_idempotent_epsilon_path (rx, rexp, params)
4742 struct rx * rx;
4743 struct rexp_node * rexp;
4744 struct re_se_params * params;
4745#endif
4746{
4747 if (!rexp)
4748 return 0;
4749
4750 switch (rexp->type)
4751 {
4752 case r_cset:
4753 case r_data:
4754 case r_star:
4755 return 0;
4756
4757 case r_side_effect:
4758 return
4759 !((long)rexp->params.side_effect > 0
4760 ? idempotent_complex_se [ params [(long)rexp->params.side_effect].se ]
4761 : idempotent_se [-(long)rexp->params.side_effect]);
4762
4763 case r_alternate:
4764 return
4765 ( has_non_idempotent_epsilon_path (rx,
4766 rexp->params.pair.left, params)
4767 || has_non_idempotent_epsilon_path (rx,
4768 rexp->params.pair.right, params));
4769
4770 case r_2phase_star:
4771 case r_concat:
4772 return
4773 ( has_non_idempotent_epsilon_path (rx,
4774 rexp->params.pair.left, params)
4775 && has_non_idempotent_epsilon_path (rx,
4776 rexp->params.pair.right, params));
4777
4778 case r_opt:
4779 return has_non_idempotent_epsilon_path (rx,
4780 rexp->params.pair.left, params);
4781 }
4782
4783 /* this should never happen */
4784 return 0;
4785}
4786
4787
4788
4789
4790/* This computes rougly what it's name suggests. It can (and does) go wrong
4791 * in the direction of returning spurious 0 without causing disasters.
4792 */
4793#ifdef __STDC__
4794static int
4795begins_with_complex_se (struct rx * rx, struct rexp_node * rexp)
4796#else
4797static int
4798begins_with_complex_se (rx, rexp)
4799 struct rx * rx;
4800 struct rexp_node * rexp;
4801#endif
4802{
4803 if (!rexp)
4804 return 0;
4805
4806 switch (rexp->type)
4807 {
4808 case r_cset:
4809 case r_data:
4810 return 0;
4811
4812 case r_side_effect:
4813 return ((long)rexp->params.side_effect >= 0);
4814
4815 case r_alternate:
4816 return
4817 ( begins_with_complex_se (rx, rexp->params.pair.left)
4818 && begins_with_complex_se (rx, rexp->params.pair.right));
4819
4820
4821 case r_concat:
4822 return has_any_se (rx, rexp->params.pair.left);
4823 case r_opt:
4824 case r_star:
4825 case r_2phase_star:
4826 return 0;
4827 }
4828
4829 /* this should never happen */
4830 return 0;
4831}
4832
4833
4834
4835/* This destructively removes some of the re_se_tv side effects from
4836 * a rexp tree. In particular, during parsing re_se_tv was inserted on the
4837 * right half of every | to guarantee that posix path preference could be
4838 * honored. This function removes some which it can be determined aren't
4839 * needed.
4840 */
4841
4842#ifdef __STDC__
4843static void
4844speed_up_alt (struct rx * rx,
4845 struct rexp_node * rexp,
4846 int unposix)
4847#else
4848static void
4849speed_up_alt (rx, rexp, unposix)
4850 struct rx * rx;
4851 struct rexp_node * rexp;
4852 int unposix;
4853#endif
4854{
4855 if (!rexp)
4856 return;
4857
4858 switch (rexp->type)
4859 {
4860 case r_cset:
4861 case r_data:
4862 case r_side_effect:
4863 return;
4864
4865 case r_opt:
4866 case r_star:
4867 speed_up_alt (rx, rexp->params.pair.left, unposix);
4868 return;
4869
4870 case r_2phase_star:
4871 case r_concat:
4872 speed_up_alt (rx, rexp->params.pair.left, unposix);
4873 speed_up_alt (rx, rexp->params.pair.right, unposix);
4874 return;
4875
4876 case r_alternate:
4877 /* the right child is guaranteed to be (concat re_se_tv <subexp>) */
4878
4879 speed_up_alt (rx, rexp->params.pair.left, unposix);
4880 speed_up_alt (rx, rexp->params.pair.right->params.pair.right, unposix);
4881
4882 if ( unposix
4883 || (begins_with_complex_se
4884 (rx, rexp->params.pair.right->params.pair.right))
4885 || !( has_any_se (rx, rexp->params.pair.right->params.pair.right)
4886 || has_any_se (rx, rexp->params.pair.left)))
4887 {
4888 struct rexp_node * conc = rexp->params.pair.right;
4889 rexp->params.pair.right = conc->params.pair.right;
4890 conc->params.pair.right = 0;
4891 rx_free_rexp (rx, conc);
4892 }
4893 }
4894}
4895
4896
4897
4898
4899
4900
4901/* `regex_compile' compiles PATTERN (of length SIZE) according to SYNTAX.
4902 Returns one of error codes defined in `regex.h', or zero for success.
4903
4904 Assumes the `allocated' (and perhaps `buffer') and `translate'
4905 fields are set in BUFP on entry.
4906
4907 If it succeeds, results are put in BUFP (if it returns an error, the
4908 contents of BUFP are undefined):
4909 `buffer' is the compiled pattern;
4910 `syntax' is set to SYNTAX;
4911 `used' is set to the length of the compiled pattern;
4912 `fastmap_accurate' is set to zero;
4913 `re_nsub' is set to the number of groups in PATTERN;
4914 `not_bol' and `not_eol' are set to zero.
4915
4916 The `fastmap' and `newline_anchor' fields are neither
4917 examined nor set. */
4918
4919
4920
4921#ifdef __STDC__
4922RX_DECL reg_errcode_t
4923rx_compile (__const__ char *pattern, int size,
4924 reg_syntax_t syntax,
4925 struct re_pattern_buffer * rxb)
4926#else
4927RX_DECL reg_errcode_t
4928rx_compile (pattern, size, syntax, rxb)
4929 __const__ char *pattern;
4930 int size;
4931 reg_syntax_t syntax;
4932 struct re_pattern_buffer * rxb;
4933#endif
4934{
4935 RX_subset
4936 inverse_translate [CHAR_SET_SIZE * rx_bitset_numb_subsets(CHAR_SET_SIZE)];
4937 char
4938 validate_inv_tr [CHAR_SET_SIZE * rx_bitset_numb_subsets(CHAR_SET_SIZE)];
4939
4940 /* We fetch characters from PATTERN here. Even though PATTERN is
4941 `char *' (i.e., signed), we declare these variables as unsigned, so
4942 they can be reliably used as array indices. */
4943 register unsigned char c, c1;
4944
4945 /* A random tempory spot in PATTERN. */
4946 __const__ char *p1;
4947
4948 /* Keeps track of unclosed groups. */
4949 compile_stack_type compile_stack;
4950
4951 /* Points to the current (ending) position in the pattern. */
4952 __const__ char *p = pattern;
4953 __const__ char *pend = pattern + size;
4954
4955 /* How to translate the characters in the pattern. */
4956 unsigned char *translate = (rxb->translate
4957 ? rxb->translate
4958 : rx_id_translation);
4959
4960 /* When parsing is done, this will hold the expression tree. */
4961 struct rexp_node * rexp = 0;
4962
4963 /* In the midst of compilation, this holds onto the regexp
4964 * first parst while rexp goes on to aquire additional constructs.
4965 */
4966 struct rexp_node * orig_rexp = 0;
4967 struct rexp_node * fewer_side_effects = 0;
4968
4969 /* This and top_expression are saved on the compile stack. */
4970 struct rexp_node ** top_expression = &rexp;
4971 struct rexp_node ** last_expression = top_expression;
4972
4973 /* Parameter to `goto append_node' */
4974 struct rexp_node * append;
4975
4976 /* Counts open-groups as they are encountered. This is the index of the
4977 * innermost group being compiled.
4978 */
4979 regnum_t regnum = 0;
4980
4981 /* Place in the uncompiled pattern (i.e., the {) to
4982 * which to go back if the interval is invalid.
4983 */
4984 __const__ char *beg_interval;
4985
4986 struct re_se_params * params = 0;
4987 int paramc = 0; /* How many complex side effects so far? */
4988
4989 rx_side_effect side; /* param to `goto add_side_effect' */
4990
4991 bzero (validate_inv_tr, sizeof (validate_inv_tr));
4992
4993 rxb->rx.instruction_table = rx_id_instruction_table;
4994
4995
4996 /* Initialize the compile stack. */
4997 compile_stack.stack = (( compile_stack_elt_t *) malloc ((INIT_COMPILE_STACK_SIZE) * sizeof ( compile_stack_elt_t)));
4998 if (compile_stack.stack == 0)
4999 return REG_ESPACE;
5000
5001 compile_stack.size = INIT_COMPILE_STACK_SIZE;
5002 compile_stack.avail = 0;
5003
5004 /* Initialize the pattern buffer. */
5005 rxb->rx.cache = &default_cache;
5006 rxb->syntax = syntax;
5007 rxb->fastmap_accurate = 0;
5008 rxb->not_bol = rxb->not_eol = 0;
5009 rxb->least_subs = 0;
5010
5011 /* Always count groups, whether or not rxb->no_sub is set.
5012 * The whole pattern is implicitly group 0, so counting begins
5013 * with 1.
5014 */
5015 rxb->re_nsub = 0;
5016
5017#if !defined (emacs) && !defined (SYNTAX_TABLE)
5018 /* Initialize the syntax table. */
5019 init_syntax_once ();
5020#endif
5021
5022 /* Loop through the uncompiled pattern until we're at the end. */
5023 while (p != pend)
5024 {
5025 PATFETCH (c);
5026
5027 switch (c)
5028 {
5029 case '^':
5030 {
5031 if ( /* If at start of pattern, it's an operator. */
5032 p == pattern + 1
5033 /* If context independent, it's an operator. */
5034 || syntax & RE_CONTEXT_INDEP_ANCHORS
5035 /* Otherwise, depends on what's come before. */
5036 || at_begline_loc_p (pattern, p, syntax))
5037 {
5038 struct rexp_node * n
5039 = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)re_se_hat);
5040 if (!n)
5041 return REG_ESPACE;
5042 append = n;
5043 goto append_node;
5044 }
5045 else
5046 goto normal_char;
5047 }
5048 break;
5049
5050
5051 case '$':
5052 {
5053 if ( /* If at end of pattern, it's an operator. */
5054 p == pend
5055 /* If context independent, it's an operator. */
5056 || syntax & RE_CONTEXT_INDEP_ANCHORS
5057 /* Otherwise, depends on what's next. */
5058 || at_endline_loc_p (p, pend, syntax))
5059 {
5060 struct rexp_node * n
5061 = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)re_se_dollar);
5062 if (!n)
5063 return REG_ESPACE;
5064 append = n;
5065 goto append_node;
5066 }
5067 else
5068 goto normal_char;
5069 }
5070 break;
5071
5072
5073 case '+':
5074 case '?':
5075 if ((syntax & RE_BK_PLUS_QM)
5076 || (syntax & RE_LIMITED_OPS))
5077 goto normal_char;
5078
5079 handle_plus:
5080 case '*':
5081 /* If there is no previous pattern... */
5082 if (pointless_if_repeated (*last_expression, params))
5083 {
5084 if (syntax & RE_CONTEXT_INVALID_OPS)
5085 return REG_BADRPT;
5086 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
5087 goto normal_char;
5088 }
5089
5090 {
5091 /* 1 means zero (many) matches is allowed. */
5092 char zero_times_ok = 0, many_times_ok = 0;
5093
5094 /* If there is a sequence of repetition chars, collapse it
5095 down to just one (the right one). We can't combine
5096 interval operators with these because of, e.g., `a{2}*',
5097 which should only match an even number of `a's. */
5098
5099 for (;;)
5100 {
5101 zero_times_ok |= c != '+';
5102 many_times_ok |= c != '?';
5103
5104 if (p == pend)
5105 break;
5106
5107 PATFETCH (c);
5108
5109 if (c == '*'
5110 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
5111 ;
5112
5113 else if (syntax & RE_BK_PLUS_QM && c == '\\')
5114 {
5115 if (p == pend) return REG_EESCAPE;
5116
5117 PATFETCH (c1);
5118 if (!(c1 == '+' || c1 == '?'))
5119 {
5120 PATUNFETCH;
5121 PATUNFETCH;
5122 break;
5123 }
5124
5125 c = c1;
5126 }
5127 else
5128 {
5129 PATUNFETCH;
5130 break;
5131 }
5132
5133 /* If we get here, we found another repeat character. */
5134 }
5135
5136 /* Star, etc. applied to an empty pattern is equivalent
5137 to an empty pattern. */
5138 if (!last_expression)
5139 break;
5140
5141 /* Now we know whether or not zero matches is allowed
5142 * and also whether or not two or more matches is allowed.
5143 */
5144
5145 {
5146 struct rexp_node * inner_exp = *last_expression;
5147 int need_sync = 0;
5148
5149 if (many_times_ok
5150 && has_non_idempotent_epsilon_path (&rxb->rx,
5151 inner_exp, params))
5152 {
5153 struct rexp_node * pusher
5154 = rx_mk_r_side_effect (&rxb->rx,
5155 (rx_side_effect)re_se_pushpos);
5156 struct rexp_node * checker
5157 = rx_mk_r_side_effect (&rxb->rx,
5158 (rx_side_effect)re_se_chkpos);
5159 struct rexp_node * pushback
5160 = rx_mk_r_side_effect (&rxb->rx,
5161 (rx_side_effect)re_se_pushback);
5162 rx_Bitset cs = rx_cset (&rxb->rx);
5163 struct rexp_node * lit_t = rx_mk_r_cset (&rxb->rx, cs);
5164 struct rexp_node * fake_state
5165 = rx_mk_r_concat (&rxb->rx, pushback, lit_t);
5166 struct rexp_node * phase2
5167 = rx_mk_r_concat (&rxb->rx, checker, fake_state);
5168 struct rexp_node * popper
5169 = rx_mk_r_side_effect (&rxb->rx,
5170 (rx_side_effect)re_se_poppos);
5171 struct rexp_node * star
5172 = rx_mk_r_2phase_star (&rxb->rx, inner_exp, phase2);
5173 struct rexp_node * a
5174 = rx_mk_r_concat (&rxb->rx, pusher, star);
5175 struct rexp_node * whole_thing
5176 = rx_mk_r_concat (&rxb->rx, a, popper);
5177 if (!(pusher && star && pushback && lit_t && fake_state
5178 && lit_t && phase2 && checker && popper
5179 && a && whole_thing))
5180 return REG_ESPACE;
5181 RX_bitset_enjoin (cs, 't');
5182 *last_expression = whole_thing;
5183 }
5184 else
5185 {
5186 struct rexp_node * star =
5187 (many_times_ok ? rx_mk_r_star : rx_mk_r_opt)
5188 (&rxb->rx, *last_expression);
5189 if (!star)
5190 return REG_ESPACE;
5191 *last_expression = star;
5192 need_sync = has_any_se (&rxb->rx, *last_expression);
5193 }
5194 if (!zero_times_ok)
5195 {
5196 struct rexp_node * concat
5197 = rx_mk_r_concat (&rxb->rx, inner_exp,
5198 rx_copy_rexp (&rxb->rx,
5199 *last_expression));
5200 if (!concat)
5201 return REG_ESPACE;
5202 *last_expression = concat;
5203 }
5204 if (need_sync)
5205 {
5206 int sync_se = paramc;
5207 params = (params
5208 ? ((struct re_se_params *)
5209 realloc (params,
5210 sizeof (*params) * (1 + paramc)))
5211 : ((struct re_se_params *)
5212 malloc (sizeof (*params))));
5213 if (!params)
5214 return REG_ESPACE;
5215 ++paramc;
5216 params [sync_se].se = re_se_tv;
5217 side = (rx_side_effect)sync_se;
5218 goto add_side_effect;
5219 }
5220 }
5221 /* The old regex.c used to optimize `.*\n'.
5222 * Maybe rx should too?
5223 */
5224 }
5225 break;
5226
5227
5228 case '.':
5229 {
5230 rx_Bitset cs = rx_cset (&rxb->rx);
5231 struct rexp_node * n = rx_mk_r_cset (&rxb->rx, cs);
5232 if (!(cs && n))
5233 return REG_ESPACE;
5234
5235 rx_bitset_universe (rxb->rx.local_cset_size, cs);
5236 if (!(rxb->syntax & RE_DOT_NEWLINE))
5237 RX_bitset_remove (cs, '\n');
5238 if (!(rxb->syntax & RE_DOT_NOT_NULL))
5239 RX_bitset_remove (cs, 0);
5240
5241 append = n;
5242 goto append_node;
5243 break;
5244 }
5245
5246
5247 case '[':
5248 if (p == pend) return REG_EBRACK;
5249 {
5250 boolean had_char_class = false;
5251 rx_Bitset cs = rx_cset (&rxb->rx);
5252 struct rexp_node * node = rx_mk_r_cset (&rxb->rx, cs);
5253 int is_inverted = *p == '^';
5254
5255 if (!(node && cs))
5256 return REG_ESPACE;
5257
5258 /* This branch of the switch is normally exited with
5259 *`goto append_node'
5260 */
5261 append = node;
5262
5263 if (is_inverted)
5264 p++;
5265
5266 /* Remember the first position in the bracket expression. */
5267 p1 = p;
5268
5269 /* Read in characters and ranges, setting map bits. */
5270 for (;;)
5271 {
5272 if (p == pend) return REG_EBRACK;
5273
5274 PATFETCH (c);
5275
5276 /* \ might escape characters inside [...] and [^...]. */
5277 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
5278 {
5279 if (p == pend) return REG_EESCAPE;
5280
5281 PATFETCH (c1);
5282 {
5283 rx_Bitset it = inverse_translation (rxb,
5284 validate_inv_tr,
5285 inverse_translate,
5286 translate,
5287 c1);
5288 rx_bitset_union (rxb->rx.local_cset_size, cs, it);
5289 }
5290 continue;
5291 }
5292
5293 /* Could be the end of the bracket expression. If it's
5294 not (i.e., when the bracket expression is `[]' so
5295 far), the ']' character bit gets set way below. */
5296 if (c == ']' && p != p1 + 1)
5297 goto finalize_class_and_append;
5298
5299 /* Look ahead to see if it's a range when the last thing
5300 was a character class. */
5301 if (had_char_class && c == '-' && *p != ']')
5302 return REG_ERANGE;
5303
5304 /* Look ahead to see if it's a range when the last thing
5305 was a character: if this is a hyphen not at the
5306 beginning or the end of a list, then it's the range
5307 operator. */
5308 if (c == '-'
5309 && !(p - 2 >= pattern && p[-2] == '[')
5310 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
5311 && *p != ']')
5312 {
5313 reg_errcode_t ret
5314 = compile_range (rxb, cs, &p, pend, translate, syntax,
5315 inverse_translate, validate_inv_tr);
5316 if (ret != REG_NOERROR) return ret;
5317 }
5318
5319 else if (p[0] == '-' && p[1] != ']')
5320 { /* This handles ranges made up of characters only. */
5321 reg_errcode_t ret;
5322
5323 /* Move past the `-'. */
5324 PATFETCH (c1);
5325
5326 ret = compile_range (rxb, cs, &p, pend, translate, syntax,
5327 inverse_translate, validate_inv_tr);
5328 if (ret != REG_NOERROR) return ret;
5329 }
5330
5331 /* See if we're at the beginning of a possible character
5332 class. */
5333
5334 else if ((syntax & RE_CHAR_CLASSES)
5335 && (c == '[') && (*p == ':'))
5336 {
5337 char str[CHAR_CLASS_MAX_LENGTH + 1];
5338
5339 PATFETCH (c);
5340 c1 = 0;
5341
5342 /* If pattern is `[[:'. */
5343 if (p == pend) return REG_EBRACK;
5344
5345 for (;;)
5346 {
5347 PATFETCH (c);
5348 if (c == ':' || c == ']' || p == pend
5349 || c1 == CHAR_CLASS_MAX_LENGTH)
5350 break;
5351 str[c1++] = c;
5352 }
5353 str[c1] = '\0';
5354
5355 /* If isn't a word bracketed by `[:' and:`]':
5356 undo the ending character, the letters, and leave
5357 the leading `:' and `[' (but set bits for them). */
5358 if (c == ':' && *p == ']')
5359 {
5360 int ch;
5361 boolean is_alnum = !strcmp (str, "alnum");
5362 boolean is_alpha = !strcmp (str, "alpha");
5363 boolean is_blank = !strcmp (str, "blank");
5364 boolean is_cntrl = !strcmp (str, "cntrl");
5365 boolean is_digit = !strcmp (str, "digit");
5366 boolean is_graph = !strcmp (str, "graph");
5367 boolean is_lower = !strcmp (str, "lower");
5368 boolean is_print = !strcmp (str, "print");
5369 boolean is_punct = !strcmp (str, "punct");
5370 boolean is_space = !strcmp (str, "space");
5371 boolean is_upper = !strcmp (str, "upper");
5372 boolean is_xdigit = !strcmp (str, "xdigit");
5373
5374 if (!IS_CHAR_CLASS (str)) return REG_ECTYPE;
5375
5376 /* Throw away the ] at the end of the character
5377 class. */
5378 PATFETCH (c);
5379
5380 if (p == pend) return REG_EBRACK;
5381
5382 for (ch = 0; ch < 1 << CHARBITS; ch++)
5383 {
5384 if ( (is_alnum && isalnum (ch))
5385 || (is_alpha && isalpha (ch))
5386 || (is_blank && isblank (ch))
5387 || (is_cntrl && iscntrl (ch))
5388 || (is_digit && isdigit (ch))
5389 || (is_graph && isgraph (ch))
5390 || (is_lower && islower (ch))
5391 || (is_print && isprint (ch))
5392 || (is_punct && ispunct (ch))
5393 || (is_space && isspace (ch))
5394 || (is_upper && isupper (ch))
5395 || (is_xdigit && isxdigit (ch)))
5396 {
5397 rx_Bitset it =
5398 inverse_translation (rxb,
5399 validate_inv_tr,
5400 inverse_translate,
5401 translate,
5402 ch);
5403 rx_bitset_union (rxb->rx.local_cset_size,
5404 cs, it);
5405 }
5406 }
5407 had_char_class = true;
5408 }
5409 else
5410 {
5411 c1++;
5412 while (c1--)
5413 PATUNFETCH;
5414 {
5415 rx_Bitset it =
5416 inverse_translation (rxb,
5417 validate_inv_tr,
5418 inverse_translate,
5419 translate,
5420 '[');
5421 rx_bitset_union (rxb->rx.local_cset_size,
5422 cs, it);
5423 }
5424 {
5425 rx_Bitset it =
5426 inverse_translation (rxb,
5427 validate_inv_tr,
5428 inverse_translate,
5429 translate,
5430 ':');
5431 rx_bitset_union (rxb->rx.local_cset_size,
5432 cs, it);
5433 }
5434 had_char_class = false;
5435 }
5436 }
5437 else
5438 {
5439 had_char_class = false;
5440 {
5441 rx_Bitset it = inverse_translation (rxb,
5442 validate_inv_tr,
5443 inverse_translate,
5444 translate,
5445 c);
5446 rx_bitset_union (rxb->rx.local_cset_size, cs, it);
5447 }
5448 }
5449 }
5450
5451 finalize_class_and_append:
5452 if (is_inverted)
5453 {
5454 rx_bitset_complement (rxb->rx.local_cset_size, cs);
5455 if (syntax & RE_HAT_LISTS_NOT_NEWLINE)
5456 RX_bitset_remove (cs, '\n');
5457 }
5458 goto append_node;
5459 }
5460 break;
5461
5462
5463 case '(':
5464 if (syntax & RE_NO_BK_PARENS)
5465 goto handle_open;
5466 else
5467 goto normal_char;
5468
5469
5470 case ')':
5471 if (syntax & RE_NO_BK_PARENS)
5472 goto handle_close;
5473 else
5474 goto normal_char;
5475
5476
5477 case '\n':
5478 if (syntax & RE_NEWLINE_ALT)
5479 goto handle_alt;
5480 else
5481 goto normal_char;
5482
5483
5484 case '|':
5485 if (syntax & RE_NO_BK_VBAR)
5486 goto handle_alt;
5487 else
5488 goto normal_char;
5489
5490
5491 case '{':
5492 if ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
5493 goto handle_interval;
5494 else
5495 goto normal_char;
5496
5497
5498 case '\\':
5499 if (p == pend) return REG_EESCAPE;
5500
5501 /* Do not translate the character after the \, so that we can
5502 distinguish, e.g., \B from \b, even if we normally would
5503 translate, e.g., B to b. */
5504 PATFETCH_RAW (c);
5505
5506 switch (c)
5507 {
5508 case '(':
5509 if (syntax & RE_NO_BK_PARENS)
5510 goto normal_backslash;
5511
5512 handle_open:
5513 rxb->re_nsub++;
5514 regnum++;
5515 if (COMPILE_STACK_FULL)
5516 {
5517 ((compile_stack.stack) =
5518 (compile_stack_elt_t *) realloc (compile_stack.stack, ( compile_stack.size << 1) * sizeof (
5519 compile_stack_elt_t)));
5520 if (compile_stack.stack == 0) return REG_ESPACE;
5521
5522 compile_stack.size <<= 1;
5523 }
5524
5525 if (*last_expression)
5526 {
5527 struct rexp_node * concat
5528 = rx_mk_r_concat (&rxb->rx, *last_expression, 0);
5529 if (!concat)
5530 return REG_ESPACE;
5531 *last_expression = concat;
5532 last_expression = &concat->params.pair.right;
5533 }
5534
5535 /*
5536 * These are the values to restore when we hit end of this
5537 * group.
5538 */
5539 COMPILE_STACK_TOP.top_expression = top_expression;
5540 COMPILE_STACK_TOP.last_expression = last_expression;
5541 COMPILE_STACK_TOP.regnum = regnum;
5542
5543 compile_stack.avail++;
5544
5545 top_expression = last_expression;
5546 break;
5547
5548
5549 case ')':
5550 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
5551
5552 handle_close:
5553 /* See similar code for backslashed left paren above. */
5554 if (COMPILE_STACK_EMPTY)
5555 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
5556 goto normal_char;
5557 else
5558 return REG_ERPAREN;
5559
5560 /* Since we just checked for an empty stack above, this
5561 ``can't happen''. */
5562
5563 {
5564 /* We don't just want to restore into `regnum', because
5565 later groups should continue to be numbered higher,
5566 as in `(ab)c(de)' -- the second group is #2. */
5567 regnum_t this_group_regnum;
5568 struct rexp_node ** inner = top_expression;
5569
5570 compile_stack.avail--;
5571 top_expression = COMPILE_STACK_TOP.top_expression;
5572 last_expression = COMPILE_STACK_TOP.last_expression;
5573 this_group_regnum = COMPILE_STACK_TOP.regnum;
5574 {
5575 int left_se = paramc;
5576 int right_se = paramc + 1;
5577
5578 params = (params
5579 ? ((struct re_se_params *)
5580 realloc (params,
5581 (paramc + 2) * sizeof (params[0])))
5582 : ((struct re_se_params *)
5583 malloc (2 * sizeof (params[0]))));
5584 if (!params)
5585 return REG_ESPACE;
5586 paramc += 2;
5587
5588 params[left_se].se = re_se_lparen;
5589 params[left_se].op1 = this_group_regnum;
5590 params[right_se].se = re_se_rparen;
5591 params[right_se].op1 = this_group_regnum;
5592 {
5593 struct rexp_node * left
5594 = rx_mk_r_side_effect (&rxb->rx,
5595 (rx_side_effect)left_se);
5596 struct rexp_node * right
5597 = rx_mk_r_side_effect (&rxb->rx,
5598 (rx_side_effect)right_se);
5599 struct rexp_node * c1
5600 = (*inner
5601 ? rx_mk_r_concat (&rxb->rx, left, *inner) : left);
5602 struct rexp_node * c2
5603 = rx_mk_r_concat (&rxb->rx, c1, right);
5604 if (!(left && right && c1 && c2))
5605 return REG_ESPACE;
5606 *inner = c2;
5607 }
5608 }
5609 break;
5610 }
5611
5612 case '|': /* `\|'. */
5613 if ((syntax & RE_LIMITED_OPS) || (syntax & RE_NO_BK_VBAR))
5614 goto normal_backslash;
5615 handle_alt:
5616 if (syntax & RE_LIMITED_OPS)
5617 goto normal_char;
5618
5619 {
5620 struct rexp_node * alt
5621 = rx_mk_r_alternate (&rxb->rx, *top_expression, 0);
5622 if (!alt)
5623 return REG_ESPACE;
5624 *top_expression = alt;
5625 last_expression = &alt->params.pair.right;
5626 {
5627 int sync_se = paramc;
5628
5629 params = (params
5630 ? ((struct re_se_params *)
5631 realloc (params,
5632 (paramc + 1) * sizeof (params[0])))
5633 : ((struct re_se_params *)
5634 malloc (sizeof (params[0]))));
5635 if (!params)
5636 return REG_ESPACE;
5637 ++paramc;
5638
5639 params[sync_se].se = re_se_tv;
5640 {
5641 struct rexp_node * sync
5642 = rx_mk_r_side_effect (&rxb->rx,
5643 (rx_side_effect)sync_se);
5644 struct rexp_node * conc
5645 = rx_mk_r_concat (&rxb->rx, sync, 0);
5646
5647 if (!sync || !conc)
5648 return REG_ESPACE;
5649
5650 *last_expression = conc;
5651 last_expression = &conc->params.pair.right;
5652 }
5653 }
5654 }
5655 break;
5656
5657
5658 case '{':
5659 /* If \{ is a literal. */
5660 if (!(syntax & RE_INTERVALS)
5661 /* If we're at `\{' and it's not the open-interval
5662 operator. */
5663 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
5664 || (p - 2 == pattern && p == pend))
5665 goto normal_backslash;
5666
5667 handle_interval:
5668 {
5669 /* If got here, then the syntax allows intervals. */
5670
5671 /* At least (most) this many matches must be made. */
5672 int lower_bound = -1, upper_bound = -1;
5673
5674 beg_interval = p - 1;
5675
5676 if (p == pend)
5677 {
5678 if (syntax & RE_NO_BK_BRACES)
5679 goto unfetch_interval;
5680 else
5681 return REG_EBRACE;
5682 }
5683
5684 GET_UNSIGNED_NUMBER (lower_bound);
5685
5686 if (c == ',')
5687 {
5688 GET_UNSIGNED_NUMBER (upper_bound);
5689 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
5690 }
5691 else
5692 /* Interval such as `{1}' => match exactly once. */
5693 upper_bound = lower_bound;
5694
5695 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
5696 || lower_bound > upper_bound)
5697 {
5698 if (syntax & RE_NO_BK_BRACES)
5699 goto unfetch_interval;
5700 else
5701 return REG_BADBR;
5702 }
5703
5704 if (!(syntax & RE_NO_BK_BRACES))
5705 {
5706 if (c != '\\') return REG_EBRACE;
5707 PATFETCH (c);
5708 }
5709
5710 if (c != '}')
5711 {
5712 if (syntax & RE_NO_BK_BRACES)
5713 goto unfetch_interval;
5714 else
5715 return REG_BADBR;
5716 }
5717
5718 /* We just parsed a valid interval. */
5719
5720 /* If it's invalid to have no preceding re. */
5721 if (pointless_if_repeated (*last_expression, params))
5722 {
5723 if (syntax & RE_CONTEXT_INVALID_OPS)
5724 return REG_BADRPT;
5725 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
5726 goto unfetch_interval;
5727 /* was: else laststart = b; */
5728 }
5729
5730 /* If the upper bound is zero, don't want to iterate
5731 * at all.
5732 */
5733 if (upper_bound == 0)
5734 {
5735 if (*last_expression)
5736 {
5737 rx_free_rexp (&rxb->rx, *last_expression);
5738 *last_expression = 0;
5739 }
5740 }
5741 else
5742 /* Otherwise, we have a nontrivial interval. */
5743 {
5744 int iter_se = paramc;
5745 int end_se = paramc + 1;
5746 params = (params
5747 ? ((struct re_se_params *)
5748 realloc (params,
5749 sizeof (*params) * (2 + paramc)))
5750 : ((struct re_se_params *)
5751 malloc (2 * sizeof (*params))));
5752 if (!params)
5753 return REG_ESPACE;
5754 paramc += 2;
5755 params [iter_se].se = re_se_iter;
5756 params [iter_se].op1 = lower_bound;
5757 params[iter_se].op2 = upper_bound;
5758
5759 params[end_se].se = re_se_end_iter;
5760 params[end_se].op1 = lower_bound;
5761 params[end_se].op2 = upper_bound;
5762 {
5763 struct rexp_node * push0
5764 = rx_mk_r_side_effect (&rxb->rx,
5765 (rx_side_effect)re_se_push0);
5766 struct rexp_node * start_one_iter
5767 = rx_mk_r_side_effect (&rxb->rx,
5768 (rx_side_effect)iter_se);
5769 struct rexp_node * phase1
5770 = rx_mk_r_concat (&rxb->rx, start_one_iter,
5771 *last_expression);
5772 struct rexp_node * pushback
5773 = rx_mk_r_side_effect (&rxb->rx,
5774 (rx_side_effect)re_se_pushback);
5775 rx_Bitset cs = rx_cset (&rxb->rx);
5776 struct rexp_node * lit_t
5777 = rx_mk_r_cset (&rxb->rx, cs);
5778 struct rexp_node * phase2
5779 = rx_mk_r_concat (&rxb->rx, pushback, lit_t);
5780 struct rexp_node * loop
5781 = rx_mk_r_2phase_star (&rxb->rx, phase1, phase2);
5782 struct rexp_node * push_n_loop
5783 = rx_mk_r_concat (&rxb->rx, push0, loop);
5784 struct rexp_node * final_test
5785 = rx_mk_r_side_effect (&rxb->rx,
5786 (rx_side_effect)end_se);
5787 struct rexp_node * full_exp
5788 = rx_mk_r_concat (&rxb->rx, push_n_loop, final_test);
5789
5790 if (!(push0 && start_one_iter && phase1
5791 && pushback && lit_t && phase2
5792 && loop && push_n_loop && final_test && full_exp))
5793 return REG_ESPACE;
5794
5795 RX_bitset_enjoin(cs, 't');
5796
5797 *last_expression = full_exp;
5798 }
5799 }
5800 beg_interval = 0;
5801 }
5802 break;
5803
5804 unfetch_interval:
5805 /* If an invalid interval, match the characters as literals. */
5806 p = beg_interval;
5807 beg_interval = 0;
5808
5809 /* normal_char and normal_backslash need `c'. */
5810 PATFETCH (c);
5811
5812 if (!(syntax & RE_NO_BK_BRACES))
5813 {
5814 if (p > pattern && p[-1] == '\\')
5815 goto normal_backslash;
5816 }
5817 goto normal_char;
5818
5819#ifdef emacs
5820 /* There is no way to specify the before_dot and after_dot
5821 operators. rms says this is ok. --karl */
5822 case '=':
5823 side = (rx_side_effect)rx_se_at_dot;
5824 goto add_side_effect;
5825 break;
5826
5827 case 's':
5828 case 'S':
5829 {
5830 rx_Bitset cs = rx_cset (&rxb->rx);
5831 struct rexp_node * set = rx_mk_r_cset (&rxb->rx, cs);
5832 if (!(cs && set))
5833 return REG_ESPACE;
5834 if (c == 'S')
5835 rx_bitset_universe (rxb->rx.local_cset_size, cs);
5836
5837 PATFETCH (c);
5838 {
5839 int x;
5840 enum syntaxcode code = syntax_spec_code [c];
5841 for (x = 0; x < 256; ++x)
5842 {
5843
5844 if (SYNTAX (x) == code)
5845 {
5846 rx_Bitset it =
5847 inverse_translation (rxb, validate_inv_tr,
5848 inverse_translate,
5849 translate, x);
5850 rx_bitset_xor (rxb->rx.local_cset_size, cs, it);
5851 }
5852 }
5853 }
5854 append = set;
5855 goto append_node;
5856 }
5857 break;
5858#endif /* emacs */
5859
5860
5861 case 'w':
5862 case 'W':
5863 {
5864 rx_Bitset cs = rx_cset (&rxb->rx);
5865 struct rexp_node * n = (cs ? rx_mk_r_cset (&rxb->rx, cs) : 0);
5866 if (!(cs && n))
5867 return REG_ESPACE;
5868 if (c == 'W')
5869 rx_bitset_universe (rxb->rx.local_cset_size ,cs);
5870 {
5871 int x;
5872 for (x = rxb->rx.local_cset_size - 1; x > 0; --x)
5873 if (SYNTAX(x) & Sword)
5874 RX_bitset_toggle (cs, x);
5875 }
5876 append = n;
5877 goto append_node;
5878 }
5879 break;
5880
5881/* With a little extra work, some of these side effects could be optimized
5882 * away (basicly by looking at what we already know about the surrounding
5883 * chars).
5884 */
5885 case '<':
5886 side = (rx_side_effect)re_se_wordbeg;
5887 goto add_side_effect;
5888 break;
5889
5890 case '>':
5891 side = (rx_side_effect)re_se_wordend;
5892 goto add_side_effect;
5893 break;
5894
5895 case 'b':
5896 side = (rx_side_effect)re_se_wordbound;
5897 goto add_side_effect;
5898 break;
5899
5900 case 'B':
5901 side = (rx_side_effect)re_se_notwordbound;
5902 goto add_side_effect;
5903 break;
5904
5905 case '`':
5906 side = (rx_side_effect)re_se_begbuf;
5907 goto add_side_effect;
5908 break;
5909
5910 case '\'':
5911 side = (rx_side_effect)re_se_endbuf;
5912 goto add_side_effect;
5913 break;
5914
5915 add_side_effect:
5916 {
5917 struct rexp_node * se
5918 = rx_mk_r_side_effect (&rxb->rx, side);
5919 if (!se)
5920 return REG_ESPACE;
5921 append = se;
5922 goto append_node;
5923 }
5924 break;
5925
5926 case '1': case '2': case '3': case '4': case '5':
5927 case '6': case '7': case '8': case '9':
5928 if (syntax & RE_NO_BK_REFS)
5929 goto normal_char;
5930
5931 c1 = c - '0';
5932
5933 if (c1 > regnum)
5934 return REG_ESUBREG;
5935
5936 /* Can't back reference to a subexpression if inside of it. */
5937 if (group_in_compile_stack (compile_stack, c1))
5938 return REG_ESUBREG;
5939
5940 {
5941 int backref_se = paramc;
5942 params = (params
5943 ? ((struct re_se_params *)
5944 realloc (params,
5945 sizeof (*params) * (1 + paramc)))
5946 : ((struct re_se_params *)
5947 malloc (sizeof (*params))));
5948 if (!params)
5949 return REG_ESPACE;
5950 ++paramc;
5951 params[backref_se].se = re_se_backref;
5952 params[backref_se].op1 = c1;
5953 side = (rx_side_effect)backref_se;
5954 goto add_side_effect;
5955 }
5956 break;
5957
5958 case '+':
5959 case '?':
5960 if (syntax & RE_BK_PLUS_QM)
5961 goto handle_plus;
5962 else
5963 goto normal_backslash;
5964
5965 default:
5966 normal_backslash:
5967 /* You might think it would be useful for \ to mean
5968 not to translate; but if we don't translate it
5969 it will never match anything. */
5970 c = TRANSLATE (c);
5971 goto normal_char;
5972 }
5973 break;
5974
5975
5976 default:
5977 /* Expects the character in `c'. */
5978 normal_char:
5979 {
5980 rx_Bitset cs = rx_cset(&rxb->rx);
5981 struct rexp_node * match = rx_mk_r_cset (&rxb->rx, cs);
5982 rx_Bitset it;
5983 if (!(cs && match))
5984 return REG_ESPACE;
5985 it = inverse_translation (rxb, validate_inv_tr,
5986 inverse_translate, translate, c);
5987 rx_bitset_union (CHAR_SET_SIZE, cs, it);
5988 append = match;
5989
5990 append_node:
5991 /* This genericly appends the rexp APPEND to *LAST_EXPRESSION
5992 * and then parses the next character normally.
5993 */
5994 if (*last_expression)
5995 {
5996 struct rexp_node * concat
5997 = rx_mk_r_concat (&rxb->rx, *last_expression, append);
5998 if (!concat)
5999 return REG_ESPACE;
6000 *last_expression = concat;
6001 last_expression = &concat->params.pair.right;
6002 }
6003 else
6004 *last_expression = append;
6005 }
6006 } /* switch (c) */
6007 } /* while p != pend */
6008
6009
6010 {
6011 int win_se = paramc;
6012 params = (params
6013 ? ((struct re_se_params *)
6014 realloc (params,
6015 sizeof (*params) * (1 + paramc)))
6016 : ((struct re_se_params *)
6017 malloc (sizeof (*params))));
6018 if (!params)
6019 return REG_ESPACE;
6020 ++paramc;
6021 params[win_se].se = re_se_win;
6022 {
6023 struct rexp_node * se
6024 = rx_mk_r_side_effect (&rxb->rx, (rx_side_effect)win_se);
6025 struct rexp_node * concat
6026 = rx_mk_r_concat (&rxb->rx, rexp, se);
6027 if (!(se && concat))
6028 return REG_ESPACE;
6029 rexp = concat;
6030 }
6031 }
6032
6033
6034 /* Through the pattern now. */
6035
6036 if (!COMPILE_STACK_EMPTY)
6037 return REG_EPAREN;
6038
6039 free (compile_stack.stack);
6040
6041 orig_rexp = rexp;
6042#ifdef RX_DEBUG
6043 if (rx_debug_compile)
6044 {
6045 dbug_rxb = rxb;
6046 fputs ("\n\nCompiling ", stdout);
6047 fwrite (pattern, 1, size, stdout);
6048 fputs (":\n", stdout);
6049 rxb->se_params = params;
6050 print_rexp (&rxb->rx, orig_rexp, 2, re_seprint, stdout);
6051 }
6052#endif
6053 {
6054 rx_Bitset cs = rx_cset(&rxb->rx);
6055 rx_Bitset cs2 = rx_cset(&rxb->rx);
6056#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6057 char * se_map = (char *) _alloca (paramc);
6058#else
6059 char * se_map = (char *) alloca (paramc);
6060#endif
6061 struct rexp_node * new_rexp = 0;
6062
6063
6064 bzero (se_map, paramc);
6065 find_backrefs (se_map, rexp, params);
6066 fewer_side_effects =
6067 remove_unecessary_side_effects (&rxb->rx, se_map,
6068 rx_copy_rexp (&rxb->rx, rexp), params);
6069
6070 speed_up_alt (&rxb->rx, rexp, 0);
6071 speed_up_alt (&rxb->rx, fewer_side_effects, 1);
6072
6073 {
6074 char * syntax_parens = rxb->syntax_parens;
6075 if (syntax_parens == (char *)0x1)
6076 rexp = remove_unecessary_side_effects
6077 (&rxb->rx, se_map, rexp, params);
6078 else if (syntax_parens)
6079 {
6080 int x;
6081 for (x = 0; x < paramc; ++x)
6082 if (( (params[x].se == re_se_lparen)
6083 || (params[x].se == re_se_rparen))
6084 && (!syntax_parens [params[x].op1]))
6085 se_map [x] = 1;
6086 rexp = remove_unecessary_side_effects
6087 (&rxb->rx, se_map, rexp, params);
6088 }
6089 }
6090
6091 /* At least one more optimization would be nice to have here but i ran out
6092 * of time. The idea would be to delay side effects.
6093 * For examle, `(abc)' is the same thing as `abc()' except that the
6094 * left paren is offset by 3 (which we know at compile time).
6095 * (In this comment, write that second pattern `abc(:3:)'
6096 * where `(:3:' is a syntactic unit.)
6097 *
6098 * Trickier: `(abc|defg)' is the same as `(abc(:3:|defg(:4:))'
6099 * (The paren nesting may be hard to follow -- that's an alternation
6100 * of `abc(:3:' and `defg(:4:' inside (purely syntactic) parens
6101 * followed by the closing paren from the original expression.)
6102 *
6103 * Neither the expression tree representation nor the the nfa make
6104 * this very easy to write. :(
6105 */
6106
6107 /* What we compile is different than what the parser returns.
6108 * Suppose the parser returns expression R.
6109 * Let R' be R with unnecessary register assignments removed
6110 * (see REMOVE_UNECESSARY_SIDE_EFFECTS, above).
6111 *
6112 * What we will compile is the expression:
6113 *
6114 * m{try}R{win}\|s{try}R'{win}
6115 *
6116 * {try} and {win} denote side effect epsilons (see EXPLORE_FUTURE).
6117 *
6118 * When trying a match, we insert an `m' at the beginning of the
6119 * string if the user wants registers to be filled, `s' if not.
6120 */
6121 new_rexp =
6122 rx_mk_r_alternate
6123 (&rxb->rx,
6124 rx_mk_r_concat (&rxb->rx, rx_mk_r_cset (&rxb->rx, cs2), rexp),
6125 rx_mk_r_concat (&rxb->rx,
6126 rx_mk_r_cset (&rxb->rx, cs), fewer_side_effects));
6127
6128 if (!(new_rexp && cs && cs2))
6129 return REG_ESPACE;
6130 RX_bitset_enjoin (cs2, '\0'); /* prefixed to the rexp used for matching. */
6131 RX_bitset_enjoin (cs, '\1'); /* prefixed to the rexp used for searching. */
6132 rexp = new_rexp;
6133 }
6134
6135#ifdef RX_DEBUG
6136 if (rx_debug_compile)
6137 {
6138 fputs ("\n...which is compiled as:\n", stdout);
6139 print_rexp (&rxb->rx, rexp, 2, re_seprint, stdout);
6140 }
6141#endif
6142 {
6143 struct rx_nfa_state *start = 0;
6144 struct rx_nfa_state *end = 0;
6145
6146 if (!rx_build_nfa (&rxb->rx, rexp, &start, &end))
6147 return REG_ESPACE; /* */
6148 else
6149 {
6150 void * mem = (void *)rxb->buffer;
6151 unsigned long size = rxb->allocated;
6152 int start_id;
6153 char * perm_mem;
6154 int iterator_size = paramc * sizeof (params[0]);
6155
6156 end->is_final = 1;
6157 start->is_start = 1;
6158 rx_name_nfa_states (&rxb->rx);
6159 start_id = start->id;
6160#ifdef RX_DEBUG
6161 if (rx_debug_compile)
6162 {
6163 fputs ("...giving the NFA: \n", stdout);
6164 dbug_rxb = rxb;
6165 print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
6166 }
6167#endif
6168 if (!rx_eclose_nfa (&rxb->rx))
6169 return REG_ESPACE;
6170 else
6171 {
6172 rx_delete_epsilon_transitions (&rxb->rx);
6173
6174 /* For compatability reasons, we need to shove the
6175 * compiled nfa into one chunk of malloced memory.
6176 */
6177 rxb->rx.reserved = ( sizeof (params[0]) * paramc
6178 + rx_sizeof_bitset (rxb->rx.local_cset_size));
6179#ifdef RX_DEBUG
6180 if (rx_debug_compile)
6181 {
6182 dbug_rxb = rxb;
6183 fputs ("...which cooks down (uncompactified) to: \n", stdout);
6184 print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
6185 }
6186#endif
6187 if (!rx_compactify_nfa (&rxb->rx, &mem, &size))
6188 return REG_ESPACE;
6189 rxb->buffer = mem;
6190 rxb->allocated = size;
6191 rxb->rx.buffer = mem;
6192 rxb->rx.allocated = size;
6193 perm_mem = ((char *)rxb->rx.buffer
6194 + rxb->rx.allocated - rxb->rx.reserved);
6195 rxb->se_params = ((struct re_se_params *)perm_mem);
6196 bcopy (params, rxb->se_params, iterator_size);
6197 perm_mem += iterator_size;
6198 rxb->fastset = (rx_Bitset) perm_mem;
6199 rxb->start = rx_id_to_nfa_state (&rxb->rx, start_id);
6200 }
6201 rx_bitset_null (rxb->rx.local_cset_size, rxb->fastset);
6202 rxb->can_match_empty = compute_fastset (rxb, orig_rexp);
6203 rxb->match_regs_on_stack =
6204 registers_on_stack (rxb, orig_rexp, 0, params);
6205 rxb->search_regs_on_stack =
6206 registers_on_stack (rxb, fewer_side_effects, 0, params);
6207 if (rxb->can_match_empty)
6208 rx_bitset_universe (rxb->rx.local_cset_size, rxb->fastset);
6209 rxb->is_anchored = is_anchored (orig_rexp, (rx_side_effect) re_se_hat);
6210 rxb->begbuf_only = is_anchored (orig_rexp,
6211 (rx_side_effect) re_se_begbuf);
6212 }
6213 rx_free_rexp (&rxb->rx, rexp);
6214 if (params)
6215 free (params);
6216#ifdef RX_DEBUG
6217 if (rx_debug_compile)
6218 {
6219 dbug_rxb = rxb;
6220 fputs ("...which cooks down to: \n", stdout);
6221 print_nfa (&rxb->rx, rxb->rx.nfa_states, re_seprint, stdout);
6222 }
6223#endif
6224 }
6225 return REG_NOERROR;
6226}
6227
6228
6229
6230
6231/* This table gives an error message for each of the error codes listed
6232 in regex.h. Obviously the order here has to be same as there. */
6233
6234__const__ char * rx_error_msg[] =
6235{ 0, /* REG_NOERROR */
6236 "No match", /* REG_NOMATCH */
6237 "Invalid regular expression", /* REG_BADPAT */
6238 "Invalid collation character", /* REG_ECOLLATE */
6239 "Invalid character class name", /* REG_ECTYPE */
6240 "Trailing backslash", /* REG_EESCAPE */
6241 "Invalid back reference", /* REG_ESUBREG */
6242 "Unmatched [ or [^", /* REG_EBRACK */
6243 "Unmatched ( or \\(", /* REG_EPAREN */
6244 "Unmatched \\{", /* REG_EBRACE */
6245 "Invalid content of \\{\\}", /* REG_BADBR */
6246 "Invalid range end", /* REG_ERANGE */
6247 "Memory exhausted", /* REG_ESPACE */
6248 "Invalid preceding regular expression", /* REG_BADRPT */
6249 "Premature end of regular expression", /* REG_EEND */
6250 "Regular expression too big", /* REG_ESIZE */
6251 "Unmatched ) or \\)", /* REG_ERPAREN */
6252};
6253
6254
6255
6256
6257
6258char rx_slowmap [256] =
6259{
6260 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6261 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6262 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6263 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6264 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6265 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6266 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6267 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6268 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6269 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6270 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6271 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6272 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6273 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6274 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6275 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
6276};
6277
6278#ifdef __STDC__
6279RX_DECL void
6280rx_blow_up_fastmap (struct re_pattern_buffer * rxb)
6281#else
6282RX_DECL void
6283rx_blow_up_fastmap (rxb)
6284 struct re_pattern_buffer * rxb;
6285#endif
6286{
6287 int x;
6288 for (x = 0; x < 256; ++x) /* &&&& 3.6 % */
6289 rxb->fastmap [x] = !!RX_bitset_member (rxb->fastset, x);
6290 rxb->fastmap_accurate = 1;
6291}
6292
6293
6294
6295
6296
6297#if !defined(REGEX_MALLOC) && !defined(__GNUC__)
6298#define RE_SEARCH_2_FN inner_re_search_2
6299#define RE_S2_QUAL static
6300#else
6301#define RE_SEARCH_2_FN re_search_2
6302#define RE_S2_QUAL
6303#endif
6304
6305struct re_search_2_closure
6306{
6307 __const__ char * string1;
6308 int size1;
6309 __const__ char * string2;
6310 int size2;
6311};
6312
6313
6314static __inline__ enum rx_get_burst_return
6315re_search_2_get_burst (pos, vclosure, stop)
6316 struct rx_string_position * pos;
6317 void * vclosure;
6318 int stop;
6319{
6320 struct re_search_2_closure * closure;
6321 closure = (struct re_search_2_closure *)vclosure;
6322 if (!closure->string2)
6323 {
6324 int inset;
6325
6326 inset = pos->pos - pos->string;
6327 if ((inset < -1) || (inset > closure->size1))
6328 return rx_get_burst_no_more;
6329 else
6330 {
6331 pos->pos = (__const__ unsigned char *) closure->string1 + inset;
6332 pos->string = (__const__ unsigned char *) closure->string1;
6333 pos->size = closure->size1;
6334 pos->end = ((__const__ unsigned char *)
6335 MIN(closure->string1 + closure->size1,
6336 closure->string1 + stop));
6337 pos->offset = 0;
6338 return ((pos->pos < pos->end)
6339 ? rx_get_burst_ok
6340 : rx_get_burst_no_more);
6341 }
6342 }
6343 else if (!closure->string1)
6344 {
6345 int inset;
6346
6347 inset = pos->pos - pos->string;
6348 pos->pos = (__const__ unsigned char *) closure->string2 + inset;
6349 pos->string = (__const__ unsigned char *) closure->string2;
6350 pos->size = closure->size2;
6351 pos->end = ((__const__ unsigned char *)
6352 MIN(closure->string2 + closure->size2,
6353 closure->string2 + stop));
6354 pos->offset = 0;
6355 return ((pos->pos < pos->end)
6356 ? rx_get_burst_ok
6357 : rx_get_burst_no_more);
6358 }
6359 else
6360 {
6361 int inset;
6362
6363 inset = pos->pos - pos->string + pos->offset;
6364 if (inset < closure->size1)
6365 {
6366 pos->pos = (__const__ unsigned char *) closure->string1 + inset;
6367 pos->string = (__const__ unsigned char *) closure->string1;
6368 pos->size = closure->size1;
6369 pos->end = ((__const__ unsigned char *)
6370 MIN(closure->string1 + closure->size1,
6371 closure->string1 + stop));
6372 pos->offset = 0;
6373 return rx_get_burst_ok;
6374 }
6375 else
6376 {
6377 pos->pos = ((__const__ unsigned char *)
6378 closure->string2 + inset - closure->size1);
6379 pos->string = (__const__ unsigned char *) closure->string2;
6380 pos->size = closure->size2;
6381 pos->end = ((__const__ unsigned char *)
6382 MIN(closure->string2 + closure->size2,
6383 closure->string2 + stop - closure->size1));
6384 pos->offset = closure->size1;
6385 return ((pos->pos < pos->end)
6386 ? rx_get_burst_ok
6387 : rx_get_burst_no_more);
6388 }
6389 }
6390}
6391
6392
6393static __inline__ enum rx_back_check_return
6394re_search_2_back_check (pos, lparen, rparen, translate, vclosure, stop)
6395 struct rx_string_position * pos;
6396 int lparen;
6397 int rparen;
6398 unsigned char * translate;
6399 void * vclosure;
6400 int stop;
6401{
6402 struct rx_string_position there;
6403 struct rx_string_position past;
6404
6405 there = *pos;
6406 there.pos = there.string + lparen - there.offset;
6407 re_search_2_get_burst (&there, vclosure, stop);
6408
6409 past = *pos;
6410 past.pos = past.string + rparen - there.offset;
6411 re_search_2_get_burst (&past, vclosure, stop);
6412
6413 ++pos->pos;
6414 re_search_2_get_burst (pos, vclosure, stop);
6415
6416 while ( (there.pos != past.pos)
6417 && (pos->pos != pos->end))
6418 if (TRANSLATE(*there.pos) != TRANSLATE(*pos->pos))
6419 return rx_back_check_fail;
6420 else
6421 {
6422 ++there.pos;
6423 ++pos->pos;
6424 if (there.pos == there.end)
6425 re_search_2_get_burst (&there, vclosure, stop);
6426 if (pos->pos == pos->end)
6427 re_search_2_get_burst (pos, vclosure, stop);
6428 }
6429
6430 if (there.pos != past.pos)
6431 return rx_back_check_fail;
6432 --pos->pos;
6433 re_search_2_get_burst (pos, vclosure, stop);
6434 return rx_back_check_pass;
6435}
6436
6437static __inline__ int
6438re_search_2_fetch_char (pos, offset, app_closure, stop)
6439 struct rx_string_position * pos;
6440 int offset;
6441 void * app_closure;
6442 int stop;
6443{
6444 struct re_search_2_closure * closure;
6445 closure = (struct re_search_2_closure *)app_closure;
6446 if (offset == 0)
6447 {
6448 if (pos->pos >= pos->string)
6449 return *pos->pos;
6450 else
6451 {
6452 if ( (pos->string == (__const__ unsigned char *) closure->string2)
6453 && (closure->string1)
6454 && (closure->size1))
6455 return closure->string1[closure->size1 - 1];
6456 else
6457 return 0; /* sure, why not. */
6458 }
6459 }
6460 if (pos->pos == pos->end)
6461 return *closure->string2;
6462 else
6463 return pos->pos[1];
6464}
6465
6466
6467#ifdef __STDC__
6468RE_S2_QUAL int
6469RE_SEARCH_2_FN (struct re_pattern_buffer *rxb,
6470 __const__ char * string1, int size1,
6471 __const__ char * string2, int size2,
6472 int startpos, int range,
6473 struct re_registers *regs,
6474 int stop)
6475#else
6476RE_S2_QUAL int
6477RE_SEARCH_2_FN (rxb,
6478 string1, size1, string2, size2, startpos, range, regs, stop)
6479 struct re_pattern_buffer *rxb;
6480 __const__ char * string1;
6481 int size1;
6482 __const__ char * string2;
6483 int size2;
6484 int startpos;
6485 int range;
6486 struct re_registers *regs;
6487 int stop;
6488#endif
6489{
6490 int answer;
6491 struct re_search_2_closure closure;
6492 closure.string1 = string1;
6493 closure.size1 = size1;
6494 closure.string2 = string2;
6495 closure.size2 = size2;
6496 answer = rx_search (rxb, startpos, range, stop, size1 + size2,
6497 re_search_2_get_burst,
6498 re_search_2_back_check,
6499 re_search_2_fetch_char,
6500 (void *)&closure,
6501 regs,
6502 0,
6503 0);
6504 switch (answer)
6505 {
6506 case rx_search_continuation:
6507 abort ();
6508 case rx_search_error:
6509 return -2;
6510 case rx_search_soft_fail:
6511 case rx_search_fail:
6512 return -1;
6513 default:
6514 return answer;
6515 }
6516}
6517
6518/* Export rx_search to callers outside this file. */
6519
6520int
6521re_rx_search (rxb, startpos, range, stop, total_size,
6522 get_burst, back_check, fetch_char,
6523 app_closure, regs, resume_state, save_state)
6524 struct re_pattern_buffer * rxb;
6525 int startpos;
6526 int range;
6527 int stop;
6528 int total_size;
6529 rx_get_burst_fn get_burst;
6530 rx_back_check_fn back_check;
6531 rx_fetch_char_fn fetch_char;
6532 void * app_closure;
6533 struct re_registers * regs;
6534 struct rx_search_state * resume_state;
6535 struct rx_search_state * save_state;
6536{
6537 return rx_search (rxb, startpos, range, stop, total_size,
6538 get_burst, back_check, fetch_char, app_closure,
6539 regs, resume_state, save_state);
6540}
6541
6542#if !defined(REGEX_MALLOC) && !defined(__GNUC__)
6543#ifdef __STDC__
6544int
6545re_search_2 (struct re_pattern_buffer *rxb,
6546 __const__ char * string1, int size1,
6547 __const__ char * string2, int size2,
6548 int startpos, int range,
6549 struct re_registers *regs,
6550 int stop)
6551#else
6552int
6553re_search_2 (rxb, string1, size1, string2, size2, startpos, range, regs, stop)
6554 struct re_pattern_buffer *rxb;
6555 __const__ char * string1;
6556 int size1;
6557 __const__ char * string2;
6558 int size2;
6559 int startpos;
6560 int range;
6561 struct re_registers *regs;
6562 int stop;
6563#endif
6564{
6565 int ret;
6566 ret = inner_re_search_2 (rxb, string1, size1, string2, size2, startpos,
6567 range, regs, stop);
6568#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6569 _alloca (0);
6570#else
6571 alloca (0);
6572#endif
6573 return ret;
6574}
6575#endif
6576
6577
6578/* Like re_search_2, above, but only one string is specified, and
6579 * doesn't let you say where to stop matching.
6580 */
6581
6582#ifdef __STDC__
6583int
6584re_search (struct re_pattern_buffer * rxb, __const__ char *string,
6585 int size, int startpos, int range,
6586 struct re_registers *regs)
6587#else
6588int
6589re_search (rxb, string, size, startpos, range, regs)
6590 struct re_pattern_buffer * rxb;
6591 __const__ char * string;
6592 int size;
6593 int startpos;
6594 int range;
6595 struct re_registers *regs;
6596#endif
6597{
6598 return re_search_2 (rxb, 0, 0, string, size, startpos, range, regs, size);
6599}
6600
6601#ifdef __STDC__
6602int
6603re_match_2 (struct re_pattern_buffer * rxb,
6604 __const__ char * string1, int size1,
6605 __const__ char * string2, int size2,
6606 int pos, struct re_registers *regs, int stop)
6607#else
6608int
6609re_match_2 (rxb, string1, size1, string2, size2, pos, regs, stop)
6610 struct re_pattern_buffer * rxb;
6611 __const__ char * string1;
6612 int size1;
6613 __const__ char * string2;
6614 int size2;
6615 int pos;
6616 struct re_registers *regs;
6617 int stop;
6618#endif
6619{
6620 struct re_registers some_regs;
6621 regoff_t start;
6622 regoff_t end;
6623 int srch;
6624 int save = rxb->regs_allocated;
6625 struct re_registers * regs_to_pass = regs;
6626
6627 if (!regs)
6628 {
6629 some_regs.start = &start;
6630 some_regs.end = &end;
6631 some_regs.num_regs = 1;
6632 regs_to_pass = &some_regs;
6633 rxb->regs_allocated = REGS_FIXED;
6634 }
6635
6636 srch = re_search_2 (rxb, string1, size1, string2, size2,
6637 pos, 1, regs_to_pass, stop);
6638 if (regs_to_pass != regs)
6639 rxb->regs_allocated = save;
6640 if (srch < 0)
6641 return srch;
6642 return regs_to_pass->end[0] - regs_to_pass->start[0];
6643}
6644
6645/* re_match is like re_match_2 except it takes only a single string. */
6646
6647#ifdef __STDC__
6648int
6649re_match (struct re_pattern_buffer * rxb,
6650 __const__ char * string,
6651 int size, int pos,
6652 struct re_registers *regs)
6653#else
6654int
6655re_match (rxb, string, size, pos, regs)
6656 struct re_pattern_buffer * rxb;
6657 __const__ char *string;
6658 int size;
6659 int pos;
6660 struct re_registers *regs;
6661#endif
6662{
6663 return re_match_2 (rxb, string, size, 0, 0, pos, regs, size);
6664}
6665
6666
6667
6668
6669/* Set by `re_set_syntax' to the current regexp syntax to recognize. Can
6670 also be assigned to arbitrarily: each pattern buffer stores its own
6671 syntax, so it can be changed between regex compilations. */
6672reg_syntax_t re_syntax_options = RE_SYNTAX_EMACS;
6673
6674
6675/* Specify the precise syntax of regexps for compilation. This provides
6676 for compatibility for various utilities which historically have
6677 different, incompatible syntaxes.
6678
6679 The argument SYNTAX is a bit mask comprised of the various bits
6680 defined in regex.h. We return the old syntax. */
6681
6682#ifdef __STDC__
6683reg_syntax_t
6684re_set_syntax (reg_syntax_t syntax)
6685#else
6686reg_syntax_t
6687re_set_syntax (syntax)
6688 reg_syntax_t syntax;
6689#endif
6690{
6691 reg_syntax_t ret = re_syntax_options;
6692
6693 re_syntax_options = syntax;
6694 return ret;
6695}
6696
6697
6698
6699/* Set REGS to hold NUM_REGS registers, storing them in STARTS and
6700 ENDS. Subsequent matches using PATTERN_BUFFER and REGS will use
6701 this memory for recording register information. STARTS and ENDS
6702 must be allocated using the malloc library routine, and must each
6703 be at least NUM_REGS * sizeof (regoff_t) bytes long.
6704
6705 If NUM_REGS == 0, then subsequent matches should allocate their own
6706 register data.
6707
6708 Unless this function is called, the first search or match using
6709 PATTERN_BUFFER will allocate its own register data, without
6710 freeing the old data. */
6711
6712#ifdef __STDC__
6713void
6714re_set_registers (struct re_pattern_buffer *bufp,
6715 struct re_registers *regs,
6716 unsigned num_regs,
6717 regoff_t * starts, regoff_t * ends)
6718#else
6719void
6720re_set_registers (bufp, regs, num_regs, starts, ends)
6721 struct re_pattern_buffer *bufp;
6722 struct re_registers *regs;
6723 unsigned num_regs;
6724 regoff_t * starts;
6725 regoff_t * ends;
6726#endif
6727{
6728 if (num_regs)
6729 {
6730 bufp->regs_allocated = REGS_REALLOCATE;
6731 regs->num_regs = num_regs;
6732 regs->start = starts;
6733 regs->end = ends;
6734 }
6735 else
6736 {
6737 bufp->regs_allocated = REGS_UNALLOCATED;
6738 regs->num_regs = 0;
6739 regs->start = regs->end = (regoff_t) 0;
6740 }
6741}
6742
6743
6744
6745
6746
6747#ifdef __STDC__
6748static int
6749cplx_se_sublist_len (struct rx_se_list * list)
6750#else
6751static int
6752cplx_se_sublist_len (list)
6753 struct rx_se_list * list;
6754#endif
6755{
6756 int x = 0;
6757 while (list)
6758 {
6759 if ((long)list->car >= 0)
6760 ++x;
6761 list = list->cdr;
6762 }
6763 return x;
6764}
6765
6766
6767/* For rx->se_list_cmp */
6768
6769#ifdef __STDC__
6770static int
6771posix_se_list_order (struct rx * rx,
6772 struct rx_se_list * a, struct rx_se_list * b)
6773#else
6774static int
6775posix_se_list_order (rx, a, b)
6776 struct rx * rx;
6777 struct rx_se_list * a;
6778 struct rx_se_list * b;
6779#endif
6780{
6781 int al = cplx_se_sublist_len (a);
6782 int bl = cplx_se_sublist_len (b);
6783
6784 if (!al && !bl)
6785 return ((a == b)
6786 ? 0
6787 : ((a < b) ? -1 : 1));
6788
6789 else if (!al)
6790 return -1;
6791
6792 else if (!bl)
6793 return 1;
6794
6795 else
6796 {
6797#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6798 rx_side_effect * av = ((rx_side_effect *)
6799 _alloca (sizeof (rx_side_effect) * (al + 1)));
6800 rx_side_effect * bv = ((rx_side_effect *)
6801 _alloca (sizeof (rx_side_effect) * (bl + 1)));
6802#else
6803 rx_side_effect * av = ((rx_side_effect *)
6804 alloca (sizeof (rx_side_effect) * (al + 1)));
6805 rx_side_effect * bv = ((rx_side_effect *)
6806 alloca (sizeof (rx_side_effect) * (bl + 1)));
6807#endif
6808 struct rx_se_list * ap = a;
6809 struct rx_se_list * bp = b;
6810 int ai, bi;
6811
6812 for (ai = al - 1; ai >= 0; --ai)
6813 {
6814 while ((long)ap->car < 0)
6815 ap = ap->cdr;
6816 av[ai] = ap->car;
6817 ap = ap->cdr;
6818 }
6819 av[al] = (rx_side_effect)-2;
6820 for (bi = bl - 1; bi >= 0; --bi)
6821 {
6822 while ((long)bp->car < 0)
6823 bp = bp->cdr;
6824 bv[bi] = bp->car;
6825 bp = bp->cdr;
6826 }
6827 bv[bl] = (rx_side_effect)-1;
6828
6829 {
6830 int ret;
6831 int x = 0;
6832 while (av[x] == bv[x])
6833 ++x;
6834 ret = (((unsigned *)(av[x]) < (unsigned *)(bv[x])) ? -1 : 1);
6835 return ret;
6836 }
6837 }
6838}
6839
6840
6841
6842
6843
6844/* re_compile_pattern is the GNU regular expression compiler: it
6845 compiles PATTERN (of length SIZE) and puts the result in RXB.
6846 Returns 0 if the pattern was valid, otherwise an error string.
6847
6848 Assumes the `allocated' (and perhaps `buffer') and `translate' fields
6849 are set in RXB on entry.
6850
6851 We call rx_compile to do the actual compilation. */
6852
6853#ifdef __STDC__
6854__const__ char *
6855re_compile_pattern (__const__ char *pattern,
6856 int length,
6857 struct re_pattern_buffer * rxb)
6858#else
6859__const__ char *
6860re_compile_pattern (pattern, length, rxb)
6861 __const__ char *pattern;
6862 int length;
6863 struct re_pattern_buffer * rxb;
6864#endif
6865{
6866 reg_errcode_t ret;
6867
6868 /* GNU code is written to assume at least RE_NREGS registers will be set
6869 (and at least one extra will be -1). */
6870 rxb->regs_allocated = REGS_UNALLOCATED;
6871
6872 /* And GNU code determines whether or not to get register information
6873 by passing null for the REGS argument to re_match, etc., not by
6874 setting no_sub. */
6875 rxb->no_sub = 0;
6876
6877 rxb->rx.local_cset_size = 256;
6878
6879 /* Match anchors at newline. */
6880 rxb->newline_anchor = 1;
6881
6882 rxb->re_nsub = 0;
6883 rxb->start = 0;
6884 rxb->se_params = 0;
6885 rxb->rx.nodec = 0;
6886 rxb->rx.epsnodec = 0;
6887 rxb->rx.instruction_table = 0;
6888 rxb->rx.nfa_states = 0;
6889 rxb->rx.se_list_cmp = posix_se_list_order;
6890 rxb->rx.start_set = 0;
6891
6892 ret = rx_compile (pattern, length, re_syntax_options, rxb);
6893#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6894 _alloca (0);
6895#else
6896 alloca (0);
6897#endif
6898 return rx_error_msg[(int) ret];
6899}
6900
6901
6902
6903#ifdef __STDC__
6904int
6905re_compile_fastmap (struct re_pattern_buffer * rxb)
6906#else
6907int
6908re_compile_fastmap (rxb)
6909 struct re_pattern_buffer * rxb;
6910#endif
6911{
6912 rx_blow_up_fastmap (rxb);
6913 return 0;
6914}
6915
6916
6917
6918
6919
6920/* Entry points compatible with 4.2 BSD regex library. We don't define
6921 them if this is an Emacs or POSIX compilation. */
6922
6923#if (!defined (emacs) && !defined (_POSIX_SOURCE)) || defined(USE_BSD_REGEX)
6924
6925/* BSD has one and only one pattern buffer. */
6926static struct re_pattern_buffer rx_comp_buf;
6927
6928#ifdef __STDC__
6929char *
6930re_comp (__const__ char *s)
6931#else
6932char *
6933re_comp (s)
6934 __const__ char *s;
6935#endif
6936{
6937 reg_errcode_t ret;
6938
6939 if (!s || (*s == '\0'))
6940 {
6941 if (!rx_comp_buf.buffer)
6942 return "No previous regular expression";
6943 return 0;
6944 }
6945
6946 if (!rx_comp_buf.fastmap)
6947 {
6948 rx_comp_buf.fastmap = (char *) malloc (1 << CHARBITS);
6949 if (!rx_comp_buf.fastmap)
6950 return "Memory exhausted";
6951 }
6952
6953 /* Since `rx_exec' always passes NULL for the `regs' argument, we
6954 don't need to initialize the pattern buffer fields which affect it. */
6955
6956 /* Match anchors at newlines. */
6957 rx_comp_buf.newline_anchor = 1;
6958
6959 rx_comp_buf.re_nsub = 0;
6960 rx_comp_buf.start = 0;
6961 rx_comp_buf.se_params = 0;
6962 rx_comp_buf.rx.nodec = 0;
6963 rx_comp_buf.rx.epsnodec = 0;
6964 rx_comp_buf.rx.instruction_table = 0;
6965 rx_comp_buf.rx.nfa_states = 0;
6966 rx_comp_buf.rx.start = 0;
6967 rx_comp_buf.rx.se_list_cmp = posix_se_list_order;
6968 rx_comp_buf.rx.local_cset_size = 256;
6969
6970 ret = rx_compile (s, strlen (s), re_syntax_options, &rx_comp_buf);
6971#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
6972 _alloca (0);
6973#else
6974 alloca (0);
6975#endif
6976
6977 /* Yes, we're discarding `__const__' here. */
6978 return (char *) rx_error_msg[(int) ret];
6979}
6980
6981
6982#ifdef __STDC__
6983int
6984re_exec (__const__ char *s)
6985#else
6986int
6987re_exec (s)
6988 __const__ char *s;
6989#endif
6990{
6991 __const__ int len = strlen (s);
6992 return
6993 0 <= re_search (&rx_comp_buf, s, len, 0, len, (struct re_registers *) 0);
6994}
6995#endif /* not emacs and not _POSIX_SOURCE */
6996
6997
6998
6999
7000/* POSIX.2 functions. Don't define these for Emacs. */
7001
7002#if !defined(emacs)
7003
7004/* regcomp takes a regular expression as a string and compiles it.
7005
7006 PREG is a regex_t *. We do not expect any fields to be initialized,
7007 since POSIX says we shouldn't. Thus, we set
7008
7009 `buffer' to the compiled pattern;
7010 `used' to the length of the compiled pattern;
7011 `syntax' to RE_SYNTAX_POSIX_EXTENDED if the
7012 REG_EXTENDED bit in CFLAGS is set; otherwise, to
7013 RE_SYNTAX_POSIX_BASIC;
7014 `newline_anchor' to REG_NEWLINE being set in CFLAGS;
7015 `fastmap' and `fastmap_accurate' to zero;
7016 `re_nsub' to the number of subexpressions in PATTERN.
7017
7018 PATTERN is the address of the pattern string.
7019
7020 CFLAGS is a series of bits which affect compilation.
7021
7022 If REG_EXTENDED is set, we use POSIX extended syntax; otherwise, we
7023 use POSIX basic syntax.
7024
7025 If REG_NEWLINE is set, then . and [^...] don't match newline.
7026 Also, regexec will try a match beginning after every newline.
7027
7028 If REG_ICASE is set, then we considers upper- and lowercase
7029 versions of letters to be equivalent when matching.
7030
7031 If REG_NOSUB is set, then when PREG is passed to regexec, that
7032 routine will report only success or failure, and nothing about the
7033 registers.
7034
7035 It returns 0 if it succeeds, nonzero if it doesn't. (See regex.h for
7036 the return codes and their meanings.) */
7037
7038
7039#ifdef __STDC__
7040int
7041regcomp (regex_t * preg, __const__ char * pattern, int cflags)
7042#else
7043int
7044regcomp (preg, pattern, cflags)
7045 regex_t * preg;
7046 __const__ char * pattern;
7047 int cflags;
7048#endif
7049{
7050 reg_errcode_t ret;
7051 unsigned syntax
7052 = cflags & REG_EXTENDED ? RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
7053
7054 /* regex_compile will allocate the space for the compiled pattern. */
7055 preg->buffer = 0;
7056 preg->allocated = 0;
7057 preg->fastmap = malloc (256);
7058 if (!preg->fastmap)
7059 return REG_ESPACE;
7060 preg->fastmap_accurate = 0;
7061
7062 if (cflags & REG_ICASE)
7063 {
7064 unsigned i;
7065
7066 preg->translate = (unsigned char *) malloc (256);
7067 if (!preg->translate)
7068 return (int) REG_ESPACE;
7069
7070 /* Map uppercase characters to corresponding lowercase ones. */
7071 for (i = 0; i < CHAR_SET_SIZE; i++)
7072 preg->translate[i] = isupper (i) ? tolower (i) : i;
7073 }
7074 else
7075 preg->translate = 0;
7076
7077 /* If REG_NEWLINE is set, newlines are treated differently. */
7078 if (cflags & REG_NEWLINE)
7079 { /* REG_NEWLINE implies neither . nor [^...] match newline. */
7080 syntax &= ~RE_DOT_NEWLINE;
7081 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
7082 /* It also changes the matching behavior. */
7083 preg->newline_anchor = 1;
7084 }
7085 else
7086 preg->newline_anchor = 0;
7087
7088 preg->no_sub = !!(cflags & REG_NOSUB);
7089
7090 /* POSIX says a null character in the pattern terminates it, so we
7091 can use strlen here in compiling the pattern. */
7092 preg->re_nsub = 0;
7093 preg->start = 0;
7094 preg->se_params = 0;
7095 preg->syntax_parens = 0;
7096 preg->rx.nodec = 0;
7097 preg->rx.epsnodec = 0;
7098 preg->rx.instruction_table = 0;
7099 preg->rx.nfa_states = 0;
7100 preg->rx.local_cset_size = 256;
7101 preg->rx.start = 0;
7102 preg->rx.se_list_cmp = posix_se_list_order;
7103 preg->rx.start_set = 0;
7104 ret = rx_compile (pattern, strlen (pattern), syntax, preg);
7105
7106#ifdef __WIN32__ /* [RPAP - Feb 97: WIN32 Port] */
7107 _alloca (0);
7108#else
7109 alloca (0);
7110#endif
7111
7112 /* POSIX doesn't distinguish between an unmatched open-group and an
7113 unmatched close-group: both are REG_EPAREN. */
7114 if (ret == REG_ERPAREN) ret = REG_EPAREN;
7115
7116 return (int) ret;
7117}
7118
7119
7120/* regexec searches for a given pattern, specified by PREG, in the
7121 string STRING.
7122
7123 If NMATCH is zero or REG_NOSUB was set in the cflags argument to
7124 `regcomp', we ignore PMATCH. Otherwise, we assume PMATCH has at
7125 least NMATCH elements, and we set them to the offsets of the
7126 corresponding matched substrings.
7127
7128 EFLAGS specifies `execution flags' which affect matching: if
7129 REG_NOTBOL is set, then ^ does not match at the beginning of the
7130 string; if REG_NOTEOL is set, then $ does not match at the end.
7131
7132 We return 0 if we find a match and REG_NOMATCH if not. */
7133
7134#ifdef __STDC__
7135int
7136regexec (__const__ regex_t *preg, __const__ char *string,
7137 size_t nmatch, regmatch_t pmatch[],
7138 int eflags)
7139#else
7140int
7141regexec (preg, string, nmatch, pmatch, eflags)
7142 __const__ regex_t *preg;
7143 __const__ char *string;
7144 size_t nmatch;
7145 regmatch_t pmatch[];
7146 int eflags;
7147#endif
7148{
7149 int ret;
7150 struct re_registers regs;
7151 regex_t private_preg;
7152 int len = strlen (string);
7153 boolean want_reg_info = !preg->no_sub && nmatch > 0;
7154
7155 private_preg = *preg;
7156
7157 private_preg.not_bol = !!(eflags & REG_NOTBOL);
7158 private_preg.not_eol = !!(eflags & REG_NOTEOL);
7159
7160 /* The user has told us exactly how many registers to return
7161 * information about, via `nmatch'. We have to pass that on to the
7162 * matching routines.
7163 */
7164 private_preg.regs_allocated = REGS_FIXED;
7165
7166 if (want_reg_info)
7167 {
7168 regs.num_regs = nmatch;
7169 regs.start = (( regoff_t *) malloc ((nmatch) * sizeof ( regoff_t)));
7170 regs.end = (( regoff_t *) malloc ((nmatch) * sizeof ( regoff_t)));
7171 if (regs.start == 0 || regs.end == 0)
7172 return (int) REG_NOMATCH;
7173 }
7174
7175 /* Perform the searching operation. */
7176 ret = re_search (&private_preg,
7177 string, len,
7178 /* start: */ 0,
7179 /* range: */ len,
7180 want_reg_info ? &regs : (struct re_registers *) 0);
7181
7182 /* Copy the register information to the POSIX structure. */
7183 if (want_reg_info)
7184 {
7185 if (ret >= 0)
7186 {
7187 unsigned r;
7188
7189 for (r = 0; r < nmatch; r++)
7190 {
7191 pmatch[r].rm_so = regs.start[r];
7192 pmatch[r].rm_eo = regs.end[r];
7193 }
7194 }
7195
7196 /* If we needed the temporary register info, free the space now. */
7197 free (regs.start);
7198 free (regs.end);
7199 }
7200
7201 /* We want zero return to mean success, unlike `re_search'. */
7202 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
7203}
7204
7205
7206/* Returns a message corresponding to an error code, ERRCODE, returned
7207 from either regcomp or regexec. */
7208
7209#ifdef __STDC__
7210size_t
7211regerror (int rx_errcode, __const__ regex_t *preg,
7212 char *errbuf, size_t errbuf_size)
7213#else
7214size_t
7215regerror (rx_errcode, preg, errbuf, errbuf_size)
7216 int rx_errcode;
7217 __const__ regex_t *preg;
7218 char *errbuf;
7219 size_t errbuf_size;
7220#endif
7221{
7222 __const__ char *msg
7223 = rx_error_msg[rx_errcode] == 0 ? "Success" : rx_error_msg[rx_errcode];
7224 size_t msg_size = strlen (msg) + 1; /* Includes the 0. */
7225
7226 if (errbuf_size != 0)
7227 {
7228 if (msg_size > errbuf_size)
7229 {
7230 strncpy (errbuf, msg, errbuf_size - 1);
7231 errbuf[errbuf_size - 1] = 0;
7232 }
7233 else
7234 strcpy (errbuf, msg);
7235 }
7236
7237 return msg_size;
7238}
7239
7240
7241/* Free dynamically allocated space used by PREG. */
7242
7243#ifdef __STDC__
7244void
7245regfree (regex_t *preg)
7246#else
7247void
7248regfree (preg)
7249 regex_t *preg;
7250#endif
7251{
7252 if (preg->buffer != 0)
7253 free (preg->buffer);
7254 preg->buffer = 0;
7255 preg->allocated = 0;
7256
7257 if (preg->fastmap != 0)
7258 free (preg->fastmap);
7259 preg->fastmap = 0;
7260 preg->fastmap_accurate = 0;
7261
7262 if (preg->translate != 0)
7263 free (preg->translate);
7264 preg->translate = 0;
7265}
7266
7267#endif /* not emacs */
7268
7269
7270
7271
7272
Note: See TracBrowser for help on using the repository browser.