/* Definitions for data structures and routines for the regular
   expression library, version 0.12.
   Copyright (C) 1985,89,90,91,92,93,95,96,97,98 Free Software Foundation, Inc.

   This file is part of the GNU C Library.  Its master source is NOT part of
   the C library, however.  The master source lives in /gd/gnu/lib.

   The GNU C Library is free software; you can redistribute it and/or
   modify it under the terms of the GNU Library General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   The GNU C Library is distributed in the hope that it will be useful,
   but WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   Library General Public License for more details.

   You should have received a copy of the GNU Library General Public
   License along with the GNU C Library; see the file LGPL.  If not,
   write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */
/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
   Last change: May 21, 1993 by t^2  */
/* modified for Ruby by [email protected] */

#ifndef REGEX_H
#define REGEX_H

/* Symbol mangling for Ruby: when RUBY is defined, the preprocessor renames
   each identifier listed below (the public entry points of this header and
   the register_info_type typedef) to the same name with a ruby_ prefix.  */
#ifdef RUBY
# define re_adjust_startpos ruby_re_adjust_startpos
# define re_compile_fastmap ruby_re_compile_fastmap
# define re_compile_pattern ruby_re_compile_pattern
# define re_copy_registers ruby_re_copy_registers
# define re_free_pattern ruby_re_free_pattern
# define re_free_registers ruby_re_free_registers
# define re_match ruby_re_match
# define re_mbcinit ruby_re_mbcinit
# define re_search ruby_re_search
# define re_set_casetable ruby_re_set_casetable
# define register_info_type ruby_register_info_type
#endif

#include <stddef.h>

/* Define number of parens for which we record the beginnings and ends.
   This affects how much space the `struct re_registers' type takes up.
   May be overridden by defining RE_NREGS before including this header.  */
#ifndef RE_NREGS
#define RE_NREGS 10
#endif

/* Number of bits per byte; used below to derive RE_REG_MAX.  */
#define BYTEWIDTH 8

/* Largest value that fits in BYTEWIDTH bits (255 when BYTEWIDTH is 8).  */
#define RE_REG_MAX ((1<<BYTEWIDTH)-1)

/* Maximum number of duplicates an interval can allow.
   May be overridden by defining RE_DUP_MAX before including this header.  */
#ifndef RE_DUP_MAX
#define RE_DUP_MAX ((1 << 15) - 1)
#endif


/* If this bit is set, then character classes are supported; they are:
     [:alpha:], [:upper:], [:lower:],  [:digit:], [:alnum:], [:xdigit:],
     [:space:], [:print:], [:punct:], [:graph:], and [:cntrl:].
   If not set, then character classes are not supported.  */
#define RE_CHAR_CLASSES (1L << 9)

/* Option bits.  Each flag is defined as the previous one shifted left by
   one, so the flags from RE_OPTION_IGNORECASE to RE_OPTIMIZE_BMATCH occupy
   bits 0 through 9 in order.  */

/* match will be done case insensitively */
#define RE_OPTION_IGNORECASE (1L)
/* perl-style extended pattern available */
#define RE_OPTION_EXTENDED   (RE_OPTION_IGNORECASE<<1)
/* newline will be included for . */
#define RE_OPTION_MULTILINE  (RE_OPTION_EXTENDED<<1)
/* ^ and $ ignore newline */
#define RE_OPTION_SINGLELINE (RE_OPTION_MULTILINE<<1)
/* search for longest match, in accord with POSIX regexp */
#define RE_OPTION_LONGEST    (RE_OPTION_SINGLELINE<<1)

/* NOTE(review): RE_OPTIMIZE_BMATCH works out to 1L << 9, the same value as
   RE_CHAR_CLASSES above; confirm the two are never kept in the same flag
   word before relying on either bit.  */
#define RE_MAY_IGNORECASE    (RE_OPTION_LONGEST<<1)
#define RE_OPTIMIZE_ANCHOR   (RE_MAY_IGNORECASE<<1)
#define RE_OPTIMIZE_EXACTN   (RE_OPTIMIZE_ANCHOR<<1)
#define RE_OPTIMIZE_NO_BM    (RE_OPTIMIZE_EXACTN<<1)
#define RE_OPTIMIZE_BMATCH   (RE_OPTIMIZE_NO_BM<<1)

/* For multi-byte char support */
#define MBCTYPE_ASCII 0
#define MBCTYPE_EUC 1
#define MBCTYPE_SJIS 2
#define MBCTYPE_UTF8 3

/* Table consulted by ismbchar() and mbclen(); it is indexed by a byte
   value.  Declared here only; on non-GNU Windows builds without
   RUBY_EXPORT it is imported from the DLL.  */
extern
#if defined _WIN32 && !defined __GNUC__ && !defined RUBY_EXPORT
__declspec(dllimport)
#endif
const unsigned char *re_mbctab;

/* NOTE(review): the argument is presumably one of the MBCTYPE_* values
   above -- confirm against the definition.  */
#if defined(__STDC__)
void re_mbcinit (int);
#else
void re_mbcinit ();
#endif

/* ismbchar(c): the re_mbctab entry for byte C.
   mbclen(c):   that entry plus one.
   C is converted to unsigned char before indexing, so a negative `char'
   value still selects a valid table slot.  Both expansions are fully
   parenthesized so they can be used inside larger expressions.  */
#undef ismbchar
#define ismbchar(c) (re_mbctab[(unsigned char)(c)])
#define mbclen(c)   (re_mbctab[(unsigned char)(c)]+1)

/* Structure used in re_match() */

typedef union
{
  unsigned char *word;
  struct {
    unsigned is_active : 1;
    unsigned matched_something : 1;
  } bits;
} register_info_type;

/* This data structure is used to represent a compiled pattern.  */

struct re_pattern_buffer
  {
    char *buffer;	/* Space holding the compiled pattern commands.  */
    int allocated;	/* Size of space that `buffer' points to.  */
    int used;		/* Length of portion of buffer actually occupied  */
    char *fastmap;	/* Pointer to fastmap, if any, or zero if none.  */
			/* re_search uses the fastmap, if there is one,
			   to skip over totally implausible characters.  */
    char *must;		/* Pointer to exact pattern which strings should have
			   to be matched.  */
    int *must_skip;	/* Pointer to exact pattern skip table for bm_search */
    long options;	/* Flags for options such as extended_pattern.  */
    long re_nsub;	/* Number of subexpressions found by the compiler.  */
    char fastmap_accurate;
			/* Set to zero when a new pattern is stored,
			   set to one when the fastmap is updated from it.  */
    char can_be_null;	/* Set to one by compiling fastmap
			   if this pattern might match the null string.
			   It does not necessarily match the null string
			   in that case, but if this is zero, it cannot.
			   2 as value means can match null string
			   but at end of range or before a character
			   listed in the fastmap.  */

    /* stack & working area for re_match() */
    unsigned char **regstart;
    unsigned char **regend;
    unsigned char **old_regstart;
    unsigned char **old_regend;
    register_info_type *reg_info;
    unsigned char **best_regstart;
    unsigned char **best_regend;
  };

/* Short alias for the compiled-pattern structure.  */
typedef struct re_pattern_buffer regex_t;

/* Structure to store register contents data in.

   Pass the address of such a structure as an argument to re_match, etc.,
   if you want this information back.

   For i from 1 to RE_NREGS - 1, beg[i] records the starting index in
   the string of where the ith subexpression matched, and end[i] records
   one after the ending index.  beg[0] and end[0] are analogous, for
   the entire pattern.  */

struct re_registers
  {
    int allocated;	/* NOTE(review): presumably the capacity of the beg
			   and end arrays -- confirm against regex.c.  */
    int num_regs;	/* NOTE(review): presumably the number of entries in
			   use -- confirm against regex.c.  */
    int *beg;		/* Start index of each match, per the comment above.  */
    int *end;		/* One past the end index of each match.  */
  };

/* Type for byte offsets within the string.  POSIX mandates this, and
   requires it to be a signed integer type: an offset of -1 marks a
   subexpression that did not take part in the match, which an unsigned
   type cannot tell apart from a very large valid offset.  */
typedef ptrdiff_t regoff_t;

/* POSIX specification for registers.  Aside from the different names than
   `re_registers', POSIX uses an array of structures, instead of a
   structure of arrays.  */
typedef struct
{
  regoff_t rm_so;  /* Byte offset from string's start to substring's start.  */
  regoff_t rm_eo;  /* Byte offset from string's start to substring's end.  */
} regmatch_t;

/* Public entry points of the regex library.  With an ISO C compiler
   (__STDC__ defined) full prototypes are given; otherwise the same
   functions are declared K&R style, without parameter lists.  */

/* Forward declarations, so the prototypes below always refer to the
   file-scope struct types rather than to types local to a parameter list.  */
struct re_pattern_buffer;
struct re_registers;

#ifdef __STDC__

extern char *re_compile_pattern (const char *, int, struct re_pattern_buffer *);
extern void re_free_pattern (struct re_pattern_buffer *);
/* Is this really advertised?  */
extern int re_adjust_startpos (struct re_pattern_buffer *, const char*, int, int, int);
extern void re_compile_fastmap (struct re_pattern_buffer *);
extern int re_search (struct re_pattern_buffer *, const char*, int, int, int,
		      struct re_registers *);
extern int re_match (struct re_pattern_buffer *, const char *, int, int,
		     struct re_registers *);
extern void re_set_casetable (const char *table);
extern void re_copy_registers (struct re_registers*, struct re_registers*);
extern void re_free_registers (struct re_registers*);

#ifndef RUBY
/* 4.2 bsd compatibility.  */
extern char *re_comp (const char *);
extern int re_exec (const char *);
#endif

#else /* !__STDC__ */

extern char *re_compile_pattern ();
/* Same function as in the prototype branch above (and in the RUBY symbol
   mangling list): re_free_pattern, not re_free_regexp.  */
extern void re_free_pattern ();
/* Is this really advertised? */
extern int re_adjust_startpos ();
extern void re_compile_fastmap ();
extern int re_search ();
extern int re_match ();
extern void re_set_casetable ();
extern void re_copy_registers ();
extern void re_free_registers ();

#endif /* __STDC__ */

#endif /* !REGEX_H */