1 | # scanf for Ruby
|
---|
2 | #
|
---|
3 | # $Revision: 11708 $
|
---|
4 | # $Id: scanf.rb 11708 2007-02-12 23:01:19Z shyouhei $
|
---|
5 | # $Author: shyouhei $
|
---|
6 | # $Date: 2007-02-13 08:01:19 +0900 (Tue, 13 Feb 2007) $
|
---|
7 | #
|
---|
8 | # A product of the Austin Ruby Codefest (Austin, Texas, August 2002)
|
---|
9 |
|
---|
10 | =begin
|
---|
11 |
|
---|
12 | =scanf for Ruby
|
---|
13 |
|
---|
14 | ==Description
|
---|
15 |
|
---|
16 | scanf for Ruby is an implementation of the C function scanf(3),
|
---|
17 | modified as necessary for Ruby compatibility.
|
---|
18 |
|
---|
19 | The methods provided are String#scanf, IO#scanf, and
|
---|
20 | Kernel#scanf. Kernel#scanf is a wrapper around STDIN.scanf. IO#scanf
|
---|
21 | can be used on any IO stream, including file handles and sockets.
|
---|
22 | scanf can be called either with or without a block.
|
---|
23 |
|
---|
24 | scanf for Ruby scans an input string or stream according to a
|
---|
25 | <b>format</b>, as described below ("Conversions"), and returns an
|
---|
26 | array of matches between the format and the input. The format is
|
---|
27 | defined in a string, and is similar (though not identical) to the
|
---|
28 | formats used in Kernel#printf and Kernel#sprintf.
|
---|
29 |
|
---|
30 | The format may contain <b>conversion specifiers</b>, which tell scanf
|
---|
31 | what form (type) each particular matched substring should be converted
|
---|
32 | to (e.g., decimal integer, floating point number, literal string,
|
---|
33 | etc.) The matches and conversions take place from left to right, and
|
---|
34 | the conversions themselves are returned as an array.
|
---|
35 |
|
---|
36 | The format string may also contain characters other than those in the
|
---|
37 | conversion specifiers. White space (blanks, tabs, or newlines) in the
|
---|
38 | format string matches any amount of white space, including none, in
|
---|
39 | the input. Everything else matches only itself.
|
---|
40 |
|
---|
41 | Scanning stops, and scanf returns, when any input character fails to
|
---|
42 | match the specifications in the format string, or when input is
|
---|
43 | exhausted, or when everything in the format string has been
|
---|
44 | matched. All matches found up to the stopping point are returned in
|
---|
45 | the return array (or yielded to the block, if a block was given).
|
---|
46 |
|
---|
47 |
|
---|
48 | ==Basic usage
|
---|
49 |
|
---|
50 | require 'scanf.rb'
|
---|
51 |
|
---|
52 | # String#scanf and IO#scanf take a single argument (a format string)
|
---|
53 | array = aString.scanf("%d%s")
|
---|
54 | array = anIO.scanf("%d%s")
|
---|
55 |
|
---|
56 | # Kernel#scanf reads from STDIN
|
---|
57 | array = scanf("%d%s")
|
---|
58 |
|
---|
59 | ==Block usage
|
---|
60 |
|
---|
61 | When called with a block, scanf keeps scanning the input, cycling back
|
---|
62 | to the beginning of the format string, and yields a new array of
|
---|
63 | conversions to the block every time the format string is matched
|
---|
64 | (including partial matches, but not including complete failures). The
|
---|
65 | actual return value of scanf when called with a block is an array
|
---|
66 | containing the results of all the executions of the block.
|
---|
67 |
|
---|
68 | str = "123 abc 456 def 789 ghi"
|
---|
69 | str.scanf("%d%s") { |num,str| [ num * 2, str.upcase ] }
|
---|
70 | # => [[246, "ABC"], [912, "DEF"], [1578, "GHI"]]
|
---|
71 |
|
---|
72 | ==Conversions
|
---|
73 |
|
---|
74 | The single argument to scanf is a format string, which generally
|
---|
75 | includes one or more conversion specifiers. Conversion specifiers
|
---|
76 | begin with the percent character ('%') and include information about
|
---|
77 | what scanf should next scan for (string, decimal number, single
|
---|
78 | character, etc.).
|
---|
79 |
|
---|
80 | There may be an optional maximum field width, expressed as a decimal
|
---|
81 | integer, between the % and the conversion. If no width is given, a
|
---|
82 | default of `infinity' is used (with the exception of the %c specifier;
|
---|
83 | see below). Otherwise, given a field width of <em>n</em> for a given
|
---|
84 | conversion, at most <em>n</em> characters are scanned in processing
|
---|
85 | that conversion. Before conversion begins, most conversions skip
|
---|
86 | white space in the input string; this white space is not counted
|
---|
87 | against the field width.
|
---|
88 |
|
---|
89 | The following conversions are available. (See the files EXAMPLES
|
---|
90 | and <tt>tests/scanftests.rb</tt> for examples.)
|
---|
91 |
|
---|
92 | [%]
|
---|
93 | Matches a literal `%'. That is, `%%' in the format string matches a
|
---|
94 | single input `%' character. No conversion is done, and the resulting
|
---|
95 | '%' is not included in the return array.
|
---|
96 |
|
---|
97 | [d]
|
---|
98 | Matches an optionally signed decimal integer.
|
---|
99 |
|
---|
100 | [u]
|
---|
101 | Same as d.
|
---|
102 |
|
---|
103 | [i]
|
---|
104 | Matches an optionally signed integer. The integer is read in base
|
---|
105 | 16 if it begins with `0x' or `0X', in base 8 if it begins with `0',
|
---|
106 | and in base 10 otherwise. Only characters that correspond to the
|
---|
107 | base are recognized.
|
---|
108 |
|
---|
109 | [o]
|
---|
110 | Matches an optionally signed octal integer.
|
---|
111 |
|
---|
112 | [x,X]
|
---|
113 | Matches an optionally signed hexadecimal integer.
|
---|
114 |
|
---|
115 | [f,g,e,E]
|
---|
116 | Matches an optionally signed floating-point number.
|
---|
117 |
|
---|
118 | [s]
|
---|
119 | Matches a sequence of non-white-space characters. The input string stops at
|
---|
120 | white space or at the maximum field width, whichever occurs first.
|
---|
121 |
|
---|
122 | [c]
|
---|
123 | Matches a single character, or a sequence of <em>n</em> characters if a
|
---|
124 | field width of <em>n</em> is specified. The usual skip of leading white
|
---|
125 | space is suppressed. To skip white space first, use an explicit space in
|
---|
126 | the format.
|
---|
127 |
|
---|
128 | [<tt>[</tt>]
|
---|
129 | Matches a nonempty sequence of characters from the specified set
|
---|
130 | of accepted characters. The usual skip of leading white space is
|
---|
131 | suppressed. This bracketed sub-expression is interpreted exactly like a
|
---|
132 | character class in a Ruby regular expression. (In fact, it is placed as-is
|
---|
133 | in a regular expression.) The matching against the input string ends with
|
---|
134 | the appearance of a character not in (or, with a circumflex, in) the set,
|
---|
135 | or when the field width runs out, whichever comes first.
|
---|
136 |
|
---|
137 | ===Assignment suppression
|
---|
138 |
|
---|
139 | To require that a particular match occur, but without including the result
|
---|
140 | in the return array, place the <b>assignment suppression flag</b>, which is
|
---|
141 | the star character ('*'), immediately after the leading '%' of a format
|
---|
142 | specifier (just before the field width, if any).
|
---|
143 |
|
---|
144 | ==Examples
|
---|
145 |
|
---|
146 | See the files <tt>EXAMPLES</tt> and <tt>tests/scanftests.rb</tt>.
|
---|
147 |
|
---|
148 | ==scanf for Ruby compared with scanf in C
|
---|
149 |
|
---|
150 | scanf for Ruby is based on the C function scanf(3), but with modifications,
|
---|
151 | dictated mainly by the underlying differences between the languages.
|
---|
152 |
|
---|
153 | ===Unimplemented flags and specifiers
|
---|
154 |
|
---|
155 | * The only flag implemented in scanf for Ruby is '<tt>*</tt>' (ignore
|
---|
156 | upcoming conversion). Many of the flags available in C versions of scanf(3)
|
---|
157 | have to do with the type of upcoming pointer arguments, and are literally
|
---|
158 | meaningless in Ruby.
|
---|
159 |
|
---|
160 | * The <tt>n</tt> specifier (store number of characters consumed so far in
|
---|
161 | next pointer) is not implemented.
|
---|
162 |
|
---|
163 | * The <tt>p</tt> specifier (match a pointer value) is not implemented.
|
---|
164 |
|
---|
165 | ===Altered specifiers
|
---|
166 |
|
---|
167 | [o,u,x,X]
|
---|
168 | In scanf for Ruby, all of these specifiers scan for an optionally signed
|
---|
169 | integer, rather than for an unsigned integer like their C counterparts.
|
---|
170 |
|
---|
171 | ===Return values
|
---|
172 |
|
---|
173 | scanf for Ruby returns an array of successful conversions, whereas
|
---|
174 | scanf(3) returns the number of conversions successfully
|
---|
175 | completed. (See below for more details on scanf for Ruby's return
|
---|
176 | values.)
|
---|
177 |
|
---|
178 | ==Return values
|
---|
179 |
|
---|
180 | Without a block, scanf returns an array containing all the conversions
|
---|
181 | it has found. If none are found, scanf will return an empty array. An
|
---|
182 | unsuccessful match is never ignored, but rather always signals the end
|
---|
183 | of the scanning operation. If the first unsuccessful match takes place
|
---|
184 | after one or more successful matches have already taken place, the
|
---|
185 | returned array will contain the results of those successful matches.
|
---|
186 |
|
---|
187 | With a block, scanf returns a 'map'-like array of transformations from
|
---|
188 | the block -- that is, an array reflecting what the block did with each
|
---|
189 | yielded result from the iterative scanf operation. (See "Block
|
---|
190 | usage", above.)
|
---|
191 |
|
---|
192 | ==Test suite
|
---|
193 |
|
---|
194 | scanf for Ruby includes a suite of unit tests (requiring the
|
---|
195 | <tt>TestUnit</tt> package), which can be run with the command <tt>ruby
|
---|
196 | tests/scanftests.rb</tt> or the command <tt>make test</tt>.
|
---|
197 |
|
---|
198 | ==Current limitations and bugs
|
---|
199 |
|
---|
200 | When using IO#scanf under Windows, make sure you open your files in
|
---|
201 | binary mode:
|
---|
202 |
|
---|
203 | File.open("filename", "rb")
|
---|
204 |
|
---|
205 | so that scanf can keep track of characters correctly.
|
---|
206 |
|
---|
207 | Support for character classes is reasonably complete (since it
|
---|
208 | essentially piggy-backs on Ruby's regular expression handling of
|
---|
209 | character classes), but users are advised that character class testing
|
---|
210 | has not been exhaustive, and that they should exercise some caution
|
---|
211 | in using any of the more complex and/or arcane character class
|
---|
212 | idioms.
|
---|
213 |
|
---|
214 |
|
---|
215 | ==Technical notes
|
---|
216 |
|
---|
217 | ===Rationale behind scanf for Ruby
|
---|
218 |
|
---|
219 | The impetus for a scanf implementation in Ruby comes chiefly from the fact
|
---|
220 | that existing pattern matching operations, such as Regexp#match and
|
---|
221 | String#scan, return all results as strings, which have to be converted to
|
---|
222 | integers or floats explicitly in cases where what's ultimately wanted are
|
---|
223 | integer or float values.
|
---|
224 |
|
---|
225 | ===Design of scanf for Ruby
|
---|
226 |
|
---|
227 | scanf for Ruby is essentially a <format string>-to-<regular
|
---|
228 | expression> converter.
|
---|
229 |
|
---|
230 | When scanf is called, a FormatString object is generated from the
|
---|
231 | format string ("%d%s...") argument. The FormatString object breaks the
|
---|
232 | format string down into atoms ("%d", "%5f", "blah", etc.), and from
|
---|
233 | each atom it creates a FormatSpecifier object, which it
|
---|
234 | saves.
|
---|
235 |
|
---|
236 | Each FormatSpecifier has a regular expression fragment and a "handler"
|
---|
237 | associated with it. For example, the regular expression fragment
|
---|
238 | associated with the format "%d" is "([-+]?\d+)", and the handler
|
---|
239 | associated with it is a wrapper around String#to_i. scanf itself calls
|
---|
240 | FormatString#match, passing in the input string. FormatString#match
|
---|
241 | iterates through its FormatSpecifiers; for each one, it matches the
|
---|
242 | corresponding regular expression fragment against the string. If
|
---|
243 | there's a match, it sends the matched string to the handler associated
|
---|
244 | with the FormatSpecifier.
|
---|
245 |
|
---|
246 | Thus, to follow up the "%d" example: if "123" occurs in the input
|
---|
247 | string when a FormatSpecifier consisting of "%d" is reached, the "123"
|
---|
248 | will be matched against "([-+]?\d+)", and the matched string will be
|
---|
249 | rendered into an integer by a call to to_i.
|
---|
250 |
|
---|
251 | The rendered match is then saved to an accumulator array, and the
|
---|
252 | input string is reduced to the post-match substring. Thus the string
|
---|
253 | is "eaten" from the left as the FormatSpecifiers are applied in
|
---|
254 | sequence. (This is done to a duplicate string; the original string is
|
---|
255 | not altered.)
|
---|
256 |
|
---|
257 | As soon as a regular expression fragment fails to match the string, or
|
---|
258 | when the FormatString object runs out of FormatSpecifiers, scanning
|
---|
259 | stops and results accumulated so far are returned in an array.
|
---|
260 |
|
---|
261 | ==License and copyright
|
---|
262 |
|
---|
263 | Copyright:: (c) 2002-2003 David Alan Black
|
---|
264 | License:: Distributed on the same licensing terms as Ruby itself
|
---|
265 |
|
---|
266 | ==Warranty disclaimer
|
---|
267 |
|
---|
268 | This software is provided "as is" and without any express or implied
|
---|
269 | warranties, including, without limitation, the implied warranties of
|
---|
270 | merchantability and fitness for a particular purpose.
|
---|
271 |
|
---|
272 | ==Credits and acknowledgements
|
---|
273 |
|
---|
274 | scanf for Ruby was developed as the major activity of the Austin
|
---|
275 | Ruby Codefest (Austin, Texas, August 2002).
|
---|
276 |
|
---|
277 | Principal author:: David Alan Black (mailto:[email protected])
|
---|
278 | Co-author:: Hal Fulton (mailto:[email protected])
|
---|
279 | Project contributors:: Nolan Darilek, Jason Johnston
|
---|
280 |
|
---|
281 | Thanks to Hal Fulton for hosting the Codefest.
|
---|
282 |
|
---|
283 | Thanks to Matz for suggestions about the class design.
|
---|
284 |
|
---|
285 | Thanks to Gavin Sinclair for some feedback on the documentation.
|
---|
286 |
|
---|
287 | The text for parts of this document, especially the Description and
|
---|
288 | Conversions sections, above, was adapted from the Linux Programmer's
|
---|
289 | Manual manpage for scanf(3), dated 1995-11-01.
|
---|
290 |
|
---|
291 | ==Bugs and bug reports
|
---|
292 |
|
---|
293 | scanf for Ruby is based on something of an amalgam of C scanf
|
---|
294 | implementations and documentation, rather than on a single canonical
|
---|
295 | description. Suggestions for features and behaviors which appear in
|
---|
296 | other scanfs, and would be meaningful in Ruby, are welcome, as are
|
---|
297 | reports of suspicious behaviors and/or bugs. (Please see "Credits and
|
---|
298 | acknowledgements", above, for email addresses.)
|
---|
299 |
|
---|
300 | =end
|
---|
301 |
|
---|
module Scanf

  # One atom of a scanf format string (for example "%d", " %5s", "%[a-z]"
  # or a run of literal text).
  #
  # On construction the atom is translated into a regular-expression
  # source string anchored with \A (see #re_string) and the name of a
  # private handler method that converts the captured text into the value
  # scanf should return for this atom.
  class FormatSpecifier

    # re_string::      regular-expression source built for this specifier
    # matched_string:: +to_s+ of the most recent conversion
    # conversion::     value produced by the handler on the most recent match
    # matched::        true when the most recent call to #match succeeded
    attr_reader :re_string, :matched_string, :conversion, :matched

    private

    # Truthy when the specifier carries the assignment-suppression flag
    # ('*' right after the '%').
    def skip
      /^\s*%\*/.match(@spec_string)
    end

    # Handlers. Each returns nil when there is nothing to convert or when
    # assignment is suppressed; FormatString#match drops nils from the result.

    def extract_float(s)
      s.to_f if s && !skip
    end

    def extract_decimal(s)
      s.to_i if s && !skip
    end

    def extract_hex(s)
      s.hex if s && !skip
    end

    def extract_octal(s)
      s.oct if s && !skip
    end

    # Integer() picks the base from the prefix of the matched text.
    def extract_integer(s)
      Integer(s) if s && !skip
    end

    def extract_plain(s)
      s unless skip
    end

    # Handler for atoms that must match but never contribute a value
    # ("%%" and literal text).
    def nil_proc(s)
      nil
    end

    public

    # The original text of this specifier.
    def to_s
      @spec_string
    end

    # Truthy when leading white space in the input is significant for this
    # specifier: a %c that is not preceded by white space in the format, or
    # any specifier containing a '['. #match strips leading input white
    # space only when this is falsy.
    def count_space?
      /(?:\A|\S)%\*?\d*c|\[/.match(@spec_string)
    end

    # Builds the regular-expression source and selects the handler for the
    # format atom +str+. The +when+ clauses are unanchored and order
    # dependent, so they must stay in this order.
    def initialize(str)
      @spec_string = str
      h = '[A-Fa-f0-9]'

      @re_string, @handler =
        case @spec_string

        # %[[:...:]]
        when /%\*?(\[\[:[a-z]+:\]\])/
          [ "(#{$1}+)", :extract_plain ]

        # %5[[:...:]]
        when /%\*?(\d+)(\[\[:[a-z]+:\]\])/
          [ "(#{$2}{1,#{$1}})", :extract_plain ]

        # %[...]
        when /%\*?\[([^\]]*)\]/
          yes = $1
          # "no" is the complement of the accepted set; the look-ahead
          # requires the run to end at such a character or at end of input.
          no = /^\^/.match(yes) ? yes[1..-1] : '^' + yes
          [ "([#{yes}]+)(?=[#{no}]|\\z)", :extract_plain ]

        # %5[...]
        when /%\*?(\d+)\[([^\]]*)\]/
          yes = $2
          w = $1
          [ "([#{yes}]{1,#{w}})", :extract_plain ]

        # %i
        # A single decimal digit ("5") and a bare "0" are accepted, as
        # they already are by the widthed form below.
        when /%\*?i/
          [ "([-+]?(?:(?:0[0-7]+)|(?:0[Xx]#{h}+)|(?:[1-9]\\d*)|0))", :extract_integer ]

        # %5i
        when /%\*?(\d+)i/
          n = $1.to_i
          alts = []
          alts << "[1-9]\\d{1,#{n-1}}"         if n > 1
          alts << "0[0-7]{1,#{n-1}}"           if n > 1
          alts << "[-+]0[0-7]{1,#{n-2}}"       if n > 2
          alts << "[-+][1-9]\\d{1,#{n-2}}"     if n > 2
          alts << "0[Xx]#{h}{1,#{n-2}}"        if n > 2
          alts << "[-+]0[Xx]#{h}{1,#{n-3}}"    if n > 3
          alts << "\\d"
          [ "(#{alts.join('|')})", :extract_integer ]

        # %d, %u
        when /%\*?[du]/
          [ '([-+]?\d+)', :extract_decimal ]

        # %5d, %5u
        when /%\*?(\d+)[du]/
          w = $1
          n = w.to_i
          alts = []
          alts << "[-+]\\d{1,#{n-1}}" if n > 1
          alts << "\\d{1,#{w}}"
          [ "(#{alts.join('|')})", :extract_decimal ]

        # %x
        when /%\*?[Xx]/
          [ "([-+]?(?:0[Xx])?#{h}+)", :extract_hex ]

        # %5x
        when /%\*?(\d+)[Xx]/
          n = $1.to_i
          alts = []
          alts << "[-+]0[Xx]#{h}{1,#{n-3}}" if n > 3
          alts << "0[Xx]#{h}{1,#{n-2}}"     if n > 2
          alts << "[-+]#{h}{1,#{n-1}}"      if n > 1
          alts << "#{h}{1,#{n}}"
          [ "(#{alts.join('|')})", :extract_hex ]

        # %o
        when /%\*?o/
          [ '([-+]?[0-7]+)', :extract_octal ]

        # %5o
        # The signed alternative needs room for the sign plus one digit,
        # so it is only offered for widths above 1 (as in %5d and %5x).
        when /%\*?(\d+)o/
          w = $1
          n = w.to_i
          alts = []
          alts << "[-+][0-7]{1,#{n-1}}" if n > 1
          alts << "[0-7]{1,#{w}}"
          [ "(#{alts.join('|')})", :extract_octal ]

        # %f, %e, %E, %g (all documented as floating-point conversions)
        when /%\*?[eEfFgG]/
          [ '([-+]?((\d+(?>(?=[^\d.]|$)))|(\d*(\.(\d*([eE][-+]?\d+)?)))))', :extract_float ]

        # %5f, %5e, %5E, %5g
        when /%\*?(\d+)[eEfFgG]/
          [ "(\\S{1,#{$1}})", :extract_float ]

        # %5s
        when /%\*?(\d+)s/
          [ "(\\S{1,#{$1}})", :extract_plain ]

        # %s
        when /%\*?s/
          [ '(\S+)', :extract_plain ]

        # %c preceded by white space in the format
        when /\s%\*?c/
          [ "\\s*(.)", :extract_plain ]

        # %c
        when /%\*?c/
          [ "(.)", :extract_plain ]

        # %5c (leading white space is governed by count_space?)
        when /%\*?(\d+)c/
          [ "(.{1,#{$1}})", :extract_plain ]

        # %%
        when /%%/
          [ '(\s*%)', :nil_proc ]

        # literal characters
        else
          [ "(#{Regexp.escape(@spec_string)})", :nil_proc ]
        end

      # Every fragment must match at the very start of the remaining input.
      @re_string = '\A' + @re_string
    end

    # Compiles #re_string into a Regexp (multiline, so '.' matches newlines).
    def to_re
      Regexp.new(@re_string, Regexp::MULTILINE)
    end

    # Tries this specifier against the start of +str+ (which is not
    # modified). On success stores the converted value, its string form and
    # sets #matched. Returns the MatchData, or nil on failure.
    def match(str)
      @matched = false
      s = str.dup
      s.sub!(/\A\s+/, '') unless count_space?
      res = to_re.match(s)
      if res
        @conversion = send(@handler, res[1])
        @matched_string = @conversion.to_s
        @matched = true
      end
      res
    end

    # The conversion character of the specifier (lower-case letter or '['),
    # or nil when the text is not a conversion.
    def letter
      /%\*?\d*([a-z\[])/.match(@spec_string).to_a[1]
    end

    # The field width as an Integer, or nil when none was given.
    def width
      w = /%\*?(\d+)/.match(@spec_string).to_a[1]
      w && w.to_i
    end

    # Truthy when the last match succeeded but more input could still
    # extend it: a character class with no width, or a %c / character class
    # whose matched text is shorter than its width.
    def mid_match?
      return false unless @matched
      cc_no_width = letter == '[' && !width
      c_or_cc_width = (letter == 'c' || letter == '[') && width
      width_left = c_or_cc_width && (matched_string.size < width)

      return width_left || cc_no_width
    end

  end

  # A parsed scanf format string: an ordered list of FormatSpecifier
  # objects that #match applies, left to right, to an input string.
  class FormatString

    # string_left::      input remaining after the most recent #match
    # last_spec_tried::  last FormatSpecifier visited by #match
    # last_match_tried:: MatchData (or nil) returned by that specifier
    # matched_count::    number of specifiers that matched in #match
    # space::            true when the format string ends in white space
    attr_reader :string_left, :last_spec_tried,
                :last_match_tried, :matched_count, :space

    SPECIFIERS = 'diuXxofeEgsc'
    REGEX = /
        # possible space, followed by...
          (?:\s*
          # percent sign, followed by...
            %
            # another percent sign, or...
              (?:%|
                 # optional assignment suppression flag
                 \*?
                 # optional maximum field width
                 \d*
                   # named character class, ...
                   (?:\[\[:\w+:\]\]|
                   # traditional character class, or...
                      \[[^\]]*\]|
                   # specifier letter.
                      [#{SPECIFIERS}])))|
            # or miscellaneous characters
              [^%\s]+/ix

    # Splits +str+ (converted with to_s) into atoms using REGEX and wraps
    # each one in a FormatSpecifier. A format with no non-space character
    # yields no specifiers.
    def initialize(str)
      @specs = []
      @i = 1
      s = str.to_s
      return unless /\S/.match(s)
      @space = true if /\s\z/.match(s)
      @specs.replace s.scan(REGEX).map {|spec| FormatSpecifier.new(spec) }
    end

    # The format text rebuilt from the remaining specifiers.
    def to_s
      @specs.join('')
    end

    # Drops the first +n+ specifiers (by default, those that matched in the
    # most recent #match).
    def prune(n=matched_count)
      n.times { @specs.shift }
    end

    # Number of specifiers currently held.
    def spec_count
      @specs.size
    end

    # True when the index of the specifier most recently visited by #match
    # is the index of the final specifier.
    def last_spec
      @i == spec_count - 1
    end

    # Applies the specifiers in order to +str+, stopping at the first one
    # that fails or when the input is used up. Returns the array of
    # conversions with nils (suppressed and literal atoms) removed.
    def match(str)
      accum = []
      @string_left = str
      @matched_count = 0

      # The index is taken as a local and copied to @i: an instance
      # variable cannot be a block parameter on Ruby 1.9 and later.
      @specs.each_with_index do |spec, i|
        @i = i
        @last_spec_tried = spec
        @last_match_tried = spec.match(@string_left)
        break unless @last_match_tried
        @matched_count += 1

        accum << spec.conversion

        @string_left = @last_match_tried.post_match
        break if @string_left.empty?
      end
      return accum.compact
    end
  end
end
|
---|
559 |
|
---|
class IO

  # Scans this stream according to the format string +str+ and returns the
  # array of conversions. With a block, delegates to block_scanf.
  #
  # Input is pulled one *line* at a time. A line break may or may not fall
  # on the boundary between two format specifiers, and white-space rules
  # may or may not apply there, which is why this is more elaborate than
  # String#scanf.
  #
  # For each line read:
  #
  #   * The last attempted specifier matched:
  #       could it keep matching on more input (mid_match?)?
  #         yes: remember the interim result and read another line
  #
  #   * The last attempted specifier did not match, and every specifier
  #     but one has matched:
  #       is the unmatched input nothing but white space?
  #         yes: does the specifier treat input white space as significant?
  #                yes: give up
  #                no:  remember the interim result and read another line
  #
  #   * Otherwise the conversions are final: they are appended to the
  #     result, the consumed text is dropped from the buffer, and the
  #     matched specifiers are pruned from the format.
  #
  # Afterwards the stream is repositioned just past the consumed input
  # (best effort; any error from seek is discarded).
  def scanf(str, &b)
    return block_scanf(str, &b) if b
    return [] unless str.size > 0

    origin   = pos rescue 0
    consumed = 0
    pending  = ""
    interim  = []
    results  = []

    format = Scanf::FormatString.new(str)

    loop do
      if eof || (tty? && !format.match(pending))
        results.concat(interim)
        break
      end

      pending << gets

      conversions = format.match(pending)
      tried = format.last_spec_tried

      if tried.matched
        if tried.mid_match?
          interim.replace(conversions)
          next
        end

      elsif format.matched_count == format.spec_count - 1
        if /\A\s*\z/.match(format.string_left)
          break if tried.count_space?
          interim.replace(conversions)
          next
        end
      end

      results.concat(conversions)

      # Account for the text eaten by this round, keeping only the
      # unmatched remainder in the buffer.
      size_before = pending.size
      pending.replace(format.string_left)
      consumed += size_before - pending.size

      break if format.last_spec
      format.prune
    end

    seek(origin + consumed, IO::SEEK_SET) rescue Errno::ESPIPE
    soak_up_spaces if format.last_spec && format.space

    return results
  end

  private

  # Reads and discards characters from the stream until a non-white-space
  # character or end of file is reached; a non-white-space character that
  # was read is pushed back.
  def soak_up_spaces
    c = getc
    ungetc(c) if c
    c = getc while !eof && c && !/\S/.match(c.chr)
    ungetc(c) if c && /\S/.match(c.chr)
  end

  # Block form of scanf: calls scanf(str) repeatedly, yielding each
  # non-empty array of conversions, and returns the array of block results.
  # Stops on an empty scan or at end of file.
  def block_scanf(str)
    collected = []
    # Sub-ideal, since scanf builds its own FormatString on every call;
    # this instance is only consulted for the loop's stop condition.
    format = Scanf::FormatString.new(str)
    final_flag = format.last_spec
    begin
      conversions = scanf(str)
      break if conversions.empty?
      collected.push(yield(conversions))
    end until eof || format.last_spec_tried == final_flag
    return collected
  end
end
|
---|
667 |
|
---|
class String

  # Scans this string according to +fstr+, which may be a format string or
  # an existing Scanf::FormatString, and returns the array of conversions.
  # With a block, delegates to block_scanf.
  def scanf(fstr, &b)
    return block_scanf(fstr, &b) if b

    format = fstr.is_a?(Scanf::FormatString) ? fstr : Scanf::FormatString.new(fstr)
    format.match(self)
  end

  # Applies the format +fstr+ over and over to what is left of a copy of
  # this string, yielding each non-empty array of conversions. Returns the
  # array of block results. Stops when a pass converts nothing or the
  # input is used up.
  def block_scanf(fstr, &b)
    format = Scanf::FormatString.new(fstr)
    remaining = self.dup
    collected = []
    begin
      conversions = remaining.scanf(format)
      collected.push(yield(conversions)) unless conversions.empty?
      remaining = format.string_left
    end until conversions.empty? || remaining.empty?
    return collected
  end
end
|
---|
696 |
|
---|
module Kernel
  private

  # Scans standard input: forwards the format +fs+ and the optional block
  # to STDIN.scanf and returns its result.
  def scanf(fs, &b)
    STDIN.scanf(fs, &b)
  end
end
|
---|