source: extensions/gsdl-video/trunk/installed/cmdline/lib/ruby/1.8/csv.rb@ 18425

Last change on this file since 18425 was 18425, checked in by davidb, 15 years ago

Video extension to Greenstone

File size: 24.5 KB
Line 
1# CSV -- module for generating/parsing CSV data.
2# Copyright (C) 2000-2004 NAKAMURA, Hiroshi <[email protected]>.
3
4# $Id: csv.rb 11708 2007-02-12 23:01:19Z shyouhei $
5
6# This program is copyrighted free software by NAKAMURA, Hiroshi. You can
7# redistribute it and/or modify it under the same terms of Ruby's license;
8# either the dual license version in 2003, or any later version.
9
10
11class CSV
# Raised by the parser when CSV input is malformed (for example an
# unterminated quote or a stray carriage return).
class IllegalFormatError < RuntimeError
end
13
# Deprecated. A String subclass that represents the content of one CSV cell.
class Cell < String
  # data:    initial text of the cell.
  # is_null: when true the cell starts out empty and +data+ is ignored.
  def initialize(data = "", is_null = false)
    if is_null
      super("")
    else
      super(data)
    end
  end

  # Returns the content of the cell via to_s.
  def data
    to_s
  end
end
24
# Deprecated. An Array subclass with no behavior of its own.
class Row < Array; end
28
# Open a CSV formatted file for reading ('r', 'rb') or writing ('w', 'wb').
#
# READING
#   # Block form: every parsed row is yielded, the file is closed afterwards
#   # and nil is returned.
#   CSV.open('csvfile.csv', 'r') do |row|
#     p row
#   end
#
#   # Without a block a reader is returned; close it when done.
#   reader = CSV.open('csvfile.csv', 'r')
#   row1 = reader.shift
#   row2 = reader.shift
#   p 'row2 not found.' if row2.empty?
#   reader.close
#
# WRITING
#   # Block form: the writer is yielded, the file is closed afterwards and
#   # nil is returned.
#   CSV.open('csvfile.csv', 'w') do |writer|
#     writer << ['r1c1', 'r1c2']
#     writer << ['r2c1', 'r2c2']
#     writer << [nil, nil]
#   end
#
#   # Without a block a writer is returned; close it when done.
#   writer = CSV.open('csvfile.csv', 'w')
#   writer << ['r1c1', 'r1c2'] << ['r2c1', 'r2c2'] << [nil, nil]
#   writer.close
#
# ARGS
#   path: name of the file to parse or to generate.
#   mode: 'r' or 'rb' for reading, 'w' or 'wb' for writing.
#   fs:   column separator. nil by default, which ends up as a comma. To
#         separate fields with semicolons give ?; here.
#   rs:   row separator. nil by default, meaning "\r\n or \n". To separate
#         records with \r give ?\r here.
#
# RETURNS
#   A reader (see CSV::Reader#each) or a writer (see CSV::Writer#<< and
#   CSV::Writer#add_row) when no block is given, nil otherwise.
#
# RAISES
#   ArgumentError when +mode+ is none of the four supported values.
#
def CSV.open(path, mode, fs = nil, rs = nil, &block)
  case mode
  when 'r', 'rb'
    open_reader(path, mode, fs, rs, &block)
  when 'w', 'wb'
    open_writer(path, mode, fs, rs, &block)
  else
    raise ArgumentError.new("'mode' must be 'r', 'rb', 'w', or 'wb'")
  end
end
92
# Read the comma separated file at +path+ and yield each row to the block.
# rs is the row separator (nil means "\r\n or \n").
def CSV.foreach(path, rs = nil, &block)
  comma = ','
  open_reader(path, 'r', comma, rs, &block)
end
96
# Read +path+ through IO.read (honoring the optional +length+ and +offset+)
# and hand the resulting string to CSV.parse. Returns what CSV.parse returns.
def CSV.read(path, length = nil, offset = nil)
  raw = IO.read(path, length, offset)
  CSV.parse(raw)
end
100
# Read every row of the comma separated file at +path+ and return them as an
# Array of rows. rs is the row separator (nil means "\r\n or \n"). The reader
# is closed even when parsing fails.
def CSV.readlines(path, rs = nil)
  reader = open_reader(path, 'r', ',', rs)
  rows = []
  begin
    reader.each { |row| rows << row }
  ensure
    reader.close
  end
  rows
end
109
# Open +path+ for writing in mode 'w'. With a block the writer is yielded and
# nil is returned; without one the writer itself is returned.
# fs and rs are the column and row separators.
def CSV.generate(path, fs = nil, rs = nil, &block)
  write_mode = 'w'
  open_writer(path, write_mode, fs, rs, &block)
end
113
# Parse lines from the given string or stream.
#
# ARGS
#   str_or_readable: CSV text, or a readable stream.
#   fs: column separator (nil ends up as a comma).
#   rs: row separator (nil means "\r\n or \n").
#
# RETURNS
#   With a block each row is yielded and nil is returned; without a block the
#   rows are returned as an Array of Arrays.
#
# Deprecated behavior kept for compatibility: when the argument names an
# existing file, a warning is written to STDERR and the file is opened as if
# CSV.open(filename, 'r') had been called.
def CSV.parse(str_or_readable, fs = nil, rs = nil, &block)
  # The file name check must not be applied to streams: File.exist? answers
  # true for an open File (which would then be handed to File.open as a
  # path) and raises TypeError for objects it cannot convert, e.g. StringIO.
  names_file =
    if str_or_readable.is_a?(IO)
      false
    else
      begin
        File.exist?(str_or_readable)
      rescue TypeError
        false
      end
    end
  if names_file
    STDERR.puts("CSV.parse(filename) is deprecated." +
      " Use CSV.open(filename, 'r') instead.")
    return open_reader(str_or_readable, 'r', fs, rs, &block)
  end
  if block
    CSV::Reader.parse(str_or_readable, fs, rs) do |row|
      yield(row)
    end
    nil
  else
    CSV::Reader.create(str_or_readable, fs, rs).collect { |row| row }
  end
end
130
# Parse ONE line from the given string and return its cells as an Array.
# Anything after the first row is ignored, for example "a,b\r\nc,d" gives
# ['a', 'b'] and the second line 'c,d' is dropped.
#
# If you don't know whether the string to parse is exactly one line, use
# CSV.parse_row instead of this method.
#
# ARGS
#   src: CSV data to parse.
#   fs:  column separator, a String or a char code. nil means a comma.
#   rs:  row separator, a String or a char code. nil means "\r\n or \n".
#
# RETURNS
#   Array of cells; an empty Array when the line is malformed.
def CSV.parse_line(src, fs = nil, rs = nil)
  fs ||= ','
  # Integer rather than Fixnum: Fixnum is a subclass of Integer on old Rubies
  # and no longer exists as a constant on current ones.
  fs = fs.chr if fs.is_a?(Integer)
  rs = rs.chr if rs.is_a?(Integer)
  idx = 0
  res_type = :DT_COLSEP
  row = []
  begin
    # Keep reading cells until a row separator or the end of input shows up.
    while res_type == :DT_COLSEP
      res_type, idx, cell = parse_body(src, idx, fs, rs)
      row << cell
    end
  rescue IllegalFormatError
    return []
  end
  row
end
158
# Create one CSV line from cells. Each cell is stringified with to_s. No row
# separator is appended.
#
# ARGS
#   row: cells of the line; must respond to 'size' and '[](idx)'.
#   fs:  column separator, a String or a char code. nil means a comma.
#   rs:  row separator, a String or a char code. It is only used to decide
#        whether a cell has to be quoted.
#
# RETURNS
#   The generated String; '' for an empty row.
def CSV.generate_line(row, fs = nil, rs = nil)
  return '' if row.size == 0
  fs ||= ','
  # Integer rather than Fixnum: Fixnum is a subclass of Integer on old Rubies
  # and no longer exists as a constant on current ones.
  fs = fs.chr if fs.is_a?(Integer)
  rs = rs.chr if rs.is_a?(Integer)
  result_str = ''
  row.size.times do |idx|
    # Separators go between cells only, never after the last one.
    generate_separator(:DT_COLSEP, result_str, fs, rs) if idx > 0
    generate_body(row[idx], result_str, fs, rs)
  end
  result_str
end
184
# Parse one row starting at a given position. Consider using CSV.parse_line
# instead. To parse all rows of a CSV string, see EXAMPLE below.
#
# EXAMPLE
#   src = "a,b\r\nc,d\r\ne,f"
#   idx = 0
#   begin
#     parsed = []
#     parsed_cells, idx = CSV.parse_row(src, idx, parsed)
#     puts "Parsed #{ parsed_cells } cells."
#     p parsed
#   end while parsed_cells > 0
#
# ARGS
#   src:     CSV data to be parsed. Must respond to '[](idx)' returning one
#            char (nil when idx is out of bounds) and to '[](idx, len)'. A
#            String satisfies this requirement.
#   idx:     index in 'src' where parsing starts. 0 origin.
#   out_dev: buffer for parsed cells. Must respond to '<<(cell)'.
#   fs:      column separator, a String or a char code. nil means a comma.
#            To separate fields with semicolons give ?; here.
#   rs:      row separator, a String or a char code. nil means "\r\n or \n".
#            To separate records with \r give ?\r here.
#
# RETURNS
#   parsed_cells: number of parsed cells.
#   idx:          index of the next parsing location in 'src'.
#   Both values are 0 when nothing is left to parse or the row is malformed.
#
def CSV.parse_row(src, idx, out_dev, fs = nil, rs = nil)
  fs ||= ','
  # Integer rather than Fixnum: Fixnum is a subclass of Integer on old Rubies
  # and no longer exists as a constant on current ones.
  fs = fs.chr if fs.is_a?(Integer)
  rs = rs.chr if rs.is_a?(Integer)
  idx_backup = idx
  parsed_cells = 0
  res_type = :DT_COLSEP
  begin
    while res_type != :DT_ROWSEP
      res_type, idx, cell = parse_body(src, idx, fs, rs)
      if res_type == :DT_EOS
        # End of input without having consumed anything: no row here.
        return 0, 0 if idx == idx_backup
        # Otherwise the end of input terminates the row like a separator.
        res_type = :DT_ROWSEP
      end
      parsed_cells += 1
      out_dev << cell
    end
  rescue IllegalFormatError
    return 0, 0
  end
  return parsed_cells, idx
end
242
# Convert cells to CSV text and append it to a buffer. Consider using
# CSV.generate_line instead. To generate a multi-row CSV string, see EXAMPLE
# below.
#
# EXAMPLE
#   row1 = ['a', 'b']
#   row2 = ['c', 'd']
#   row3 = ['e', 'f']
#   src = [row1, row2, row3]
#   buf = ''
#   src.each do |row|
#     parsed_cells = CSV.generate_row(row, 2, buf)
#     puts "Created #{ parsed_cells } cells."
#   end
#   p buf
#
# ARGS
#   src:     cells to convert. Must respond to 'size' and '[](idx)'. Each
#            cell is stringified with to_s; nil produces an empty field.
#   cells:   number of cells in a line.
#   out_dev: buffer for the generated CSV string. Must respond to
#            '<<(string)'.
#   fs:      column separator, a String or a char code. nil means a comma.
#            To separate fields with semicolons give ?; here.
#   rs:      row separator, a String or a char code. nil means "\n" is
#            written. To separate records with \r give ?\r here.
#
# RETURNS
#   parsed_cells: number of converted cells.
#
def CSV.generate_row(src, cells, out_dev, fs = nil, rs = nil)
  fs ||= ','
  # Integer rather than Fixnum: Fixnum is a subclass of Integer on old Rubies
  # and no longer exists as a constant on current ones.
  fs = fs.chr if fs.is_a?(Integer)
  rs = rs.chr if rs.is_a?(Integer)
  src_size = src.size
  if src_size == 0
    # An empty source only produces output for an explicitly empty line.
    generate_separator(:DT_ROWSEP, out_dev, fs, rs) if cells == 0
    return 0
  end
  parsed_cells = 0
  generate_body(src[parsed_cells], out_dev, fs, rs)
  parsed_cells += 1
  while (parsed_cells < cells) && (parsed_cells != src_size)
    generate_separator(:DT_COLSEP, out_dev, fs, rs)
    generate_body(src[parsed_cells], out_dev, fs, rs)
    parsed_cells += 1
  end
  if parsed_cells == cells
    generate_separator(:DT_ROWSEP, out_dev, fs, rs)
  else
    # Fewer cells than requested were available: the line is left open with
    # a trailing column separator instead of a row separator.
    generate_separator(:DT_COLSEP, out_dev, fs, rs)
  end
  parsed_cells
end
302
303 # Private class methods.
304 class << self
305 private
306
# Open +path+ with +mode+ and read CSV rows from it.
#
# With a block every row is yielded, the file is closed afterwards (even on
# error) and nil is returned. Without a block a reader is returned which
# closes the file when the reader itself is closed.
def open_reader(path, mode, fs, rs, &block)
  file = File.open(path, mode)
  unless block
    reader = CSV::Reader.create(file, fs, rs)
    reader.close_on_terminate
    return reader
  end
  begin
    CSV::Reader.parse(file, fs, rs) { |row| yield(row) }
  ensure
    file.close
  end
  nil
end
324
# Open +path+ with +mode+ and write CSV rows to it.
#
# With a block the writer is yielded, the file is closed afterwards (even on
# error) and nil is returned. Without a block a writer is returned which
# closes the file when the writer itself is closed.
def open_writer(path, mode, fs, rs, &block)
  file = File.open(path, mode)
  unless block
    writer = CSV::Writer.create(file, fs, rs)
    writer.close_on_terminate
    return writer
  end
  begin
    CSV::Writer.generate(file, fs, rs) { |writer| yield(writer) }
  ensure
    file.close
  end
  nil
end
342
# Parse a single cell of +src+, starting at index +idx+.
#
# ARGS
#   src: data to parse; read with src[idx] (one char, nil at the end) and
#        src[start, length].
#   idx: index at which reading starts.
#   fs:  column separator; indexed and sized like a String (the public
#        callers convert char codes with chr before calling).
#   rs:  row separator or nil. With nil, "\n" is the separator and a single
#        "\r" directly in front of it is accepted as part of the separator.
#
# RETURNS
#   [type, next_idx, cell]
#   type:     :DT_COLSEP, :DT_ROWSEP or :DT_EOS, i.e. what ended the cell.
#   next_idx: index following the consumed separator, or the index at which
#             the input ended for :DT_EOS.
#   cell:     the accumulated Cell, or nil when the parser was still in its
#             start state (nothing read) at the separator / end of input.
#
# RAISES
#   IllegalFormatError on malformed quoting or a misplaced "\r".
343 def parse_body(src, idx, fs, rs)
344 fs_str = fs
345 fs_size = fs_str.size
346 rs_str = rs || "\n"
347 rs_size = rs_str.size
# Number of leading chars of each separator matched so far.
348 fs_idx = rs_idx = 0
349 cell = Cell.new
# :ST_START - nothing of the cell has been read yet.
# :ST_DATA  - inside cell content (quoted or not).
# :ST_QUOTE - a quote was just read inside a quoted cell; it either closes
#             the cell or, when followed by another quote, is an escape.
350 state = :ST_START
# quoted: currently between quotes. cr: an unquoted "\r" was read while rs
# is nil and the matching "\n" is still expected.
351 quoted = cr = false
352 c = nil
# Start of the span of src that has not been copied into cell yet.
353 last_idx = idx
354 while c = src[idx]
# Separators are only looked for outside of quotes.
355 unless quoted
356 fschar = (c == fs_str[fs_idx])
357 rschar = (c == rs_str[rs_idx])
358 # simple 1 char backtrack
359 if !fschar and c == fs_str[0]
360 fs_idx = 0
361 fschar = true
362 if state == :ST_START
363 state = :ST_DATA
364 elsif state == :ST_QUOTE
365 raise IllegalFormatError
366 end
367 end
368 if !rschar and c == rs_str[0]
369 rs_idx = 0
370 rschar = true
371 if state == :ST_START
372 state = :ST_DATA
373 elsif state == :ST_QUOTE
374 raise IllegalFormatError
375 end
376 end
377 end
378 if c == ?"
379 fs_idx = rs_idx = 0
380 if cr
381 raise IllegalFormatError
382 end
# Flush the data read so far; the quote itself is handled per state below.
383 cell << src[last_idx, (idx - last_idx)]
384 last_idx = idx
385 if state == :ST_DATA
386 if quoted
# Possible end of the quoted section; skip the quote char.
387 last_idx += 1
388 quoted = false
389 state = :ST_QUOTE
390 else
# A quote in the middle of unquoted data is not allowed.
391 raise IllegalFormatError
392 end
393 elsif state == :ST_QUOTE
# Second quote in a row: an escaped quote, emit one and stay quoted.
394 cell << c.chr
395 last_idx += 1
396 quoted = true
397 state = :ST_DATA
398 else # :ST_START
399 quoted = true
400 last_idx += 1
401 state = :ST_DATA
402 end
403 elsif fschar or rschar
404 if fschar
405 fs_idx += 1
406 end
407 if rschar
408 rs_idx += 1
409 end
410 sep = nil
411 if fs_idx == fs_size
412 if state == :ST_START and rs_idx > 0 and fs_idx < rs_idx
413 state = :ST_DATA
414 end
# Copy the pending data without the separator chars matched before this one.
415 cell << src[last_idx, (idx - last_idx - (fs_size - 1))]
416 last_idx = idx
417 fs_idx = rs_idx = 0
418 if cr
419 raise IllegalFormatError
420 end
421 sep = :DT_COLSEP
422 elsif rs_idx == rs_size
423 if state == :ST_START and fs_idx > 0 and rs_idx < fs_idx
424 state = :ST_DATA
425 end
# When a "\r" is pending (rs nil) the data was already flushed at the "\r"
# and last_idx points at it, so nothing more is copied here.
426 if !(rs.nil? and cr)
427 cell << src[last_idx, (idx - last_idx - (rs_size - 1))]
428 last_idx = idx
429 end
430 fs_idx = rs_idx = 0
431 sep = :DT_ROWSEP
432 end
# A complete separator ends the cell.
433 if sep
434 if state == :ST_DATA
435 return sep, idx + 1, cell;
436 elsif state == :ST_QUOTE
437 return sep, idx + 1, cell;
438 else # :ST_START
439 return sep, idx + 1, nil
440 end
441 end
442 elsif rs.nil? and c == ?\r
443 # special \r treatment for backward compatibility
444 fs_idx = rs_idx = 0
445 if cr
446 raise IllegalFormatError
447 end
448 cell << src[last_idx, (idx - last_idx)]
449 last_idx = idx
450 if quoted
451 state = :ST_DATA
452 else
453 cr = true
454 end
455 else
# Any other char is plain data; it resets partial separator matches.
456 fs_idx = rs_idx = 0
457 if state == :ST_DATA or state == :ST_START
458 if cr
459 raise IllegalFormatError
460 end
461 state = :ST_DATA
462 else # :ST_QUOTE
463 raise IllegalFormatError
464 end
465 end
466 idx += 1
467 end
# The input ended without a complete separator.
468 if state == :ST_START
# A partially matched separator at the end of input counts as cell data.
469 if fs_idx > 0 or rs_idx > 0
470 state = :ST_DATA
471 else
472 return :DT_EOS, idx, nil
473 end
474 elsif quoted
# Quote never closed.
475 raise IllegalFormatError
476 elsif cr
# "\r" without the following row separator char.
477 raise IllegalFormatError
478 end
479 cell << src[last_idx, (idx - last_idx)]
480 last_idx = idx
481 return :DT_EOS, idx, cell
482 end
483
# Append the CSV representation of one cell to +out_dev+.
#
# A nil cell produces no output at all. Any other cell is converted with
# to_s; embedded double quotes are doubled and the text is wrapped in double
# quotes when it contained a quote, contains the column separator +fs+, the
# row separator +rs+ (if given), a "\r" or "\n", or is empty.
def generate_body(cell, out_dev, fs, rs)
  return nil if cell.nil?

  text = cell.to_s
  escaped = text.dup
  needs_quotes = escaped.gsub!('"', '""') ||
                 escaped.index(fs) ||
                 (rs && escaped.index(rs)) ||
                 (/[\r\n]/ =~ escaped) ||
                 text.empty?
  if needs_quotes
    out_dev << '"' << escaped << '"'
  else
    out_dev << escaped
  end
end
501
# Append a separator to +out_dev+: +fs+ for :DT_COLSEP, +rs+ (or "\n" when
# rs is nil) for :DT_ROWSEP. Any other type appends nothing.
def generate_separator(type, out_dev, fs, rs)
  if type == :DT_COLSEP
    out_dev << fs
  elsif type == :DT_ROWSEP
    out_dev << (rs || "\n")
  end
end
510 end
511
512
# CSV formatted string/stream reader.
#
# EXAMPLE
#   Read CSV lines until the first column is 'stop'.
#
#   CSV::Reader.parse(File.open('bigdata', 'rb')) do |row|
#     p row
#     break if !row[0].is_null && row[0].data == 'stop'
#   end
#
class Reader
  include Enumerable

  # Parse CSV data and get lines. With a block, the block is called for each
  # parsed row, the reader is closed (also when the block raises or breaks
  # out) and nil is returned. Rows are not cached for performance reasons.
  # Without a block the reader itself is returned.
  def Reader.parse(str_or_readable, fs = ',', rs = nil, &block)
    reader = Reader.create(str_or_readable, fs, rs)
    return reader unless block
    begin
      reader.each do |row|
        yield(row)
      end
    ensure
      # Release the reader even if the caller's block did not finish.
      reader.close
    end
    nil
  end

  # Returns a reader instance: a StringReader for a String, an IOReader for
  # an IO or any other object.
  def Reader.create(str_or_readable, fs = ',', rs = nil)
    if str_or_readable.is_a?(String)
      StringReader.new(str_or_readable, fs, rs)
    else
      IOReader.new(str_or_readable, fs, rs)
    end
  end

  # Yields every remaining row as an Array of cells. Stops as soon as get_row
  # reports 0 parsed cells. Returns nil.
  def each
    while true
      row = []
      break if get_row(row) == 0
      yield(row)
    end
    nil
  end

  # Reads and returns the next row. The row is empty when get_row did not
  # add any cell.
  def shift
    row = []
    get_row(row)
    row
  end

  # Releases the reader by calling terminate.
  def close
    terminate
  end

  private

  # Reader is abstract; subclasses must define their own initialize.
  def initialize(dev)
    raise RuntimeError.new('Do not instanciate this class directly.')
  end

  # Must append the cells of the next row to +row+ and return the number of
  # parsed cells (0 ends iteration in each).
  def get_row(row)
    raise NotImplementedError.new('Method get_row must be defined in a derived class.')
  end

  def terminate
    # Define if needed.
  end
end
589
590
# Reader over an in-memory String.
class StringReader < Reader
  # string: CSV data. fs / rs: column and row separators.
  # A leading "\xef\xbb\xbf" sequence is skipped.
  def initialize(string, fs = ',', rs = nil)
    @dev = string
    @fs = fs
    @rs = rs
    @idx = (@dev[0, 3] == "\xef\xbb\xbf") ? 3 : 0
  end

  private

  # Parses the next row into +row+ and returns the number of parsed cells.
  # Raises IllegalFormatError when parsing fails before the end of the
  # string.
  def get_row(row)
    parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs)
    nothing_parsed = (parsed_cells == 0 && next_idx == 0)
    raise IllegalFormatError.new if nothing_parsed && @idx != @dev.size
    @idx = next_idx
    parsed_cells
  end
end
613
614
# Reader over an IO (or any object CSV::IOBuf can read from).
class IOReader < Reader
  # io: stream to read from. fs / rs: column and row separators.
  # Leading bytes 0xef 0xbb 0xbf are skipped when the buffer reports them as
  # char codes.
  def initialize(io, fs = ',', rs = nil)
    @io = io
    @fs = fs
    @rs = rs
    @dev = CSV::IOBuf.new(@io)
    @idx = 0
    if @dev[0] == 0xef and @dev[1] == 0xbb and @dev[2] == 0xbf
      @idx += 3
    end
    @close_on_terminate = false
  end

  # Tell this reader to close the IO when terminated (Triggered by invoking
  # CSV::IOReader#close).
  def close_on_terminate
    @close_on_terminate = true
  end

  private

  # Parses the next row into +row+ and returns the number of parsed cells.
  # Raises IllegalFormatError when parsing fails before the end of the
  # stream. Consumed data is dropped from the buffer afterwards.
  def get_row(row)
    parsed_cells, next_idx = CSV.parse_row(@dev, @idx, row, @fs, @rs)
    if parsed_cells == 0 and next_idx == 0 and !@dev.is_eos?
      raise IllegalFormatError.new
    end
    dropped = @dev.drop(next_idx)
    # Indexes are relative to the buffer head, which just moved forward.
    @idx = next_idx - dropped
    parsed_cells
  end

  # Closes the IO when requested via close_on_terminate and releases the
  # buffer.
  def terminate
    @io.close if @close_on_terminate
    @dev.close if @dev
  end
end
656
657
# CSV formatted string/stream writer.
#
# EXAMPLE
#   Write rows to 'csvout' file.
#
#   outfile = File.open('csvout', 'wb')
#   CSV::Writer.generate(outfile) do |csv|
#     csv << ['c1', nil, '', '"', "\r\n", 'c2']
#     ...
#   end
#
#   outfile.close
#
class Writer
  # With a block, the block is called with the writer instance, the writer
  # is closed (also when the block raises or breaks out) and nil is
  # returned. Without a block the writer is returned.
  # str_or_writable must handle '<<(string)'.
  def Writer.generate(str_or_writable, fs = ',', rs = nil, &block)
    writer = Writer.create(str_or_writable, fs, rs)
    return writer unless block
    begin
      yield(writer)
    ensure
      # Release the writer even if the caller's block did not finish.
      writer.close
    end
    nil
  end

  # Returns a BasicWriter. str_or_writable must handle '<<(string)'.
  def Writer.create(str_or_writable, fs = ',', rs = nil)
    BasicWriter.new(str_or_writable, fs, rs)
  end

  # Dump one row to the device. The argument must be an Array of String.
  # Returns self so that calls can be chained.
  def <<(row)
    CSV.generate_row(row, row.size, @dev, @fs, @rs)
    self
  end
  alias add_row <<

  # Releases the writer by calling terminate.
  def close
    terminate
  end

  private

  # Writer is abstract; subclasses must define their own initialize.
  def initialize(dev)
    raise RuntimeError.new('Do not instanciate this class directly.')
  end

  def terminate
    # Define if needed.
  end
end
711
712
# Writer that appends generated CSV text to any device handling
# '<<(string)'.
class BasicWriter < Writer
  # str_or_writable: output device. fs / rs: column and row separators.
  def initialize(str_or_writable, fs = ',', rs = nil)
    @dev = str_or_writable
    @fs = fs
    @rs = rs
    @close_on_terminate = false
  end

  # Tell this writer to close the IO when terminated (Triggered by invoking
  # CSV::BasicWriter#close).
  def close_on_terminate
    @close_on_terminate = true
  end

  private

  # Closes the device, but only when close_on_terminate was requested.
  def terminate
    @dev.close if @close_on_terminate
  end
end
735
736private
737
738 # Buffered stream.
739 #
740 # EXAMPLE 1 -- an IO.
741 # class MyBuf < StreamBuf
742 # # Do initialize myself before a super class. Super class might call my
743 # # method 'read'. (Could be awful for C++ user. :-)
744 # def initialize(s)
745 # @s = s
746 # super()
747 # end
748 #
749 # # define my own 'read' method.
750 # # CAUTION: Returning nil means EndOfStream.
751 # def read(size)
752 # @s.read(size)
753 # end
754 #
755 # # release buffers. in Ruby which has GC, you do not have to call this...
756 # def terminate
757 # @s = nil
758 # super()
759 # end
760 # end
761 #
762 # buf = MyBuf.new(STDIN)
763 # my_str = ''
764 # p buf[0, 0] # => '' (null string)
765 # p buf[0] # => 97 (char code of 'a')
766 # p buf[0, 1] # => 'a'
767 # my_str = buf[0, 5]
768 # p my_str # => 'abcde' (5 chars)
769 # p buf[0, 6] # => "abcde\n" (6 chars)
770 # p buf[0, 7] # => "abcde\n" (6 chars)
771 # p buf.drop(3) # => 3 (dropped chars)
772 # p buf.get(0, 2) # => 'de' (2 chars)
773 # p buf.is_eos? # => false (is not EOS here)
774 # p buf.drop(5) # => 3 (dropped chars)
775 # p buf.is_eos? # => true (is EOS here)
776 # p buf[0] # => nil (is EOS here)
777 #
778 # EXAMPLE 2 -- String.
779 # This is a conceptual example. No pros with this.
780 #
781 # class StrBuf < StreamBuf
782 # def initialize(s)
783 # @str = s
784 # @idx = 0
785 # super()
786 # end
787 #
788 # def read(size)
789 # str = @str[@idx, size]
790 # @idx += str.size
791 # str
792 # end
793 # end
794 #
class StreamBuf
  # Abstract buffered stream. Data is pulled from 'read' (to be defined by a
  # subclass) in chunks of BufSize and kept in a list of buffers; indexes
  # passed to [] are relative to the current head, which 'drop' advances.

  # Size of one chunk requested from 'read'.
  BufSize = 1024 * 8

  # get a char or a partial string from the stream.
  # idx: index of a string to specify a start point of a string to get.
  #      unlike String instance, idx < 0 returns nil.
  # n:   size of a string to get.
  # returns char at idx if n == nil.
  # returns a partial string, from idx to (idx + n) if n != nil. at EOF,
  # the string size could not equal to arg n.
  def [](idx, n = nil)
    return nil if idx < 0
    if idx_is_eos?(idx)
      if n && (@offset + idx == buf_size(@cur_buf))
        # Like a String, 'abc'[4, 1] returns nil and
        # 'abc'[3, 1] returns '' not nil.
        return ''
      else
        return nil
      end
    end
    my_buf = @cur_buf
    my_offset = @offset
    next_idx = idx
    # Walk forward (reading more data when needed) to the buffer holding idx.
    while my_offset + next_idx >= buf_size(my_buf)
      if my_buf == @buf_tail_idx
        break unless add_buf
      end
      next_idx = my_offset + next_idx - buf_size(my_buf)
      my_buf += 1
      my_offset = 0
    end
    loc = my_offset + next_idx
    if !n
      return @buf_list[my_buf][loc]       # a single char.
    elsif loc + n - 1 < buf_size(my_buf)
      return @buf_list[my_buf][loc, n]    # String.
    else # should do loop instead of (tail) recursive call...
      # The request crosses a buffer boundary: take what this buffer has and
      # fetch the remainder recursively.
      res = @buf_list[my_buf][loc, BufSize]
      size_added = buf_size(my_buf) - loc
      if size_added > 0
        idx += size_added
        n -= size_added
        ret = self[idx, n]
        res << ret if ret
      end
      return res
    end
  end
  alias get []

  # drop a string from the stream.
  # returns dropped size. at EOF, dropped size might not equals to arg n.
  # Once you drop the head of the stream, access to the dropped part via []
  # or get returns nil.
  def drop(n)
    return 0 if is_eos?
    size_dropped = 0
    while n > 0
      # End of stream already detected and the head sits on the last (empty)
      # buffer: nothing is left to drop. Without this exit the loop would
      # never terminate, because n could not shrink any further.
      break if @is_eos && (@cur_buf == @buf_tail_idx)
      if @offset + n < buf_size(@cur_buf)
        size_dropped += n
        @offset += n
        n = 0
      else
        # Drop the rest of the current buffer and move on to the next one,
        # reading a new buffer when the list is exhausted.
        size = buf_size(@cur_buf) - @offset
        size_dropped += size
        n -= size
        @offset = 0
        unless rel_buf
          break unless add_buf
          @cur_buf = @buf_tail_idx
        end
      end
    end
    size_dropped
  end

  # Returns true when the end of the stream was detected and no buffered
  # data is left in front of it.
  def is_eos?
    return idx_is_eos?(0)
  end

  # WARN: Do not instantiate this class directly. Define your own class
  # which derives this class and define 'read' instance method.
  def initialize
    @buf_list = []
    @cur_buf = @buf_tail_idx = -1
    @offset = 0
    @is_eos = false
    # Read the first chunk right away.
    add_buf
    @cur_buf = @buf_tail_idx
  end

  protected

  # Releases all buffers.
  def terminate
    while (rel_buf); end
  end

  # protected method 'read' must be defined in derived classes.
  # CAUTION: Returning nil (or raising EOFError) means EndOfStream. When it
  # is not at EOS, you must block the callee, try to read and return the
  # sized string.
  def read(size) # raise EOFError
    raise NotImplementedError.new('Method read must be defined in a derived class.')
  end

  private

  # Size of the buffer at position +idx+ of the buffer list.
  def buf_size(idx)
    @buf_list[idx].size
  end

  # Reads one more chunk and appends it to the buffer list. Returns true
  # when data was added. At the end of the stream an empty buffer is
  # appended, the EOS flag is set and false is returned. Any error other
  # than EOFError releases the buffers and is re-raised.
  def add_buf
    return false if @is_eos
    begin
      str_read = read(BufSize)
    rescue EOFError
      str_read = nil
    rescue
      terminate
      raise
    end
    if str_read.nil?
      @is_eos = true
      @buf_list.push('')
      @buf_tail_idx += 1
      false
    else
      @buf_list.push(str_read)
      @buf_tail_idx += 1
      true
    end
  end

  # Releases the current buffer and advances to the next one. Returns false
  # when there is no current buffer or the released one was the last.
  def rel_buf
    return false if @cur_buf < 0
    @buf_list[@cur_buf] = nil
    if @cur_buf == @buf_tail_idx
      @cur_buf = -1
      return false
    else
      @cur_buf += 1
      return true
    end
  end

  # True when EOS was detected and the head is on the last buffer (or there
  # is no current buffer). NOTE: the idx argument is not used.
  def idx_is_eos?(idx)
    (@is_eos and ((@cur_buf < 0) or (@cur_buf == @buf_tail_idx)))
  end
end
961
# Buffered IO: a StreamBuf whose data comes from the 'read' method of the
# wrapped object.
#
# EXAMPLE
#   # File 'bigdata' could be a giga-byte size one!
#   buf = CSV::IOBuf.new(File.open('bigdata', 'rb'))
#   p buf[0, 5]   # first chars of the file
#   buf.close
#
class IOBuf < StreamBuf
  # s: object to read from; must respond to 'read(size)'.
  def initialize(s)
    @s = s
    # The source must be set first: the superclass reads during setup.
    super()
  end

  # Releases the buffers.
  def close
    terminate
  end

  private

  # Fetches up to +size+ chars from the wrapped object.
  def read(size)
    @s.read(size)
  end

  def terminate
    super()
  end
end
992end
Note: See TracBrowser for help on using the repository browser.