source: extensions/gsdl-video/trunk/installed/cmdline/lib/ruby/1.8/jcode.rb@ 18425

Last change on this file since 18425 was 18425, checked in by davidb, 15 years ago

Video extension to Greenstone

File size: 4.3 KB
Line 
1# jcode.rb - Ruby code to handle Japanese (EUC/SJIS/UTF-8) strings
2
# In verbose mode, tell the user when $KCODE is still "NONE".
warn "Warning: $KCODE is NONE." if $VERBOSE && $KCODE == "NONE"
6
# Remember the current warning level in $vsave, then switch warnings off
# for the String class body that follows.
$vsave = $VERBOSE
$VERBOSE = false
# Reopens String to replace tr/delete/squeeze/tr_s/chop/succ/each_char with
# implementations built on gsub!/scan and regular expressions, and to add
# mbchar?, end_regexp, jlength (alias jsize) and jcount.
#
# mbchar?, end_regexp and succ! inspect the first letter of $KCODE
# (s/S, e/E, u/U) to pick the SJIS, EUC or UTF-8 byte patterns below.
class String
  warn "feel free for some warnings:\n" if $VERBOSE

  # Rewrites a tr-style character specification so that it can be
  # interpolated between "[" and "]" of a Regexp:
  # * a backslash followed by "[", "]", "-" or "\" is kept unchanged;
  # * a backslash followed by any other character is reduced to that character;
  # * a bare "[", "]" or "\" gets a backslash put in front of it.
  def _regex_quote(str)
    str.gsub(/(\\[\[\]\-\\])|\\(.)|([\[\]\\])/) do
      $1 || $2 || '\\' + $3
    end
  end
  private :_regex_quote

  # Byte patterns (regexp source text) for one multibyte character.
  PATTERN_SJIS = '[\x81-\x9f\xe0-\xef][\x40-\x7e\x80-\xfc]'
  PATTERN_EUC = '[\xa1-\xfe][\xa1-\xfe]'
  PATTERN_UTF8 = '[\xc0-\xdf][\x80-\xbf]|[\xe0-\xef][\x80-\xbf][\x80-\xbf]'

  # Compiled forms of the patterns above. The "n" flag is the same option
  # end_regexp uses; the literal form replaces Regexp.new(pattern, 0, 'n').
  RE_SJIS = /#{PATTERN_SJIS}/n
  RE_EUC = /#{PATTERN_EUC}/n
  RE_UTF8 = /#{PATTERN_UTF8}/n

  # Per-encoding step tables consulted by succ!. Each table is keyed by a
  # one-character string and defaults to 1 for keys that were never set.
  SUCC = {}
  SUCC['s'] = Hash.new(1)
  (0..0x3f).each { |i| SUCC['s'][i.chr] = 0x40 - i }
  SUCC['s']["\x7e"] = 0x80 - 0x7e
  SUCC['s']["\xfd"] = 0x100 - 0xfd
  SUCC['s']["\xfe"] = 0x100 - 0xfe
  SUCC['s']["\xff"] = 0x100 - 0xff
  SUCC['e'] = Hash.new(1)
  (0..0xa0).each { |i| SUCC['e'][i.chr] = 0xa1 - i }
  SUCC['e']["\xfe"] = 2
  SUCC['u'] = Hash.new(1)
  (0..0x7f).each { |i| SUCC['u'][i.chr] = 0x80 - i }
  SUCC['u']["\xbf"] = 0x100 - 0xbf

  # Matches the receiver against the multibyte pattern selected by $KCODE.
  # Returns the match position, or nil when nothing matches or when $KCODE
  # does not start with s, e or u.
  def mbchar?
    case $KCODE[0]
    when ?s, ?S
      self =~ RE_SJIS
    when ?e, ?E
      self =~ RE_EUC
    when ?u, ?U
      self =~ RE_UTF8
    else
      nil
    end
  end

  # Returns a Regexp matching one character at the end of a line: the
  # multibyte pattern selected by $KCODE, or /.$/ for any other $KCODE.
  # The "o" flag makes each literal interpolate its pattern only once.
  def end_regexp
    case $KCODE[0]
    when ?s, ?S
      /#{PATTERN_SJIS}$/on
    when ?e, ?E
      /#{PATTERN_EUC}$/on
    when ?u, ?U
      /#{PATTERN_UTF8}$/on
    else
      /.$/on
    end
  end

  # Keep the built-in implementations reachable under private names.
  alias original_succ! succ!
  private :original_succ!

  alias original_succ succ
  private :original_succ

  # In-place successor. When $KCODE is not 'NONE' and the receiver ends
  # with a character matching end_regexp, the last byte is advanced (with a
  # carry into the byte before it when it wraps to 0) until the receiver
  # ends with a valid character again. Otherwise the built-in succ! is used.
  def succ!
    reg = end_regexp
    if $KCODE != 'NONE' && self =~ reg
      succ_table = SUCC[$KCODE[0,1].downcase]
      begin
        # NOTE(review): the SUCC tables are keyed by one-character strings;
        # on Ruby 1.8 self[-1] is presumably a Fixnum, in which case this
        # lookup would always yield the default step of 1 -- confirm.
        self[-1] += succ_table[self[-1]]
        self[-2] += 1 if self[-1] == 0
      end while self !~ reg
      self
    else
      original_succ!
    end
  end

  # Non-destructive counterpart of succ!.
  def succ
    str = self.dup
    str.succ! or str
  end

  private

  # Expands a tr-style specification into an array of characters.
  # "x-y" becomes every value from x up to y, a backslash escapes the
  # character after it, and anything else stands for itself. A range whose
  # two ends differ in length is skipped.
  def _expand_ch(str)
    a = []
    str.scan(/(?:\\(.)|([^\\]))-(?:\\(.)|([^\\]))|(?:\\(.)|(.))/m) do
      from = $1 || $2
      to = $3 || $4
      one = $5 || $6
      if one
        a.push one
      elsif from.length != to.length
        next
      elsif from.length == 1
        from[0].upto(to[0]) { |c| a.push c.chr }
      else
        from.upto(to) { |c| a.push c }
      end
    end
    a
  end

  # Builds the translation table used by tr! and tr_s!: the i-th character
  # of the expanded +from+ maps to the i-th character of the expanded +to+;
  # when +to+ is shorter, its last character is reused.
  def expand_ch_hash(from, to)
    h = {}
    afrom = _expand_ch(from)
    ato = _expand_ch(to)
    afrom.each_with_index do |x,i| h[x] = ato[i] || ato[-1] end
    h
  end

  # Memoized translation tables and compiled patterns.
  HashCache = {}
  TrPatternCache = {}
  DeletePatternCache = {}
  SqueezePatternCache = {}
  # tr_s! needs /(...)\1*/ while squeeze! needs /(...)\1+/ for the same
  # specification, so the two must not share a cache.
  TrSqueezePatternCache = {}

  public

  # Replaces, in place, every character listed in +from+ with the
  # corresponding character of +to+. A +from+ starting with "^" replaces
  # every character NOT listed with the last character of +to+.
  # Returns nil when +from+ is empty or nothing was replaced; an empty
  # +to+ delegates to delete!.
  def tr!(from, to)
    return nil if from == ""
    return self.delete!(from) if to == ""

    pattern = TrPatternCache[from] ||= /[#{_regex_quote(from)}]/
    if from[0] == ?^
      last = /.$/.match(to)[0]
      self.gsub!(pattern, last)
    else
      h = HashCache[from + "1-0" + to] ||= expand_ch_hash(from, to)
      self.gsub!(pattern) do |c| h[c] end
    end
  end

  # Non-destructive counterpart of tr!.
  def tr(from, to)
    (str = self.dup).tr!(from, to) or str
  end

  # Removes, in place, every character listed in +del+.
  # Returns nil when +del+ is empty or nothing was removed.
  def delete!(del)
    return nil if del == ""
    self.gsub!(DeletePatternCache[del] ||= /[#{_regex_quote(del)}]+/, '')
  end

  # Non-destructive counterpart of delete!.
  def delete(del)
    (str = self.dup).delete!(del) or str
  end

  # Collapses, in place, runs of a repeated character into one character.
  # With +del+ only the listed characters are collapsed; without it every
  # character is. Returns nil when +del+ is empty or nothing was collapsed.
  def squeeze!(del=nil)
    return nil if del == ""
    pattern =
      if del
        SqueezePatternCache[del] ||= /([#{_regex_quote(del)}])\1+/
      else
        /(.|\n)\1+/
      end
    self.gsub!(pattern, '\1')
  end

  # Non-destructive counterpart of squeeze!.
  def squeeze(del=nil)
    (str = self.dup).squeeze!(del) or str
  end

  # Like tr!, but each run of the same matching character is replaced by a
  # single translated character. Returns nil when +from+ is empty (the same
  # guard tr! applies; without it an empty character class would be
  # compiled) or when nothing was replaced; an empty +to+ delegates to
  # delete!.
  def tr_s!(from, to)
    return nil if from == ""
    return self.delete!(from) if to.length == 0

    # Cached separately from squeeze!, whose pattern for the same
    # specification requires at least one repetition.
    pattern = TrSqueezePatternCache[from] ||= /([#{_regex_quote(from)}])\1*/
    if from[0] == ?^
      last = /.$/.match(to)[0]
      self.gsub!(pattern, last)
    else
      h = HashCache[from + "1-0" + to] ||= expand_ch_hash(from, to)
      self.gsub!(pattern) do h[$1] end
    end
  end

  # Non-destructive counterpart of tr_s!.
  def tr_s(from, to)
    (str = self.dup).tr_s!(from,to) or str
  end

  # Removes, in place, the final character; a trailing "\n" or "\r\n" is
  # removed as one unit. Returns nil when nothing was removed.
  def chop!
    self.gsub!(/(?:.|\r?\n)\z/, '')
  end

  # Non-destructive counterpart of chop!.
  def chop
    (str = self.dup).chop! or str
  end

  # Returns the length of a copy in which every character matching
  # /[^\Wa-zA-Z_\d]/ has been replaced by one space.
  # NOTE(review): presumably this counts each multibyte character as one
  # under a multibyte $KCODE on Ruby 1.8 -- confirm.
  def jlength
    self.gsub(/[^\Wa-zA-Z_\d]/, ' ').length
  end
  alias jsize jlength

  # Returns the jlength of what remains after deleting every character
  # that is not listed in +str+.
  def jcount(str)
    self.delete("^#{str}").jlength
  end

  # Yields each character (as matched by /./m) to the block; without a
  # block, returns the characters as an array.
  def each_char
    if block_given?
      scan(/./m) do |x|
        yield x
      end
    else
      scan(/./m)
    end
  end

end
# Restore the warning level that was saved in $vsave before the String class body.
220$VERBOSE = $vsave
Note: See TracBrowser for help on using the repository browser.