source: extensions/gsdl-video/trunk/installed/cmdline/lib/ruby/1.8/kconv.rb@ 18425

Last change on this file since 18425 was 18425, checked in by davidb, 15 years ago

Video extension to Greenstone

File size: 8.1 KB
Line 
1#
2# kconv.rb - Kanji Converter.
3#
4# $Id: kconv.rb 11708 2007-02-12 23:01:19Z shyouhei $
5#
6# ----
7#
8# kconv.rb implements the Kconv module, a Kanji (Japanese encoding) converter.
9# Additionally, some methods are added to the String class to allow easy conversion.
10#
11
12require 'nkf'
13
14#
15# Kanji Converter for Ruby.
16#
module Kconv
  #
  # Public Constants
  #

  # Encoding identifiers. Each one is re-exported unchanged from NKF so that
  # callers can write Kconv::EUC instead of NKF::EUC.

  # Auto-Detect
  AUTO = NKF::AUTO
  # ISO-2022-JP
  JIS = NKF::JIS
  # EUC-JP
  EUC = NKF::EUC
  # Shift_JIS
  SJIS = NKF::SJIS
  # BINARY
  BINARY = NKF::BINARY
  # NOCONV
  NOCONV = NKF::NOCONV
  # ASCII
  ASCII = NKF::ASCII
  # UTF-8
  UTF8 = NKF::UTF8
  # UTF-16
  UTF16 = NKF::UTF16
  # UTF-32
  UTF32 = NKF::UTF32
  # UNKNOWN
  UNKNOWN = NKF::UNKNOWN

  #
  # Internal Constants
  #
  # These are meant for internal use only. Nothing in this module actually
  # restricts their visibility, so treat "private" below as a convention.
  #

  # Revision of kconv.rb
  REVISION = %q$Revision: 11708 $

  # Byte-level patterns used by iseuc / issjis / isutf8. All of them are
  # anchored with \A ... \z, so the *whole* string has to be well-formed,
  # and all of them accept the empty string.

  # Regexp of Shift_JIS string (private constant)
  RegexpShiftjis = /\A(?:
                       [\x00-\x7f\xa1-\xdf] |
                       [\x81-\x9f\xe0-\xfc][\x40-\x7e\x80-\xfc]
                     )*\z/nx

  # Regexp of EUC-JP string (private constant)
  RegexpEucjp = /\A(?:
                    [\x00-\x7f]                         |
                    \x8e        [\xa1-\xdf]             |
                    \x8f        [\xa1-\xfe] [\xa1-\xfe] |
                    [\xa1-\xfe] [\xa1-\xfe]
                  )*\z/nx

  # Regexp of UTF-8 string (private constant)
  RegexpUtf8 = /\A(?:
                   [\x00-\x7f]                                     |
                   [\xc2-\xdf] [\x80-\xbf]                         |
                   \xe0        [\xa0-\xbf] [\x80-\xbf]             |
                   [\xe1-\xef] [\x80-\xbf] [\x80-\xbf]             |
                   \xf0        [\x90-\xbf] [\x80-\xbf] [\x80-\xbf] |
                   [\xf1-\xf3] [\x80-\xbf] [\x80-\xbf] [\x80-\xbf] |
                   \xf4        [\x80-\x8f] [\x80-\xbf] [\x80-\xbf]
                 )*\z/nx

  #
  # Public Methods
  #

  # call-seq:
  #    Kconv.kconv(str, out_code, in_code = Kconv::AUTO)
  #
  # Converts <code>str</code> to <code>out_code</code> by building an nkf
  # option string and calling NKF.nkf.
  # <code>out_code</code> and <code>in_code</code> are given as constants of
  # Kconv.
  #
  # * <code>in_code</code> JIS, EUC, SJIS, UTF8, UTF16 or UTF32 is passed to
  #   nkf as an input-encoding hint; any other value (including AUTO) adds
  #   no hint.
  # * <code>out_code</code> JIS, EUC, SJIS, UTF8, UTF16 or UTF32 selects the
  #   output encoding. NOCONV returns <code>str</code> itself without calling
  #   nkf. Any other value adds no output option.
  # * When neither code contributes an option, nkf is called with an empty
  #   option string.
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf directly.
  def kconv(str, out_code, in_code = AUTO)
    # nkf spells input encodings with upper-case letters ...
    in_flag = ''
    case in_code
    when ::NKF::JIS   then in_flag = 'J'
    when ::NKF::EUC   then in_flag = 'E'
    when ::NKF::SJIS  then in_flag = 'S'
    when ::NKF::UTF8  then in_flag = 'W'
    when ::NKF::UTF16 then in_flag = 'W16'
    when ::NKF::UTF32 then in_flag = 'W32'
    end

    # ... and output encodings with the matching lower-case letters.
    out_flag = ''
    case out_code
    when ::NKF::JIS    then out_flag = 'j'
    when ::NKF::EUC    then out_flag = 'e'
    when ::NKF::SJIS   then out_flag = 's'
    when ::NKF::UTF8   then out_flag = 'w'
    when ::NKF::UTF16  then out_flag = 'w16'
    when ::NKF::UTF32  then out_flag = 'w32'
    when ::NKF::NOCONV then return str
    end

    flags = in_flag + out_flag
    # A lone '-' is never handed to nkf: with no flags the option string is
    # left empty.
    opt = flags.empty? ? '' : '-' + flags

    ::NKF::nkf(opt, str)
  end
  module_function :kconv

  #
  # Encode to
  #

  # call-seq:
  #    Kconv.tojis(str)   -> string
  #
  # Converts <code>str</code> to ISO-2022-JP (calls NKF.nkf with '-jm').
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-jxm0', str).
  def tojis(str)
    ::NKF::nkf('-jm', str)
  end
  module_function :tojis

  # call-seq:
  #    Kconv.toeuc(str)   -> string
  #
  # Converts <code>str</code> to EUC-JP (calls NKF.nkf with '-em').
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-exm0', str).
  def toeuc(str)
    ::NKF::nkf('-em', str)
  end
  module_function :toeuc

  # call-seq:
  #    Kconv.tosjis(str)   -> string
  #
  # Converts <code>str</code> to Shift_JIS (calls NKF.nkf with '-sm').
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-sxm0', str).
  def tosjis(str)
    ::NKF::nkf('-sm', str)
  end
  module_function :tosjis

  # call-seq:
  #    Kconv.toutf8(str)   -> string
  #
  # Converts <code>str</code> to UTF-8 (calls NKF.nkf with '-wm').
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-wxm0', str).
  def toutf8(str)
    ::NKF::nkf('-wm', str)
  end
  module_function :toutf8

  # call-seq:
  #    Kconv.toutf16(str)   -> string
  #
  # Converts <code>str</code> to UTF-16 (calls NKF.nkf with '-w16m').
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf('-w16xm0', str).
  def toutf16(str)
    ::NKF::nkf('-w16m', str)
  end
  module_function :toutf16

  #
  # guess
  #

  # call-seq:
  #    Kconv.guess(str)   -> integer
  #
  # Guesses the encoding of <code>str</code> by delegating to NKF.guess and
  # returns its result unchanged.
  def guess(str)
    ::NKF::guess(str)
  end
  module_function :guess

  # call-seq:
  #    Kconv.guess_old(str)   -> integer
  #
  # Guesses the encoding of <code>str</code> by delegating to NKF.guess1 and
  # returns its result unchanged.
  # NOTE(review): this relies on the installed NKF extension providing
  # guess1 -- confirm before using it on a newer Ruby.
  def guess_old(str)
    ::NKF::guess1(str)
  end
  module_function :guess_old

  #
  # isEncoding
  #

  # call-seq:
  #    Kconv.iseuc(str)   -> obj or nil
  #
  # Tests whether every byte sequence in <code>str</code> is well-formed
  # EUC-JP, using RegexpEucjp. Returns a true value on success and nil
  # otherwise.
  #
  # *Note* do not rely on the return value being a MatchData; use it only
  # as a truth value.
  def iseuc(str)
    RegexpEucjp.match(str)
  end
  module_function :iseuc

  # call-seq:
  #    Kconv.issjis(str)   -> obj or nil
  #
  # Tests whether every byte sequence in <code>str</code> is well-formed
  # Shift_JIS, using RegexpShiftjis. Returns a true value on success and nil
  # otherwise.
  #
  # *Note* do not rely on the return value being a MatchData; use it only
  # as a truth value.
  def issjis(str)
    RegexpShiftjis.match(str)
  end
  module_function :issjis

  # call-seq:
  #    Kconv.isutf8(str)   -> obj or nil
  #
  # Tests whether every byte sequence in <code>str</code> is well-formed
  # UTF-8, using RegexpUtf8. Returns a true value on success and nil
  # otherwise.
  #
  # *Note* do not rely on the return value being a MatchData; use it only
  # as a truth value.
  def isutf8(str)
    RegexpUtf8.match(str)
  end
  module_function :isutf8

end
265
class String
  # call-seq:
  #    String#kconv(out_code, in_code = Kconv::AUTO)
  #
  # Converts the receiver to <code>out_code</code> by forwarding to
  # Kconv.kconv(self, out_code, in_code).
  # <code>out_code</code> and <code>in_code</code> are given as constants of
  # Kconv.
  #
  # *Note*
  # This method decodes MIME encoded strings and
  # converts halfwidth katakana to fullwidth katakana.
  # If you don't want that, use NKF.nkf directly.
  def kconv(out_code, in_code=Kconv::AUTO)
    Kconv.kconv(self, out_code, in_code)
  end

  #
  # to Encoding
  #
  # Each method below simply forwards the receiver to the Kconv module
  # function of the same name.
  #
  # *Note* (applies to all of the to* methods)
  # They decode MIME encoded strings and convert halfwidth katakana to
  # fullwidth katakana. If you don't want that, call NKF.nkf yourself with
  # the option string shown for each method.
  #

  # call-seq:
  #    String#tojis   -> string
  #
  # Converts the receiver to ISO-2022-JP.
  # Raw alternative: NKF.nkf('-jxm0', str).
  def tojis
    Kconv.tojis(self)
  end

  # call-seq:
  #    String#toeuc   -> string
  #
  # Converts the receiver to EUC-JP.
  # Raw alternative: NKF.nkf('-exm0', str).
  def toeuc
    Kconv.toeuc(self)
  end

  # call-seq:
  #    String#tosjis   -> string
  #
  # Converts the receiver to Shift_JIS.
  # Raw alternative: NKF.nkf('-sxm0', str).
  def tosjis
    Kconv.tosjis(self)
  end

  # call-seq:
  #    String#toutf8   -> string
  #
  # Converts the receiver to UTF-8.
  # Raw alternative: NKF.nkf('-wxm0', str).
  def toutf8
    Kconv.toutf8(self)
  end

  # call-seq:
  #    String#toutf16   -> string
  #
  # Converts the receiver to UTF-16.
  # Raw alternative: NKF.nkf('-w16xm0', str).
  def toutf16
    Kconv.toutf16(self)
  end

  #
  # is Encoding
  #
  # Each predicate forwards the receiver to the Kconv module function of the
  # same name and returns its result.
  #
  # *Note* do not rely on the return value being a MatchData; use it only
  # as a truth value.
  #

  # call-seq:
  #    String#iseuc   -> obj or nil
  #
  # Returns a true value when the receiver looks like EUC-JP, nil otherwise.
  def iseuc
    Kconv.iseuc(self)
  end

  # call-seq:
  #    String#issjis   -> obj or nil
  #
  # Returns a true value when the receiver looks like Shift_JIS, nil otherwise.
  def issjis
    Kconv.issjis(self)
  end

  # call-seq:
  #    String#isutf8   -> obj or nil
  #
  # Returns a true value when the receiver looks like UTF-8, nil otherwise.
  def isutf8
    Kconv.isutf8(self)
  end
end
Note: See TracBrowser for help on using the repository browser.