1 | require "rss/utils"
|
---|
2 |
|
---|
module RSS

  # Converts strings from one character encoding to another.
  #
  # The constructor selects a conversion strategy for the requested pair of
  # encodings and installs it on the new instance as a singleton #convert
  # method (see #def_convert).  Strategies are looked up by name: a pair
  # such as ("EUC-JP", "UTF-8") is served by #def_to_euc_jp_from_utf_8 when
  # such a method exists; every other pair goes through #def_else_enc.
  #
  # NOTE: the def_* methods must stay public, because #initialize finds
  # them with respond_to?, which ignores private methods on current Rubies.
  class Converter

    include Utils

    # Sets up #convert for converting from +from_enc+ to +to_enc+.
    #
    # to_enc::   name of the target encoding.
    # from_enc:: name of the source encoding; 'utf-8' is used when it is
    #            nil or false.
    #
    # Encoding names are compared case-insensitively, with '-' and '_'
    # treated as the same character.  May raise
    # UnknownConversionMethodError (via #def_else_enc) when no strategy is
    # available for the pair.
    def initialize(to_enc, from_enc=nil)
      normalized_to_enc = normalize_encoding_name(to_enc)
      from_enc ||= 'utf-8'
      normalized_from_enc = normalize_encoding_name(from_enc)

      if normalized_to_enc == normalized_from_enc
        def_same_enc
        return
      end

      def_diff_enc = "def_to_#{normalized_to_enc}_from_#{normalized_from_enc}"
      if respond_to?(def_diff_enc)
        __send__(def_diff_enc)
      else
        def_else_enc(to_enc, from_enc)
      end
    end

    # Default conversion: returns +value+ untouched.  The def_* methods
    # replace this on the instance with a real conversion.
    def convert(value)
      value
    end

    # Defines a singleton #convert on this instance.
    #
    # The block receives the source-code name of the argument ('value') and
    # must return a String of Ruby source that evaluates to the converted
    # value.  The generated method applies that code to String arguments
    # and returns every other argument unchanged.
    #
    # depth:: passed to get_file_and_line_from_caller (not defined in this
    #         file, presumably provided by Utils) to obtain the file name
    #         and line number reported for the generated code.
    def def_convert(depth=0)
      instance_eval(<<-EOC, *get_file_and_line_from_caller(depth))
      def convert(value)
        if value.kind_of?(String)
          #{yield('value')}
        else
          value
        end
      end
      EOC
    end

    # Defines #convert on top of the iconv library.
    #
    # Raises UnknownConversionMethodError when iconv cannot be loaded or
    # when creating the Iconv object fails with ArgumentError or
    # SystemCallError.  The generated #convert raises ConversionError when
    # iconv reports a failure.
    def def_iconv_convert(to_enc, from_enc, depth=0)
      begin
        require "iconv"
        @iconv = Iconv.new(to_enc, from_enc)
        def_convert(depth + 1) do |value|
          # The names are spliced in with String#dump so that quotes or
          # "\#{" inside them cannot alter the generated code.
          <<-EOC
          begin
            @iconv.iconv(#{value})
          rescue Iconv::Failure
            raise ConversionError.new(#{value}, #{to_enc.dump}, #{from_enc.dump})
          end
          EOC
        end
      rescue LoadError, ArgumentError, SystemCallError
        raise UnknownConversionMethodError.new(to_enc, from_enc)
      end
    end

    # Fallback for encoding pairs that have no dedicated def_to_X_from_Y
    # method.
    #
    # iconv is used whenever it can be loaded.  iconv is no longer part of
    # the standard library on current Rubies, so when it is missing and
    # String#encode exists the conversion is built on String#encode
    # instead of failing.  When neither is available this raises
    # UnknownConversionMethodError through #def_iconv_convert.
    def def_else_enc(to_enc, from_enc)
      if iconv_available? or !"".respond_to?(:encode)
        def_iconv_convert(to_enc, from_enc, 0)
      else
        def_encode_convert(to_enc, from_enc, 0)
      end
    end

    # Defines #convert for identical source and target encodings: String
    # values are returned as they are.
    def def_same_enc()
      def_convert do |value|
        value
      end
    end

    # Defines #convert using Uconv.<meth> when the uconv library can be
    # loaded, and NKF.nkf with the option string +nkf_arg+ otherwise.
    #
    # With uconv, the generated #convert raises ConversionError (reporting
    # +to_enc+ and +from_enc+) on Uconv::Error.
    def def_uconv_convert_if_can(meth, to_enc, from_enc, nkf_arg)
      begin
        require "uconv"
        def_convert(1) do |value|
          <<-EOC
          begin
            Uconv.#{meth}(#{value})
          rescue Uconv::Error
            raise ConversionError.new(#{value}, #{to_enc.dump}, #{from_enc.dump})
          end
          EOC
        end
      rescue LoadError
        require 'nkf'
        def_convert(1) do |value|
          "NKF.nkf(#{nkf_arg.dump}, #{value})"
        end
      end
    end

    # UTF-8 -> EUC-JP (uconv if available, NKF otherwise).
    def def_to_euc_jp_from_utf_8
      def_uconv_convert_if_can('u8toeuc', 'EUC-JP', 'UTF-8', '-We')
    end

    # EUC-JP -> UTF-8 (uconv if available, NKF otherwise).
    def def_to_utf_8_from_euc_jp
      def_uconv_convert_if_can('euctou8', 'UTF-8', 'EUC-JP', '-Ew')
    end

    # UTF-8 -> Shift_JIS (uconv if available, NKF otherwise).
    def def_to_shift_jis_from_utf_8
      def_uconv_convert_if_can('u8tosjis', 'Shift_JIS', 'UTF-8', '-Ws')
    end

    # Shift_JIS -> UTF-8 (uconv if available, NKF otherwise).
    def def_to_utf_8_from_shift_jis
      def_uconv_convert_if_can('sjistou8', 'UTF-8', 'Shift_JIS', '-Sw')
    end

    # Shift_JIS -> EUC-JP through NKF.
    def def_to_euc_jp_from_shift_jis
      require "nkf"
      def_convert do |value|
        "NKF.nkf('-Se', #{value})"
      end
    end

    # EUC-JP -> Shift_JIS through NKF.
    def def_to_shift_jis_from_euc_jp
      require "nkf"
      def_convert do |value|
        "NKF.nkf('-Es', #{value})"
      end
    end

    # ISO-2022-JP -> EUC-JP through NKF.
    def def_to_euc_jp_from_iso_2022_jp
      require "nkf"
      def_convert do |value|
        "NKF.nkf('-Je', #{value})"
      end
    end

    # EUC-JP -> ISO-2022-JP through NKF.
    def def_to_iso_2022_jp_from_euc_jp
      require "nkf"
      def_convert do |value|
        "NKF.nkf('-Ej', #{value})"
      end
    end

    # ISO-8859-1 -> UTF-8: every input byte is re-packed as the code point
    # with the same number.
    def def_to_utf_8_from_iso_8859_1
      def_convert do |value|
        "#{value}.unpack('C*').pack('U*')"
      end
    end

    # UTF-8 -> ISO-8859-1: code points up to 0xFF become a single byte;
    # larger ones are written as a decimal numeric character reference
    # ("&#NNN;").
    def def_to_iso_8859_1_from_utf_8
      def_convert do |value|
        <<-EOC
        array_utf8 = #{value}.unpack('U*')
        array_enc = []
        array_utf8.each do |num|
          if num <= 0xFF
            array_enc << num
          else
            array_enc.concat "&\#\#{num};".unpack('C*')
          end
        end
        array_enc.pack('C*')
        EOC
      end
    end

    private

    # Canonical form used to compare encoding names and to build the
    # def_to_X_from_Y method names: lower case, '-' replaced by '_'.
    def normalize_encoding_name(name)
      name.downcase.gsub(/-/, '_')
    end

    # True when the iconv library can be loaded.
    def iconv_available?
      require "iconv"
      true
    rescue LoadError
      false
    end

    # Defines #convert on top of String#encode.
    #
    # Both names are validated up front so that, as with iconv, an
    # unsupported pair is reported at construction time with
    # UnknownConversionMethodError.  The generated #convert raises
    # ConversionError when String#encode fails with an EncodingError.
    def def_encode_convert(to_enc, from_enc, depth=0)
      begin
        # Encoding.find raises ArgumentError for unknown names.
        same = Encoding.find(to_enc) == Encoding.find(from_enc)
        # Encoding::Converter.new takes (source, destination) and raises
        # Encoding::ConverterNotFoundError when no conversion path exists.
        Encoding::Converter.new(from_enc, to_enc) unless same
      rescue ArgumentError, EncodingError
        raise UnknownConversionMethodError.new(to_enc, from_enc)
      end

      def_convert(depth + 1) do |value|
        <<-EOC
        begin
          #{value}.encode(#{to_enc.dump}, #{from_enc.dump})
        rescue EncodingError
          raise ConversionError.new(#{value}, #{to_enc.dump}, #{from_enc.dump})
        end
        EOC
      end
    end

  end

end