#
# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
#
# Conversion routines between UTF-8 and the Windows-1252 (CP-1252) code page.
#
module REXML
  module Encoding
    # Ruby source text defining the +encode+ and +decode+ methods for CP-1252.
    # The methods are stored as a string rather than defined directly; this
    # file only assigns the string.
    # NOTE(review): presumably the encoding loader evaluates this string in
    # the context of the object that needs the codec -- confirm against caller.
    @@__REXML_encoding_methods = %q~
    # Convert from UTF-8
    #
    # content:: a UTF-8 encoded string
    #
    # Returns a string of CP-1252 bytes. Code points that have no CP-1252
    # byte are written as decimal numeric character references (&#nnnn;).
    def encode content
      array_utf8 = content.unpack('U*')
      array_enc = []
      array_utf8.each do |num|
        case num
          # code points up to U+00FF are emitted unchanged as a single byte
          when 0..0xFF then array_enc << num
          # characters added compared to iso-8859-1
          when 0x20AC then array_enc << 0x80 # 0xe2 0x82 0xac
          when 0x201A then array_enc << 0x82 # 0xe2 0x80 0x9a
          when 0x0192 then array_enc << 0x83 # 0xc6 0x92
          when 0x201E then array_enc << 0x84 # 0xe2 0x80 0x9e
          when 0x2026 then array_enc << 0x85 # 0xe2 0x80 0xa6
          when 0x2020 then array_enc << 0x86 # 0xe2 0x80 0xa0
          when 0x2021 then array_enc << 0x87 # 0xe2 0x80 0xa1
          when 0x02C6 then array_enc << 0x88 # 0xcb 0x86
          when 0x2030 then array_enc << 0x89 # 0xe2 0x80 0xb0
          when 0x0160 then array_enc << 0x8A # 0xc5 0xa0
          when 0x2039 then array_enc << 0x8B # 0xe2 0x80 0xb9
          when 0x0152 then array_enc << 0x8C # 0xc5 0x92
          when 0x017D then array_enc << 0x8E # 0xc5 0xbd
          when 0x2018 then array_enc << 0x91 # 0xe2 0x80 0x98
          when 0x2019 then array_enc << 0x92 # 0xe2 0x80 0x99
          when 0x201C then array_enc << 0x93 # 0xe2 0x80 0x9c
          when 0x201D then array_enc << 0x94 # 0xe2 0x80 0x9d
          when 0x2022 then array_enc << 0x95 # 0xe2 0x80 0xa2
          when 0x2013 then array_enc << 0x96 # 0xe2 0x80 0x93
          when 0x2014 then array_enc << 0x97 # 0xe2 0x80 0x94
          when 0x02DC then array_enc << 0x98 # 0xcb 0x9c
          when 0x2122 then array_enc << 0x99 # 0xe2 0x84 0xa2
          when 0x0161 then array_enc << 0x9A # 0xc5 0xa1
          when 0x203A then array_enc << 0x9B # 0xe2 0x80 0xba
          # U+0153 (small oe ligature); U+0152 is the capital and maps to 0x8C
          when 0x0153 then array_enc << 0x9C # 0xc5 0x93
          when 0x017E then array_enc << 0x9E # 0xc5 0xbe
          when 0x0178 then array_enc << 0x9F # 0xc5 0xb8
          else
            # Only code points above U+00FF reach this branch (the first
            # range takes everything below), so none fits in a single byte.
            # Numeric entity (&#nnnn;); shared by Stefan Scholl
            array_enc.concat "&\##{num};".unpack('C*')
        end
      end
      array_enc.pack('C*')
    end

    # Convert to UTF-8
    #
    # str:: a string of CP-1252 bytes
    #
    # Returns the UTF-8 encoded equivalent. Bytes without an entry in the
    # table below are copied through as the code point of the same value.
    def decode(str)
      array_cp1252 = str.unpack('C*')
      array_enc = []
      array_cp1252.each do |num|
        case num
          # characters added compared to iso-8859-1
          when 0x80 then array_enc << 0x20AC # 0xe2 0x82 0xac
          when 0x82 then array_enc << 0x201A # 0xe2 0x80 0x9a
          when 0x83 then array_enc << 0x0192 # 0xc6 0x92
          when 0x84 then array_enc << 0x201E # 0xe2 0x80 0x9e
          when 0x85 then array_enc << 0x2026 # 0xe2 0x80 0xa6
          when 0x86 then array_enc << 0x2020 # 0xe2 0x80 0xa0
          when 0x87 then array_enc << 0x2021 # 0xe2 0x80 0xa1
          when 0x88 then array_enc << 0x02C6 # 0xcb 0x86
          when 0x89 then array_enc << 0x2030 # 0xe2 0x80 0xb0
          when 0x8A then array_enc << 0x0160 # 0xc5 0xa0
          when 0x8B then array_enc << 0x2039 # 0xe2 0x80 0xb9
          when 0x8C then array_enc << 0x0152 # 0xc5 0x92
          when 0x8E then array_enc << 0x017D # 0xc5 0xbd
          when 0x91 then array_enc << 0x2018 # 0xe2 0x80 0x98
          when 0x92 then array_enc << 0x2019 # 0xe2 0x80 0x99
          when 0x93 then array_enc << 0x201C # 0xe2 0x80 0x9c
          when 0x94 then array_enc << 0x201D # 0xe2 0x80 0x9d
          when 0x95 then array_enc << 0x2022 # 0xe2 0x80 0xa2
          when 0x96 then array_enc << 0x2013 # 0xe2 0x80 0x93
          when 0x97 then array_enc << 0x2014 # 0xe2 0x80 0x94
          when 0x98 then array_enc << 0x02DC # 0xcb 0x9c
          when 0x99 then array_enc << 0x2122 # 0xe2 0x84 0xa2
          when 0x9A then array_enc << 0x0161 # 0xc5 0xa1
          when 0x9B then array_enc << 0x203A # 0xe2 0x80 0xba
          # 0x9C is the small oe ligature, U+0153 (the capital is 0x8C)
          when 0x9C then array_enc << 0x0153 # 0xc5 0x93
          when 0x9E then array_enc << 0x017E # 0xc5 0xbe
          when 0x9F then array_enc << 0x0178 # 0xc5 0xb8
          else
            array_enc << num
        end
      end
      array_enc.pack('U*')
    end
    ~
  end
end