#
# This class was contributed by Mikko Tiihonen mikko DOT tiihonen AT hut DOT fi
#
module REXML
  module Encoding
    # Ruby source text for the ISO-8859-15 (Latin-9) <-> UTF-8 conversion
    # methods. The methods are kept in a string rather than defined directly.
    # NOTE(review): presumably this string is evaluated by the encoding
    # loader elsewhere in REXML -- confirm against the caller.
    @@__REXML_encoding_methods = %q~
    # Convert from UTF-8 to ISO-8859-15.
    #
    # content:: a UTF-8 encoded String.
    #
    # Returns a String with one byte per representable character. Code
    # points that have no ISO-8859-15 byte are written as decimal numeric
    # character references (&#nnnn;).
    def to_iso_8859_15(content)
      array_enc = []
      content.unpack('U*').each do |num|
        case num
        # shortcut: the first bunch of basic characters map to themselves
        when 0..0xA3 then array_enc << num
        # Characters removed compared to iso-8859-1: their byte values are
        # reused for other characters in ISO-8859-15, so they can only be
        # expressed as numeric entities. (Pushing a String here would make
        # pack('C*') raise TypeError.)
        when 0xA4, 0xA6, 0xA8, 0xB4, 0xB8, 0xBC, 0xBD, 0xBE
          array_enc.concat "&\##{num};".unpack('C*')
        # characters added compared to iso-8859-1
        when 0x20AC then array_enc << 0xA4 # 0xe2 0x82 0xac
        when 0x0160 then array_enc << 0xA6 # 0xc5 0xa0
        when 0x0161 then array_enc << 0xA8 # 0xc5 0xa1
        when 0x017D then array_enc << 0xB4 # 0xc5 0xbd
        when 0x017E then array_enc << 0xB8 # 0xc5 0xbe
        when 0x0152 then array_enc << 0xBC # 0xc5 0x92
        when 0x0153 then array_enc << 0xBD # 0xc5 0x93
        when 0x0178 then array_enc << 0xBE # 0xc5 0xb8
        else
          if num <= 0xFF
            # all remaining basic characters can be used directly
            array_enc << num
          else
            # Numeric entity (&#nnnn;); approach credited to Stefan Scholl
            array_enc.concat "&\##{num};".unpack('C*')
          end
        end
      end
      array_enc.pack('C*')
    end

    # Convert from ISO-8859-15 to UTF-8.
    #
    # str:: a String of ISO-8859-15 bytes.
    #
    # Returns a UTF-8 String. Every byte maps to the code point of the same
    # value except the eight positions where ISO-8859-15 differs from
    # iso-8859-1.
    def from_iso_8859_15(str)
      array_enc = []
      str.unpack('C*').each do |num|
        case num
        # characters that differ compared to iso-8859-1
        when 0xA4 then array_enc << 0x20AC
        when 0xA6 then array_enc << 0x0160
        when 0xA8 then array_enc << 0x0161
        when 0xB4 then array_enc << 0x017D
        when 0xB8 then array_enc << 0x017E
        when 0xBC then array_enc << 0x0152
        when 0xBD then array_enc << 0x0153
        when 0xBE then array_enc << 0x0178
        else
          array_enc << num
        end
      end
      array_enc.pack('U*')
    end
    ~
  end
end