# -*- mode: ruby; ruby-indent-level: 2; indent-tabs-mode: t; tab-width: 2 -*- vim: sw=2 ts=2
module REXML
  # Mixin that gives an object an +encoding+ attribute and, whenever that
  # attribute changes, runs the matching registered block against the object.
  # Blocks are kept in a module-level registry so that encoding definition
  # files can add themselves via Encoding.register when they are required.
  module Encoding
    # Registry: encoding name => block that is called with the target object.
    @encoding_methods = {}

    # Stores +block+ in the registry under +enc+, replacing any earlier entry.
    def self.register(enc, &block)
      @encoding_methods[enc] = block
    end

    # Calls the block registered under +enc+ with +obj+ and returns its result.
    #
    # Raises ArgumentError when nothing is registered under +enc+ (previously
    # this surfaced as a NoMethodError on nil, which hid the real cause).
    def self.apply(obj, enc)
      installer = @encoding_methods[enc]
      raise ArgumentError, "No encoding method registered for #{enc}" unless installer
      installer.call(obj)
    end

    # Returns the block registered under +enc+, or nil when there is none.
    def self.encoding_method(enc)
      @encoding_methods[enc]
    end

    # Native, default format is UTF-8, so it is declared here rather than in
    # an encodings/ definition.
    UTF_8 = 'UTF-8'
    UTF_16 = 'UTF-16'
    UNILE = 'UNILE'

    # ID ---> Encoding name
    attr_reader :encoding

    # Switches the receiver to encoding +enc+ (case-insensitive; nil means
    # UTF-8) and applies the matching registered block to the receiver.
    #
    # For anything other than UTF-8 the ICONV definition is tried first; if it
    # cannot be loaded or applied, a definition file named after the encoding
    # is required from rexml/encodings/ instead.
    #
    # Returns false when +enc+ equals the current encoding (nothing is done),
    # true otherwise.
    #
    # Raises ArgumentError when the name contains characters other than word
    # characters and '-', or when no definition can be loaded for it.
    def encoding=(enc)
      old_verbosity = $VERBOSE
      begin
        # NOTE(review): presumably silences warnings emitted while the
        # definition files are loaded -- confirm. Restored in the ensure below.
        $VERBOSE = false
        enc = enc.upcase unless enc.nil?
        return false if defined?(@encoding) && enc == @encoding

        if enc && enc != UTF_8
          # Validate before storing and anchor on the whole string: the name is
          # interpolated into a require path below, so a multi-line value must
          # not slip through on the strength of one well-formed line.
          raise ArgumentError, "Bad encoding name #{enc}" unless enc =~ /\A[\w\-]+\z/
          @encoding = enc
          # String#untaint no longer exists on current Rubies.
          @encoding.untaint if @encoding.respond_to?(:untaint)
          begin
            require 'rexml/encodings/ICONV.rb'
            Encoding.apply(self, "ICONV")
          rescue LoadError, StandardError
            # LoadError is not a StandardError, hence both. Deliberately not
            # Exception, so Interrupt/SystemExit still propagate.
            begin
              enc_file = File.join("rexml", "encodings", "#{@encoding}.rb")
              require enc_file
              Encoding.apply(self, @encoding)
            rescue LoadError => err
              # Diagnostics go to stderr; stdout may be carrying the document.
              warn err.message
              raise ArgumentError, "No decoder found for encoding #@encoding. Please install iconv."
            end
          end
        else
          @encoding = UTF_8
          require 'rexml/encodings/UTF-8.rb'
          Encoding.apply(self, @encoding)
        end
      ensure
        $VERBOSE = old_verbosity
      end
      true
    end

    # Guesses the encoding of the XML text +str+.
    #
    # Returns UTF_16 when the text starts with the bytes FE FF, UNILE when it
    # starts with FF FE, otherwise the upper-cased value of the +encoding+
    # pseudo-attribute of an XML declaration if one is found, and UTF_8 when
    # none is. A nil +str+ is treated as empty text.
    def check_encoding(str)
      # We have to recognize UTF-16, LSB UTF-16, and UTF-8
      raw = str.to_s
      # Compare raw bytes so the result does not depend on how +str+ is tagged.
      bom = raw.unpack('C2')
      return UTF_16 if bom == [0xfe, 0xff]
      return UNILE if bom == [0xff, 0xfe]

      # 'a*' hands back the same bytes as a binary string, so text whose bytes
      # are not valid in its tagged encoding can still be scanned.
      bytes = raw.unpack('a*').first
      # Group 1/2 are the quote characters; group 3 is the encoding name.
      declared = bytes[/^\s*<\?xml\s*version\s*=\s*(['"]).*?\1\s*encoding\s*=\s*(["'])(.*?)\2/m, 3]
      declared ? declared.upcase : UTF_8
    end
  end
end