1 | #!/usr/local/bin/perl5 -w
|
---|
2 |
|
---|
3 | ###########################################################################
|
---|
4 | #
|
---|
5 | # uc2utf8.pl --
|
---|
6 | # A component of the Greenstone digital library software
|
---|
7 | # from the New Zealand Digital Library Project at the
|
---|
8 | # University of Waikato, New Zealand.
|
---|
9 | #
|
---|
10 | # Copyright (C) 1999 New Zealand Digital Library Project
|
---|
11 | #
|
---|
12 | # This program is free software; you can redistribute it and/or modify
|
---|
13 | # it under the terms of the GNU General Public License as published by
|
---|
14 | # the Free Software Foundation; either version 2 of the License, or
|
---|
15 | # (at your option) any later version.
|
---|
16 | #
|
---|
17 | # This program is distributed in the hope that it will be useful,
|
---|
18 | # but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
19 | # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
20 | # GNU General Public License for more details.
|
---|
21 | #
|
---|
22 | # You should have received a copy of the GNU General Public License
|
---|
23 | # along with this program; if not, write to the Free Software
|
---|
24 | # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
25 | #
|
---|
26 | ###########################################################################
|
---|
27 |
|
---|
# This program converts 16-bit Unicode text (which must begin with a
# byte-order mark) read from standard input into UTF-8 on standard output.
|
---|
29 |
|
---|
# Put $GSDLHOME/perllib at the front of the module search path.  This is
# done in a BEGIN block so the path is in place before any subsequent
# "use" statement is compiled.  Dies if GSDLHOME is not set.
BEGIN {
    if (!defined $ENV{'GSDLHOME'}) {
        die "GSDLHOME not set\n";
    }

    my $perllib = "$ENV{'GSDLHOME'}/perllib";
    unshift @INC, $perllib;
}
|
---|
34 |
|
---|
35 | use unicode;
|
---|
36 |
|
---|
# On Windows, filehandles default to text mode, which would alter the raw
# byte pairs this script reads; switch STDIN to binary mode there.
# GSDLOS may be unset (only GSDLHOME is checked at startup), so test that
# it is defined first to avoid an "uninitialized value" warning under -w.
if (defined $ENV{'GSDLOS'} && $ENV{'GSDLOS'} =~ /windows/i) {
    binmode (STDIN); # silly windows
}
|
---|
40 |
|
---|
# Read the input two bytes at a time.  The first pair must be a byte-order
# mark, which selects the byte order for everything after it; every later
# pair is turned into one 16-bit value and printed as UTF-8.
my $pair      = "";   # the two raw bytes most recently read
my $seen_bom  = 0;    # false until the leading byte-order mark is consumed
my $bigendian = 1;    # byte order of the input, decided by the BOM

while (1) {
    my $nread = read (STDIN, $pair, 2);

    # read returns undef on failure; report it instead of quietly
    # stopping with truncated output.
    die "error reading input: $!\n" unless defined $nread;

    last if $nread == 0;    # clean end of input

    if ($nread < 2) {
        # A single leftover byte cannot form a 16-bit value.  It is
        # still dropped, but no longer silently.
        warn "ignoring incomplete trailing byte\n";
        last;
    }

    if (!$seen_bom) {
        if ($pair eq "\xff\xfe") {
            $bigendian = 0;
        } elsif ($pair eq "\xfe\xff") {
            $bigendian = 1;
        } else {
            die "not unicode text\n";
        }
        $seen_bom = 1;
        next;    # the BOM itself produces no output
    }

    # "n" unpacks a big-endian 16-bit value, "v" a little-endian one.
    my $code = unpack ($bigendian ? "n" : "v", $pair);

    # NOTE(review): each 16-bit value is converted on its own, so
    # surrogate pairs are not combined here -- confirm how
    # unicode::unicode2utf8 treats values in 0xD800-0xDFFF.
    print &unicode::unicode2utf8([$code]);
}
|
---|