source: branches/New_Config_Format-branch/gsdl/bin/script/touc.pl@ 1279

Last change on this file since 1279 was 1279, checked in by sjboddie, 24 years ago

merged changes to trunk into New_Config_Format branch

  • Property svn:executable set to *
  • Property svn:keywords set to Author Date Id Revision
File size: 2.4 KB
Line 
1#!/usr/bin/perl -w
2
3###########################################################################
4#
5# touc.pl -- converts text read from STDIN to little-endian 16-bit Unicode
5#            (preceded by a byte-order mark) written to STDOUT
6#
7# Copyright (C) 1999 DigiLib Systems Limited, NZ.
8#
9# This program is free software; you can redistribute it and/or modify
10# it under the terms of the GNU General Public License as published by
11# the Free Software Foundation; either version 2 of the License, or
12# (at your option) any later version.
13#
14# This program is distributed in the hope that it will be useful,
15# but WITHOUT ANY WARRANTY; without even the implied warranty of
16# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17# GNU General Public License for more details.
18#
19# You should have received a copy of the GNU General Public License
20# along with this program; if not, write to the Free Software
21# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22#
23###########################################################################
24
# Runs at compile time, ahead of the "use" lines that follow, so that the
# modules they name can be found in the perllib directory under GSDLHOME.
# The script refuses to start when GSDLHOME is missing from the environment.
BEGIN {
    my $gsdlhome = $ENV{'GSDLHOME'};
    if (!defined $gsdlhome) {
        die "GSDLHOME not set\n";
    }
    unshift @INC, $gsdlhome . "/perllib";
}
29
30use unicode;
31use multiread;
32use parsargv;
33
34
# Decide which encoding the input is in, based on the command-line switches.
#
# parsargv::parse is handed \@ARGV together with a list of option names,
# each followed by a reference to the variable belonging to that option
# (presumably parse fills these in -- see perllib/parsargv.pm).  If parse
# returns false the usage text is written to STDERR and the script dies.
$encoding = "utf8";
unless (parsargv::parse (\@ARGV,
                         'unicode',      \$unicode,
                         'iso_8859_1',   \$iso_8859_1,
                         'iso_8859_6',   \$iso_8859_6,
                         'windows_1256', \$windows_1256,
                         'gb',           \$gb)) {
    print STDERR
        "\n usage: $0 [options]\n\n",
        " options:\n",
        " -unicode input is in utf-8 or unicode (default)\n",
        " -iso_8859_1 input is in extended ascii (ISO-8859-1 Latin 1)\n",
        " -iso_8859_6 input is in 8 bit Arabic (ISO-8859-6)\n",
        " -windows_1256 input is in Windows 1256 (Arabic)\n",
        " -gb input is in GB or GBK (simplified Chinese)\n\n";
    die "\n";
}

# "utf8" remains selected unless a switch says otherwise.  If more than one
# switch is given, the switch tested earliest in this chain takes precedence
# (-gb over -windows_1256 over -iso_8859_6 over -iso_8859_1 over -unicode).
if ($gb) {
    $encoding = "gb";
}
elsif ($windows_1256) {
    $encoding = "windows_1256";
}
elsif ($iso_8859_6) {
    $encoding = "iso_8859_6";
}
elsif ($iso_8859_1) {
    $encoding = "iso_8859_1";
}
elsif ($unicode) {
    $encoding = "utf8";
}
57
58
# Everything this script writes to STDOUT is raw 16-bit data, so the handle
# is put into binary mode on every platform rather than only when GSDLOS
# names Windows.  This stops any newline translation or encoding layer from
# altering the bytes, and avoids an "uninitialized value" warning under -w
# when GSDLOS is not set in the environment.
binmode (STDOUT);
62
# Output is little endian: start with the byte-order mark (U+FEFF written
# low byte first).
print "\xff\xfe";

# Read STDIN line by line through a multiread object configured with the
# encoding chosen above, until read_line returns undef.
my $reader = multiread->new ();
$reader->set_handle ('main::STDIN');
$reader->set_encoding ($encoding);

while (defined (my $line = $reader->read_line())) {
    # utf82unicode hands back a reference to an array of numeric character
    # values (presumably Unicode code points -- see perllib/unicode.pm).
    my $ucline = unicode::utf82unicode ($line);

    # Turn the character values into 16-bit code units.
    my @units = ();
    foreach my $codepoint (@$ucline) {
        if ($codepoint > 0xffff) {
            # A value this large does not fit in one 16-bit unit; writing
            # it as two plain bytes would corrupt the stream, so emit a
            # UTF-16 surrogate pair (high surrogate first) instead.
            my $offset = $codepoint - 0x10000;
            push (@units, 0xd800 + ($offset >> 10), 0xdc00 + ($offset & 0x3ff));
        } else {
            push (@units, $codepoint);
        }
    }

    # "v" packs each unit as an unsigned 16-bit value, low byte first.
    print pack ("v*", @units);
}
Note: See TracBrowser for help on using the repository browser.