Changeset 1233


Ignore:
Timestamp:
2000-06-23T15:21:39+12:00 (24 years ago)
Author:
sjboddie
Message:

Created converter classes for simple 8 bit encodings that use a
simple textual map file. Instances of these classes are used to handle
the Windows 1256 (Arabic) encoding.

Location:
trunk/gsdl
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/lib/gsdlunicode.cpp

    r1076 r1233  
    2828/*
    2929   $Log$
     30   Revision 1.13  2000/06/23 03:21:38  sjboddie
     31   Created converter classes for simple 8 bit encodings that use a
     32   simple textual map file. Instances of these classes are used to handle
     33   the Windows 1256 (Arabic) encoding.
     34
    3035   Revision 1.12  2000/04/06 19:58:02  cs025
    3136   Correcting a correction - reinstated all lib files due to silly
     
    7075#include <stdio.h>
    7176
     77#if defined(GSDL_USE_OBJECTSPACE)
     78#  include <ospace\std\iostream>
     79#  include <ospace\std\fstream>
     80#elif defined(GSDL_USE_IOS_H)
     81#  include <iostream.h>
     82#  include <fstream.h>
     83#else
     84#  include <iostream>
     85#  include <fstream>
     86#endif
    7287
    7388
     
    551566  else status = unfinished;
    552567}
     568
     569
     570bool simplemapconvert::loadmapfile (bool in) {
     571  if (loaded) return true;
     572  if (mapfile.empty()) return false;
     573
     574  char *cfilename = mapfile.getcstr();
     575#ifdef GSDL_USE_IOS_H
     576  ifstream mapfilein (cfilename, ios::in | ios::nocreate);
     577#else
     578  ifstream mapfilein (cfilename, ios::in);
     579#endif
     580  delete cfilename;
     581  if (!mapfilein) return false;
     582
     583  char cline[2048];
     584  text_t line;
     585
     586  while (!mapfilein.eof()) {
     587    mapfilein.getline (cline, 2048);
     588    line.clear();
     589    line.appendcstr (cline);
     590    if (line.empty()) continue;
     591    // remove comments
     592    text_t::iterator end = line.end();
     593    text_t::iterator here = findchar (line.begin(), end, '#');
     594    if (here != end) {
     595      line.erase (here, end);
     596      if (line.empty()) continue;
     597    }
     598   
     599    text_tarray parts;
     600    splitchar (line.begin(), line.end(), '\t', parts);
     601   
     602    // do some simple sanity checks
     603    if (parts.size() < 2) continue;
     604    text_t::iterator begin1 = parts[0].begin();
     605    text_t::iterator begin2 = parts[1].begin();
     606    if (*begin1 != '0' || *(begin1+1) != 'x') continue;
     607    if (*begin2 != '0' || *(begin2+1) != 'x') continue;
     608    char *from = parts[0].getcstr();
     609    char *to = parts[1].getcstr();
     610    unsigned short f = 0, t = 0;
     611    sscanf (from, "%i", &f);
     612    sscanf (to, "%i", &t);
     613    delete from;
     614    delete to;
     615   
     616    if (in) mapping[f] = t;
     617    else mapping[t] = f;
     618  }
     619
     620  loaded = true;
     621  return true;
     622}
     623
     624unsigned short simplemapconvert::convert (unsigned short c, bool in) {
     625
     626  if (!loaded)
     627    if (!loadmapfile(in)) return absentc;
     628 
     629  return mapping[c];
     630}
     631
     632
     633void simplemapinconvertclass::convert (text_t &output, status_t &status) {
     634  output.clear();
     635 
     636  if (start == NULL || len == 0) {
     637    status = finished;
     638    return;
     639  }
     640
     641  // don't want any funny sign conversions happening
     642  unsigned char *here = (unsigned char *)start;
     643  while (len > 0) {
     644
     645    if (*here < 0x80)
     646      output.push_back (*here); // append this character
     647    else
     648      output.push_back (converter.convert(*here, true));
     649
     650    ++here;
     651    --len;
     652  }
     653
     654  start = (char *)here; // save current position
     655  status = finished;
     656}
     657
     658
     659void simplemapoutconvertclass::convert (char *output, size_t maxlen,
     660                    size_t &len, status_t &status) {
     661
     662  if (input == NULL || output == NULL) {
     663    status = finished;
     664    return;
     665  }
     666
     667  // don't want any funny sign conversions happening
     668  unsigned char *uoutput = (unsigned char *)output;
     669  text_t::iterator textend = input->end();
     670  len = 0;
     671  while ((len < maxlen) && (texthere != textend)) {
     672
     673    if (*texthere < 0x80) *uoutput = (unsigned char)(*texthere);
     674    else *uoutput = converter.convert (*texthere, false);
     675
     676    ++uoutput;
     677    ++len;
     678    ++texthere;
     679  }
     680 
     681  if (texthere == textend) status = finished;
     682  else status = unfinished;
     683}
  • trunk/gsdl/lib/gsdlunicode.h

    r1076 r1233  
    215215};
    216216
     217
     218// Simple input and output converter classes for use with 8 bit encodings
     219// using simple textual map files. Map files should contain (at least) two
     220// tab-separated fields. The first field is the mapped value and the second
     221// field is the unicode value.
     222
     223struct ltus_t
     224{
     225  bool operator()(const unsigned short &t1, const unsigned short &t2) const
     226  { return t1 < t2; }
     227};
     228
     229
     230class simplemapconvert {
     231public:
     232  simplemapconvert () {absentc=0; loaded=false;}
     233  unsigned short convert (unsigned short c, bool in);
     234  void setmapfile (const text_t &themapfile) {mapfile = themapfile;}
     235
     236protected:
     237  bool loadmapfile (bool in);
     238
     239  map <unsigned short, unsigned short, ltus_t> mapping;
     240  bool loaded;
     241  text_t mapfile;
     242  unsigned short absentc;
     243};
     244
     245
     246class simplemapinconvertclass : public inconvertclass {
     247public:
     248  void convert (text_t &output, status_t &status);
     249
     250  void setmapfile (const text_t &themapfile) {converter.setmapfile(themapfile);}
     251 
     252protected:
     253  simplemapconvert converter;
     254};
     255
     256class simplemapoutconvertclass : public rzwsoutconvertclass {
     257public:
     258  void convert (char *output, size_t maxlen,
     259        size_t &len, status_t &status);
     260
     261  void setmapfile (const text_t &themapfile) {converter.setmapfile(themapfile);}
     262 
     263protected:
     264  simplemapconvert converter;
     265};
     266
     267
     268
     269
    217270#endif
  • trunk/gsdl/src/recpt/librarymain.cpp

    r1194 r1233  
    2828/*
    2929   $Log$
     30   Revision 1.27  2000/06/23 03:21:39  sjboddie
     31   Created converter classes for simple 8 bit encodings that use a
     32   simple textual map file. Instances of these classes are used to handle
     33   the Windows 1256 (Arabic) encoding.
     34
    3035   Revision 1.26  2000/05/29 03:30:03  sjboddie
    3136   fixed a bug preventing GB encoded text from being displayed correctly
     
    230235  recpt.add_converter ("g", &gbinconvert, &gboutconvert);
    231236
     237  text_t armapfile = filename_cat (gsdlhome, "unicode", "MAPPINGS");
     238  armapfile = filename_cat (armapfile, "WINDOWS", "1256.TXT");
     239  simplemapinconvertclass arinconvert;
     240  arinconvert.setmapfile (armapfile);
     241  simplemapoutconvertclass aroutconvert;
     242  aroutconvert.setmapfile (armapfile);
     243  recpt.add_converter ("a", &arinconvert, &aroutconvert); 
     244
     245
    232246  // the list of actions. Note: these actions will become invalid
    233247  // at the end of this function.
Note: See TracChangeset for help on using the changeset viewer.