Changeset 13456


Ignore:
Timestamp:
2006-12-07T17:32:49+13:00 (17 years ago)
Author:
kjdon
Message:

cgi_safe was sometimes getting passed Unicode, sometimes UTF-8. For one of these, it needs to encode chars above 127; for the other, chars above 255. The change I made last time, 255->127, mucked up the e arg for search terms, e.g. with diacritics. That change had been to make cgi_safe work for metadata with diacritics in format statements. So now there are two cgi_safe methods, one for UTF-8, one for Unicode. minus_safe also takes an arg for UTF-8 or not. Also, removed + from the list of chars to output directly. A space gets output as a +, so we need to encode + - this will enable a + to be used in a search string. Hope it doesn't break anything else...

Location:
trunk/gsdl/src/recpt
Files:
2 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/recpt/cgiutils.cpp

    r13103 r13456  
    340340    // convert %xx and + to their appropriate equivalents
    341341    decode_cgi_arg (value);
    342 
    343342    value.setencoding(1); // other encoding
    344343    // store this key=value pair
     
    416415}
    417416
    418 text_t minus_safe (const text_t &intext) {
     417// set utf8 to true if input is in utf-8, otherwise expects input in unicode
     418text_t minus_safe (const text_t &intext, bool utf8) {
    419419
    420420  text_t outtext;
     
    428428    ++here;
    429429  }
    430   outtext = cgi_safe (outtext);
     430  if (utf8) {
     431    outtext = cgi_safe_utf8 (outtext);
     432  } else {
     433    outtext = cgi_safe_unicode (outtext);
     434  }
    431435  return outtext;
    432436}
    433437
    434 text_t cgi_safe (const text_t &intext) {
     438// takes utf-8 input
     439text_t cgi_safe_utf8 (const text_t &intext) {
    435440  text_t outtext;
    436441 
     
    445450    ((c >= 'A') && (c <= 'Z')) ||
    446451    ((c >= '0') && (c <= '9')) ||
    447     (c == '+') || (c == '%') || (c == '-')) {
     452    (c == '%') || (c == '-')) {
     453      // alphanumeric character
     454      outtext.push_back(c);
     455    } else if (c == ' ') {
     456      // space
     457      outtext.push_back('+');
     458    } else if (c > 255) { // not utf-8 character
     459      cerr << "WARNING: expected utf-8 char, but got unicode!!\n";
     460    } else {
     461      // everything else
     462      outtext.push_back('%');
     463      c2hex(c, ttmp);
     464      outtext += ttmp;
     465    }
     466   
     467    ++here;
     468  }
     469 
     470  return outtext;
     471}
     472// takes unicode input
     473text_t cgi_safe_unicode (const text_t &intext) {
     474  text_t outtext;
     475 
     476  text_t::const_iterator here = intext.begin ();
     477  text_t::const_iterator end = intext.end ();
     478  unsigned short c;
     479  text_t ttmp;
     480 
     481  while (here != end) {
     482    c = *here;
     483    if (((c >= 'a') && (c <= 'z')) ||
     484    ((c >= 'A') && (c <= 'Z')) ||
     485    ((c >= '0') && (c <= '9')) ||
     486    (c == '%') || (c == '-')) {
    448487      // alphanumeric character
    449488      outtext.push_back(c);
     
    738777      // multiple character argument -- sort out any '-' chars
    739778      if (args["w"]=="utf-16be") // browsers don't like \0 in urls...
    740         compressed_args += minus_safe (args[argname]);
     779        compressed_args += minus_safe (args[argname], false);
    741780      else
    742         compressed_args += minus_safe (outconvert.convert(args[argname]));
    743 
     781        compressed_args += minus_safe (outconvert.convert(args[argname]), true);
     782     
    744783      if (saveconfhere != saveconfend) compressed_args.push_back ('-');
    745784
  • trunk/gsdl/src/recpt/cgiutils.h

    r12513 r13456  
    4747
    4848// turns any '-' in a cgi argument into "(-)"
    49 text_t minus_safe (const text_t &intext);
     49// set utf8 to true if input is in utf-8, otherwise expects input in unicode
     50text_t minus_safe (const text_t &intext, bool utf8);
    5051
    5152// returns the encoded version of a cgi argument
    52 text_t cgi_safe (const text_t &intext);
     53// the original text should be in utf8
     54text_t cgi_safe_utf8 (const text_t &intext);
     55// the original text should be in unicode
     56text_t cgi_safe_unicode (const text_t &intext);
    5357
    5458// check_save_conf_str checks the configuration string for
Note: See TracChangeset for help on using the changeset viewer.