Changeset 2062


Ignore:
Timestamp:
2001-02-23T17:13:35+13:00 (23 years ago)
Author:
paynter
Message:

Added a function from the receptionist that decodes phrase CGI arguments
from URL-safe format (e.g. %43%41%54 means CAT) so we can do searches on
UTF8 text.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • trunk/gsdl/src/phind/host/phindcgi.cpp

    r2009 r2062  
    6969            unsigned long &first_d, unsigned long &last_d,
    7070            bool &XMLmode);
     71
     72void decode_cgi_arg (text_t &argstr);
    7173
    7274void print_expansions(char *cgi_script, char *collection, bool XMLmode, UCArray body,
     
    900902      // p: the phrase text
    901903      else if (key[0] == 'p') {
     904    decode_cgi_arg(value);
    902905    toUCArray(value, phrasetext);
    903906      }
     
    916919    strcpy(classifier, "1");
    917920  }
     921}
     922
     923
     924// Convert %xx and + to their appropriate equivalents
     925//
     926// This function was copied from %GSDLHOME/src/recpt/cgiutils.cpp
     927// because it was much easier to copy it than to link against it.
     928
     929static unsigned short hexdigit (unsigned short c) {
     930  if (c >= '0' && c <= '9') return (c-'0');
     931  if (c >= 'a' && c <= 'f') return (c-'a'+10);
     932  if (c >= 'A' && c <= 'F') return (c-'A'+10);
     933  return c;
     934}
     935
     936void decode_cgi_arg (text_t &argstr) {
     937  text_t::iterator in = argstr.begin();
     938  text_t::iterator out = in;
     939  text_t::iterator end = argstr.end();
     940 
     941  while (in != end) {
     942    if (*in == '+') *out = ' ';
     943   
     944    else if (*in == '%') {
     945      unsigned short c = '%';
     946      in++;
     947      if (in != end) {
     948    c = hexdigit (*in);
     949    in++;
     950      }
     951      if (in != end && c < 16) { // sanity check on the previous character
     952    c = c*16 + hexdigit (*in);
     953      }
     954     
     955      *out = c;
     956    } else *out = *in;
     957   
     958    if (in != end) in++;
     959    out++;
     960  }
     961 
     962  // remove the excess characters
     963  argstr.erase (out, end);
    918964}
    919965
Note: See TracChangeset for help on using the changeset viewer.