Show
Ignore:
Timestamp:
21.02.2014 18:46:01 (7 years ago)
Author:
ak19
Message:

Fixing up URL encoding of cgi args so that phrase searching works again. Tested MGPP, Lucene and SQLite searching. Tested simple search, fielded search, advanced single field and multi-field as well as running a query.

Files:
1 modified

Legend:

Unmodified
Added
Removed
  • main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp

    r26560 r28841  
    4343#endif 
    4444 
     45// Switch for the security changes (url-encoding of cgi arguments): when set to false, safe_cgi_arg() and unsafe_cgi_arg() return without modifying their arguments. 
     46static bool do_safe_cgi_args = true; 
    4547 
    4648static unsigned short hexdigit (unsigned short c) { 
     
    336338// This function encodes <>, &, ", ', / which are scripting chars or chars which can be used to 
    337339// break out of an html/XML/javascript context. 
    338 void safe_cgi_arg (text_t &argstr) { 
     340void safe_cgi_arg (const text_t &key, text_t &argstr) { 
     341  if(!do_safe_cgi_args) { 
     342    return; 
     343  } 
     344 
    339345  text_t::iterator in = argstr.begin(); 
    340346  text_t out = ""; 
     
    350356    else { // append whatever char is in *in, but as a char, not int 
    351357            //out += *in; // appends as int 
    352       out += " "; // append placeholder character 
    353       out[out.size()-1] = *in; // now set location containing placeholder to what's in *in 
     358      out.push_back(*in); 
    354359    } 
    355360    ++in; 
     
    359364  argstr += out;   
    360365} 
     366 
     367 
     368// given a list of characters (or "all") to decode, and given the string, str, where those  
     369// characters are to be decoded, this method replaces any occurrences of the url-encoded  
     370// variants of those characters with their actual characters in the given string str. 
     371void unsafe_cgi_arg(const text_t &chars, text_t &str) { 
     372  if(!do_safe_cgi_args) { 
     373    return; 
     374  } 
     375 
     376  text_t allchars = "<>&\"\'/"; 
     377 
     378  text_t chars_to_decode = (chars == "all" || chars == "ALL") ? allchars : chars; 
     379 
     380  text_t::iterator in = chars_to_decode.begin(); 
     381  text_t::iterator end = chars_to_decode.end(); 
     382 
     383  char hex_char[4]; 
     384 
     385  // using sprint to urlencode a character. See http://www.programmingforums.org/thread15443.html 
     386 
     387  while (in != end) {  
     388     
     389    // *in is a character from the accepted list of chars_to_decode list 
     390     
     391    // 1. create the url-encoded value of the char *in in variable hex_char 
     392    // sprintf adds in a null byte at the end 
     393    sprintf(hex_char,"%%%02X",*in); 
     394     
     395    // 2. Need the actual char to be decoded as a text_t string, so we can do a string replace with it 
     396    text_t tmp = ""; 
     397    tmp.push_back(*in); 
     398     
     399    // 3. replaces occurrences of hex_char (the url_encoded version of the char *in) in str with its decoded version     
     400    str.replace(hex_char, tmp); 
     401 
     402    ++in; 
     403  }   
     404} 
     405 
    361406 
    362407// split up the cgi arguments 
     
    378423    decode_cgi_arg (value); 
    379424 
    380     safe_cgi_arg(value); // mitigate obvious cross-site scripting hacks in URL cgi-params 
     425    safe_cgi_arg(key, value); // mitigate obvious cross-site scripting hacks in URL cgi-params 
    381426 
    382427    value.setencoding(1); // other encoding