Changeset 28841 for main/trunk/greenstone2/runtime-src/src
- Timestamp:
- 2014-02-21T18:46:01+13:00 (10 years ago)
- Location:
- main/trunk/greenstone2/runtime-src/src/recpt
- Files:
-
- 5 edited
Legend:
- Unmodified
- Added
- Removed
-
main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.cpp
r26560 r28841 43 43 #endif 44 44 45 // set to false to undo security changes (url-encoding arguments) 46 static bool do_safe_cgi_args = true; 45 47 46 48 static unsigned short hexdigit (unsigned short c) { … … 336 338 // This function encodes <>, &, ", ', / which are scripting chars or chars which can be used to 337 339 // break out of an html/XML/javascript context. 338 void safe_cgi_arg (text_t &argstr) { 340 void safe_cgi_arg (const text_t &key, text_t &argstr) { 341 if(!do_safe_cgi_args) { 342 return; 343 } 344 339 345 text_t::iterator in = argstr.begin(); 340 346 text_t out = ""; … … 350 356 else { // append whatever char is in *in, but as a char, not int 351 357 //out += *in; // appends as int 352 out += " "; // append placeholder character 353 out[out.size()-1] = *in; // now set location containing placeholder to what's in *in 358 out.push_back(*in); 354 359 } 355 360 ++in; … … 359 364 argstr += out; 360 365 } 366 367 368 // given a list of characters (or "all") to decode, and given the string, str, where those 369 // characters are to be decoded, this method replaces any occurrences of the url-encoded 370 // variants of those characters with their actual characters in the given string str. 371 void unsafe_cgi_arg(const text_t &chars, text_t &str) { 372 if(!do_safe_cgi_args) { 373 return; 374 } 375 376 text_t allchars = "<>&\"\'/"; 377 378 text_t chars_to_decode = (chars == "all" || chars == "ALL") ? allchars : chars; 379 380 text_t::iterator in = chars_to_decode.begin(); 381 text_t::iterator end = chars_to_decode.end(); 382 383 char hex_char[4]; 384 385 // using sprintf to urlencode a character. See http://www.programmingforums.org/thread15443.html 386 387 while (in != end) { 388 389 // *in is a character from the chars_to_decode list 390 391 // 1. create the url-encoded value of the char *in in variable hex_char 392 // sprintf adds in a null byte at the end 393 sprintf(hex_char,"%%%02X",*in); 394 395 // 2. 
Need the actual char to be decoded as a text_t string, so we can do a string replace with it 396 text_t tmp = ""; 397 tmp.push_back(*in); 398 399 // 3. replaces occurrences of hex_char (the url-encoded version of the char *in) in str with its decoded version 400 str.replace(hex_char, tmp); 401 402 ++in; 403 } 404 } 405 361 406 362 407 // split up the cgi arguments … … 378 423 decode_cgi_arg (value); 379 424 380 safe_cgi_arg( value); // mitigate obvious cross-site scripting hacks in URL cgi-params 425 safe_cgi_arg(key, value); // mitigate obvious cross-site scripting hacks in URL cgi-params 381 426 382 427 value.setencoding(1); // other encoding -
main/trunk/greenstone2/runtime-src/src/recpt/cgiutils.h
r13456 r28841 42 42 void split_cgi_args (const cgiargsinfoclass &argsinfo, text_t argstr, 43 43 cgiargsclass &args); 44 45 // url-decode selected chars of a given string 46 void unsafe_cgi_arg(const text_t &chars_to_decode, text_t &str); 44 47 45 48 text_t encode_commas (const text_t &intext); -
main/trunk/greenstone2/runtime-src/src/recpt/queryaction.cpp
r28220 r28841 1342 1342 formattedstring = args["q"]; 1343 1343 // remove & | ! for simple search, do segmentation if necessary 1344 // To url-decode the '&', format_querystring() will call unsafe_cgi_arg() first 1344 1345 format_querystring (formattedstring, args.getintarg("b"), segment); 1345 1346 if (args["ct"]!=0) { // mgpp and lucene - need to add in tag info if appropriate … … 1358 1359 if (args["b"]=="1" && args["fqa"]=="1") { // explicit query 1359 1360 formattedstring = args["q"]; 1361 1362 // Decode url-encoded characters, in particular %22 and %26 back to " and & respectively, since these characters have meaning 1363 // in queries: " is used in phrases and & is used in boolean advanced searches. 1364 // For form searches below, unsafe_cgi_arg is called in the parse_..._form() functions 1365 1366 unsafe_cgi_arg("ALL", formattedstring); 1360 1367 } 1361 1368 else { // form search 1362 1369 if (args["b"]=="0") { // regular form 1363 parse_reg_query_form(formattedstring, args, segment); 1370 parse_reg_query_form(formattedstring, args, segment); // will call unsafe_cgi_arg to decode url encoding 1364 1371 } 1365 1372 else { // advanced form 1366 parse_adv_query_form(formattedstring, args, segment); 1373 parse_adv_query_form(formattedstring, args, segment); // will call unsafe_cgi_arg to decode url encoding 1367 1374 } 1368 1375 args["q"] = formattedstring; -
main/trunk/greenstone2/runtime-src/src/recpt/querytools.cpp
r28222 r28841 25 25 26 26 #include "querytools.h" 27 #include "cgiutils.h" 27 28 #include <ctype.h> 28 29 #include "unitool.h" // for is_unicode_letdig … … 343 344 // This function removes boolean operators from simple searches, and segments 344 345 // chinese characters if segment=true 346 // Called by several parse_..._form methods here, this function decodes & 347 // to undo the URL encoding done in cgiutils.cpp for security purposes 345 348 void format_querystring (text_t &querystring, int querymode, bool segment) { 346 349 text_t formattedstring; 350 351 // & has meaning in boolean searches and can be %26 encoded at this point, need to decode them now. 352 // Also decode any " here, so that the entire search phrase is highlighted and not just the final word 353 unsafe_cgi_arg("ALL", querystring); 347 354 348 355 // advanced search, no segmenting, don't need to do anything … … 449 456 } 450 457 451 452 458 if (arg_ct == "2") { // lucene 453 459 // look for AND OR NOT and remove … … 579 585 580 586 587 // The following parse_..._form functions first decode various fields for 588 // both simple and advanced searches to undo the URL encoding. 589 // E.g. quotes have meaning in phrase searches and these have to be decoded 590 // before sending the search off to the index. 
591 581 592 // some query form parsing functions for use with mgpp & lucene 582 593 … … 599 610 text_t field = args["fqf"]; 600 611 if (field.empty()) return; // no query 612 unsafe_cgi_arg("ALL", field); 601 613 text_tarray fields; 602 614 splitchar(field.begin(), field.end(), ',', fields); … … 604 616 text_t value = args["fqv"]; 605 617 if (value.empty()) return; // somethings wrong 618 unsafe_cgi_arg("ALL", value); 606 619 text_tarray values; 607 620 splitchar(value.begin(), value.end(), ',', values); … … 651 664 text_t field = args["fqf"]; 652 665 if (field.empty()) return; // no query 666 unsafe_cgi_arg("ALL", field); 653 667 text_tarray fields; 654 668 splitchar(field.begin(), field.end(), ',', fields); … … 656 670 text_t value = args["fqv"]; 657 671 if (value.empty()) return; // somethings wrong 672 unsafe_cgi_arg("ALL", value); 658 673 text_tarray values; 659 674 splitchar(value.begin(), value.end(), ',', values); … … 661 676 text_t comb = args["fqc"]; 662 677 if (comb.empty()) return; //somethings wrong 678 //unsafe_cgi_arg("ALL", comb); 663 679 text_tarray combs; 664 680 splitchar(comb.begin(), comb.end(), ',', combs); … … 734 750 text_t field = args["sqlfqf"]; 735 751 if (field.empty()) return; // no query 752 unsafe_cgi_arg("ALL", field); // for the slash. //unsafe_cgi_arg("/", field); 736 753 text_tarray fields; 737 754 splitchar(field.begin(), field.end(), ',', fields); … … 739 756 text_t sqlcomb = args["sqlfqc"]; 740 757 if (sqlcomb.empty()) return; //somethings wrong 758 //unsafe_cgi_arg("ALL", sqlcomb); 741 759 text_tarray sqlcombs; 742 760 splitchar(sqlcomb.begin(), sqlcomb.end(), ',', sqlcombs); … … 744 762 text_t value = args["fqv"]; 745 763 if (value.empty()) return; // somethings wrong 764 unsafe_cgi_arg("ALL", value); 746 765 text_tarray values; 747 766 splitchar(value.begin(), value.end(), ',', values); … … 808 827 809 828 if (field.empty()) return; // no query 829 // need to decode %2F to / in the URL, e.g. 
to get dc.Title/Title/ex.Title again in the fields to search in 830 unsafe_cgi_arg("ALL", field); //unsafe_cgi_arg("/", field); 810 831 text_tarray fields; 811 832 splitchar(field.begin(), field.end(), ',', fields); … … 813 834 text_t sqlcomb = args["sqlfqc"]; 814 835 if (sqlcomb.empty()) return; //somethings wrong 836 //unsafe_cgi_arg("ALL", sqlcomb); 815 837 text_tarray sqlcombs; 816 838 splitchar(sqlcomb.begin(), sqlcomb.end(), ',', sqlcombs); … … 818 840 text_t value = args["fqv"]; 819 841 if (value.empty()) return; // somethings wrong 842 unsafe_cgi_arg("ALL", value); // decode all url-encoded parts of the values to search in 820 843 text_tarray values; 821 844 splitchar(value.begin(), value.end(), ',', values); … … 823 846 text_t comb = args["fqc"]; 824 847 if (comb.empty()) return; //somethings wrong 848 //unsafe_cgi_arg("ALL", comb); 825 849 text_tarray combs; 826 850 splitchar(comb.begin(), comb.end(), ',', combs); -
main/trunk/greenstone2/runtime-src/src/recpt/sqlqueryaction.cpp
r23398 r28841 260 260 ostream& logout) 261 261 { 262 // A great many characters have meanings in SQL queries, including > and %, 263 // where % stands for a multi-char wildcard 264 // http://docs.oracle.com/cd/B10501_01/text.920/a96518/cqspcl.htm 265 // Further, Greenstone's Advanced SQLite Search allows <, >, %, ', round brackets and more. 266 // So it's best to url-decode all encoded cgi-args 267 // We do so here for a normal text search or an explicit query, and in the 268 // parse_sql_query_form functions if dealing with forms. 269 262 270 if (args["qt"]=="0" && args["sqlqto"] != "1") { // normal text search 271 unsafe_cgi_arg("ALL", args["q"]); 263 272 formattedstring = "SELECT DISTINCT docOID FROM document_metadata WHERE " + args["q"]; 264 273 } … … 267 276 if (args["b"]=="1" && args["fqa"]=="1") { // explicit query 268 277 formattedstring = args["q"]; 278 unsafe_cgi_arg("ALL", formattedstring); 269 279 } 270 280 else { // form search
Note:
See TracChangeset
for help on using the changeset viewer.