Changeset 3008
- Timestamp:
- 2002-02-27T11:55:28+13:00 (22 years ago)
- Location:
- trunk/gsdl/src/mgpp/text
- Files:
-
- 10 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/gsdl/src/mgpp/text/GSDLQueryLex.cpp
r2693 r3008
  39   39
  40   40   // this version of end is used in unitool
  41      - UCArray::const_iterator endMinus1 = end-1;
       41 + // UCArray::const_iterator endMinus1 = end-1;
       42 + const unsigned char* endMinus1 = &(*end)-1;
  42   43
  43   44   int charLen;
  44   45   unsigned short c; // one character lookahead
  45      - charLen = parse_utf8_char ( here, endMinus1, &c);
       46 + charLen = parse_utf8_char (&*here, endMinus1, &c);
  46   47
  47   48   // check for positive or negative
   …    …
  49   50   if (c == '+') {
  50   51   AddNChar (here, el.text, charLen);
  51      - charLen = parse_utf8_char ( here, endMinus1, &c);
       52 + charLen = parse_utf8_char (&*here, endMinus1, &c);
  52   53   } else if (c == '-') {
  53   54   neg = true;
  54   55   AddNChar (here, el.text, charLen);
  55      - charLen = parse_utf8_char ( here, endMinus1, &c);
       56 + charLen = parse_utf8_char (&*here, endMinus1, &c);
  56   57   }
  57   58
   …    …
  64   65   el.num = el.num*10 + c - '0';
  65   66   AddNChar (here, el.text, charLen);
  66      - charLen = parse_utf8_char ( here, endMinus1, &c);
       67 + charLen = parse_utf8_char (&*here, endMinus1, &c);
  67   68   }
  68   69
   …    …
  78   79
  79   80   // this version of end is used in unitool
  80      - UCArray::const_iterator endMinus1 = end-1;
       81 + //UCArray::const_iterator endMinus1 = end-1;
       82 + const unsigned char* endMinus1 = &(*end)-1;
  81   83
  82   84   int charLen=0;
  83   85   int length=0;
  84   86   unsigned short c; // one character lookahead
  85      - charLen = parse_utf8_char ( here, endMinus1, &c);
       87 + charLen = parse_utf8_char (&*here, endMinus1, &c);
  86   88
  87   89   // read in number part
   …    …
  94   96   AddNChar (here, el.text, charLen);
  95   97   length += charLen;
  96      - charLen = parse_utf8_char ( here, endMinus1, &c);
       98 + charLen = parse_utf8_char (&*here, endMinus1, &c);
  97   99   }
  98  100   // check the next character -if it is a letter, then have a term, not an integer
   …    …
 110  112   AddNChar (here, el.text, charLen);
 111  113   length += charLen;
 112      - charLen = parse_utf8_char ( here, endMinus1, &c);
      114 + charLen = parse_utf8_char (&*here, endMinus1, &c);
 113  115   }
 114  116
   …    …
 118  120   UCArray::const_iterator end,
 119  121   UCArray &text) {
 120      - UCArray::const_iterator endMinus1 = end-1;
 121      - here = ParseIndexWord (here, endMinus1, text);
      122 + //UCArray::const_iterator endMinus1 = end-1;
      123 + const unsigned char* endMinus1 = &(*end)-1;
      124 + const unsigned char* new_here = ParseIndexWord (&*here, endMinus1, text);
      125 + here += (new_here - &*here); // advance iterator by number of chars advanced
 122  126   return !text.empty();
 123  127   }
   …    …
 133  137
 134  138   // this version of end is used in unitool
 135      - UCArray::const_iterator endMinus1 = end-1;
 136      -
      139 + //UCArray::const_iterator endMinus1 = end-1;
      140 + const unsigned char* endMinus1 = &(*end)-1;
      141 +
 137  142   // ignore all white space
 138  143   int charLen;
 139  144   unsigned short c; // one character lookahead
 140      - charLen = parse_utf8_char ( here, endMinus1, &c);
      145 + charLen = parse_utf8_char (&*here, endMinus1, &c);
 141  146   while (here != end && is_unicode_space (c)) {
 142  147   here += charLen;
 143      - charLen = parse_utf8_char ( here, endMinus1, &c);
      148 + charLen = parse_utf8_char (&*here, endMinus1, &c);
 144  149   }
 145  150   if (here == end) return false;
-
trunk/gsdl/src/mgpp/text/Terms.cpp
r2468 r3008
 244  244   // convert the word to an "mg word"
 245  245   mgWord[0] = term.size();
 246      - memcpy ((char *)&mgWord[1], (const char *)term.begin(), term.size());
      246 + memcpy ((char *)&mgWord[1], &(term[0]), term.size());
 247  247
 248  248   // stem the word
-
trunk/gsdl/src/mgpp/text/UCArray.cpp
r2468 r3008
  41   41   int i = 0;
  42   42   while (here != end) {
  43      - cstr[i] = (char)*here;
       43 + cstr[i] = text[i];
  44   44   here++; i++;
  45   45   }
   …    …
 253  253   fputc (sufLen, f);
 254  254   if (ferror(f) != 0) return false;
 255      - return (fwrite ((char *)a.begin()+preLen, sizeof (char), sufLen, f) == sufLen);
      255 + char* tmp=GetCStr(a);
      256 + int ret=(fwrite (tmp+preLen, sizeof (char), sufLen, f) == sufLen);
      257 + delete (tmp);
      258 + return (ret);
 256  259   }
 257  260
-
trunk/gsdl/src/mgpp/text/UCArray.h
r2468 r3008
  24   24
  25   25   // need this to avoid bizarre compiler problems under VC++ 6.0
  26      - #if defined (__WIN32__) && !defined (GSDL_USE_IOS_H)
       26 + #if !defined (GSDL_NAMESPACE_BROKEN) && !defined (GSDL_USE_IOS_H)
  27   27   # include <iostream>
  28   28   using namespace std;
-
trunk/gsdl/src/mgpp/text/ivf.pass1.cpp
r2468 r3008
 272  272   if (!wordLevelIndex && !inFrag) return;
 273  273
 274      - const unsigned char *textHere = el.text.begin();
 275      - const unsigned char *textEnd = el.text.end() - 1;
      274 + const unsigned char *textHere = &(el.text[0]);
      275 + const unsigned char *textEnd = &(el.text[el.text.size() - 1]);
 276  276   UCArray word;
 277  277
-
trunk/gsdl/src/mgpp/text/ivf.pass2.cpp
r2468 r3008
 804  804   if (!wordLevelIndex && !inFrag) return;
 805  805
 806      - const unsigned char *textHere = el.text.begin();
 807      - const unsigned char *textEnd = el.text.end() - 1;
      806 + const unsigned char *textHere = &(el.text[0]);
      807 + const unsigned char *textEnd = &(el.text[el.text.size() - 1]);
 808  808   unsigned char mgWord[MAXSTEMLEN + 1];
 809  809
-
trunk/gsdl/src/mgpp/text/mgpp_perf_hash_build.cpp
r2557 r3008
  93   93
  94   94   *pool++ = wordEl.el.size();
  95      - memcpy ((char *) pool, (const char *) wordEl.el.begin(), wordEl.el.size());
       95 + memcpy ((char *) pool, &(wordEl.el[0]), wordEl.el.size());
  96   96   //cerr << pool<<" " <<starts[i]<<endl;
  97   97   pool += wordEl.el.size();
-
trunk/gsdl/src/mgpp/text/mgpp_stem_idx.cpp
r2557 r3008
  76   76   // convert the word to an "mg word"
  77   77   mgWord[0] = wordEl.el.size();
  78      - memcpy((char *)&mgWord[1], (const char *)wordEl.el.begin(), wordEl.el.size());
       78 + memcpy((char *)&mgWord[1], &(wordEl.el[0]), wordEl.el.size());
  79   79
  80   80   // stem the word
-
trunk/gsdl/src/mgpp/text/text.pass1.cpp
r2698 r3008
 269  269   docLen += textLen;
 270  270
 271      - retValue = process_text_element ( (*here).text.begin(), textLen);
      271 + retValue = process_text_element (&(here->text[0]), textLen);
 272  272   if (retValue != COMPALLOK) return retValue;
 273  273
-
trunk/gsdl/src/mgpp/text/text.pass2.cpp
r2541 r3008
 384  384   // compress the text
 385  385   if (compress_text (textOutBuf,
 386      - (*here).text.begin(),
      386 + &(here->text[0]),
 387  387   (*here).text.size(),
 388  388   whichWordType,
Note:
See TracChangeset
for help on using the changeset viewer.