Changeset 8946
- Timestamp:
- 2005-02-02T12:11:08+13:00 (19 years ago)
- Location:
- trunk
- Files:
-
- 4 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/indexers/mgpp/text/TextEl.cpp
r8692 r8946 78 78 } 79 79 80 static bool ReadTextTag (u_char ** buffer, TextEl &el) { 81 // we must have some sort of tag 82 u_char * buffer_pos = *buffer; 83 if (*buffer_pos == '\0' || *buffer_pos != '<') { 84 return false; 85 } 86 el.text.push_back('<'); 87 buffer_pos ++; 88 if (*buffer_pos == '\0') { 89 *buffer = buffer_pos; 90 return false; 91 } 92 // see if we have a '/' 93 if (*buffer_pos == '/') { 94 el.text.push_back('/'); 95 el.elType = CloseTagE; 96 buffer_pos++; 97 if (*buffer_pos == '\0') { 98 *buffer = buffer_pos; 99 return false; 100 } 101 } else { 102 el.elType = OpenTagE; 103 } 104 // get the tag name 105 while (isalnum(*buffer_pos)) { 106 el.text.push_back (*buffer_pos); 107 el.tagName.push_back (*buffer_pos); 108 buffer_pos++; 109 110 if (*buffer_pos == '\0') { 111 *buffer = buffer_pos; 112 return false; 113 } 114 115 } 116 cout << "tag name = "<<el.tagName<<endl; 117 // get everything up to closing '>' 118 while (*buffer_pos != '>') { 119 el.text.push_back(*buffer_pos); 120 buffer_pos++; 121 if (*buffer_pos == '\0') { 122 *buffer = buffer_pos; 123 return false; 124 } 125 126 } 127 // get teh closing '>' 128 el.text.push_back('>'); 129 buffer_pos++; 130 *buffer = buffer_pos; 131 cout << "at end of readtexttag, buffer is "<<*buffer<<endl; 132 // if there is no tag name then make this a text element 133 if (el.tagName.empty()) el.elType = TextE; 134 135 return true; 136 } 137 80 138 static void ToggleParaTag (TextEl &el, bool &compatInPara) { 81 139 SetCStr (el.tagName, "Paragraph", 9); … … 140 198 return true; 141 199 } 142 200 bool ReadTextEl (u_char **buffer, TextEl &el, 201 bool compatMode, bool &compatInPara) { 202 el.Clear(); 203 u_char * buffer_pos = *buffer; 204 if (*buffer_pos == '\0') return false; 205 if (compatMode) { 206 if (*buffer_pos == PARA_TERM) { 207 ToggleParaTag (el, compatInPara); 208 if (!compatInPara) buffer_pos++; 209 *buffer = buffer_pos; 210 return true; 211 } 212 if (*buffer_pos == REC_TERM) { 213 if (compatInPara) { 214 // need to close this paragraph 215 ToggleParaTag (el, compatInPara); 216 *buffer = buffer_pos; 217 return true; 218 } 219 // can close this document 220 buffer_pos++; 221 SetRecTag (el, CloseTagE); 222 *buffer = buffer_pos; 223 return true; 224 } 225 if (!compatInPara) { 226 // need to open a paragraph 227 ToggleParaTag (el, compatInPara); 228 *buffer = buffer_pos; 229 return true; 230 } 231 } 232 233 // see if we have some sort of tag 234 if (*buffer_pos == '<') return ReadTextTag (buffer, el); 235 236 // read in a text element 237 el.elType = TextE; 238 while (*buffer_pos != '<' && !(compatMode && (*buffer_pos == PARA_TERM || *buffer_pos == REC_TERM))) { 239 el.text.push_back (*buffer_pos); 240 buffer_pos++; 241 if (*buffer_pos == '\0') break; 242 } 243 *buffer = buffer_pos; 244 cout << "text element: "<<el.text<<endl; 245 return true; 246 } 247 143 248 static void AddTextEl (TextElArray &doc, unsigned long &docLen, 144 249 const TextEl &el) { … … 209 314 } 210 315 211 316 // copy of readdoc to get TextElArray from u_char buffer 317 bool ReadDoc(u_char ** buffer, const UCArray &docTag, 318 TextElArray &doc, unsigned long &docLen, 319 bool compatMode) { 320 bool compatInPara = false; 321 bool foundDocEl = false; 322 TextEl el; 323 324 doc.erase (doc.begin(), doc.end()); 325 docLen = 0; 326 327 if (compatMode) { 328 // add <Document><Paragraph> 329 SetRecTag (el, OpenTagE); 330 AddTextEl (doc, docLen, el); 331 ToggleParaTag (el, compatInPara); 332 AddTextEl (doc, docLen, el); 333 334 } else { 335 // look for an opening docTag 336 do { 337 if (!ReadTextEl (buffer, el, compatMode, compatInPara)) return false; 338 } while (el.elType != OpenTagE || el.tagName != docTag); 339 340 AddTextEl (doc, docLen, el); 341 } 342 343 344 // get all elements until the closing docTag 345 while (ReadTextEl (buffer, el, compatMode, compatInPara)) { 346 foundDocEl = true; 347 AddTextEl (doc, docLen, el); 348 if (el.elType == CloseTagE && el.tagName == docTag) 349 return true; 350 } 351 352 353 if (compatMode) { 354 if (!foundDocEl) { // end of text 355 doc.erase (doc.begin(), doc.end()); 356 return false; 357 } 358 359 // if we got here then the eof was encountered before 360 // the closing document tag 361 if (compatInPara) { 362 // need to close this paragraph 363 ToggleParaTag (el, compatInPara); 364 AddTextEl (doc, docLen, el); 365 } 366 // close this document 367 SetRecTag (el, CloseTagE); 368 AddTextEl (doc, docLen, el); 369 } 370 371 return true; 372 } 373 374 -
trunk/indexers/mgpp/text/TextEl.h
r3365 r8946 32 32 protected: 33 33 bool ReadTextTag (FileBuf &buf, TextEl &el); 34 bool ReadTextTag (u_char ** buffer, TextEl &el); 34 35 public: 35 36 TextElType elType; … … 51 52 bool ReadTextEl (FileBuf &buf, TextEl &el, 52 53 bool compatMode, bool &compatInPara); 54 bool ReadTextEl (u_char **buffer, TextEl &el, 55 bool compatMode, bool &compatInPara); 53 56 54 57 // looks for an opening docTag and reads all text elements up … … 60 63 bool compatMode=false); 61 64 65 bool ReadDoc(u_char **buffer, const UCArray &docTag, 66 TextElArray &doc, unsigned long &docLen, 67 bool compatMode=false); 68 62 69 #endif -
trunk/mgpp/text/TextEl.cpp
r8692 r8946 78 78 } 79 79 80 static bool ReadTextTag (u_char ** buffer, TextEl &el) { 81 // we must have some sort of tag 82 u_char * buffer_pos = *buffer; 83 if (*buffer_pos == '\0' || *buffer_pos != '<') { 84 return false; 85 } 86 el.text.push_back('<'); 87 buffer_pos ++; 88 if (*buffer_pos == '\0') { 89 *buffer = buffer_pos; 90 return false; 91 } 92 // see if we have a '/' 93 if (*buffer_pos == '/') { 94 el.text.push_back('/'); 95 el.elType = CloseTagE; 96 buffer_pos++; 97 if (*buffer_pos == '\0') { 98 *buffer = buffer_pos; 99 return false; 100 } 101 } else { 102 el.elType = OpenTagE; 103 } 104 // get the tag name 105 while (isalnum(*buffer_pos)) { 106 el.text.push_back (*buffer_pos); 107 el.tagName.push_back (*buffer_pos); 108 buffer_pos++; 109 110 if (*buffer_pos == '\0') { 111 *buffer = buffer_pos; 112 return false; 113 } 114 115 } 116 cout << "tag name = "<<el.tagName<<endl; 117 // get everything up to closing '>' 118 while (*buffer_pos != '>') { 119 el.text.push_back(*buffer_pos); 120 buffer_pos++; 121 if (*buffer_pos == '\0') { 122 *buffer = buffer_pos; 123 return false; 124 } 125 126 } 127 // get teh closing '>' 128 el.text.push_back('>'); 129 buffer_pos++; 130 *buffer = buffer_pos; 131 cout << "at end of readtexttag, buffer is "<<*buffer<<endl; 132 // if there is no tag name then make this a text element 133 if (el.tagName.empty()) el.elType = TextE; 134 135 return true; 136 } 137 80 138 static void ToggleParaTag (TextEl &el, bool &compatInPara) { 81 139 SetCStr (el.tagName, "Paragraph", 9); … … 140 198 return true; 141 199 } 142 200 bool ReadTextEl (u_char **buffer, TextEl &el, 201 bool compatMode, bool &compatInPara) { 202 el.Clear(); 203 u_char * buffer_pos = *buffer; 204 if (*buffer_pos == '\0') return false; 205 if (compatMode) { 206 if (*buffer_pos == PARA_TERM) { 207 ToggleParaTag (el, compatInPara); 208 if (!compatInPara) buffer_pos++; 209 *buffer = buffer_pos; 210 return true; 211 } 212 if (*buffer_pos == REC_TERM) { 213 if (compatInPara) { 214 // need to close this paragraph 215 ToggleParaTag (el, compatInPara); 216 *buffer = buffer_pos; 217 return true; 218 } 219 // can close this document 220 buffer_pos++; 221 SetRecTag (el, CloseTagE); 222 *buffer = buffer_pos; 223 return true; 224 } 225 if (!compatInPara) { 226 // need to open a paragraph 227 ToggleParaTag (el, compatInPara); 228 *buffer = buffer_pos; 229 return true; 230 } 231 } 232 233 // see if we have some sort of tag 234 if (*buffer_pos == '<') return ReadTextTag (buffer, el); 235 236 // read in a text element 237 el.elType = TextE; 238 while (*buffer_pos != '<' && !(compatMode && (*buffer_pos == PARA_TERM || *buffer_pos == REC_TERM))) { 239 el.text.push_back (*buffer_pos); 240 buffer_pos++; 241 if (*buffer_pos == '\0') break; 242 } 243 *buffer = buffer_pos; 244 cout << "text element: "<<el.text<<endl; 245 return true; 246 } 247 143 248 static void AddTextEl (TextElArray &doc, unsigned long &docLen, 144 249 const TextEl &el) { … … 209 314 } 210 315 211 316 // copy of readdoc to get TextElArray from u_char buffer 317 bool ReadDoc(u_char ** buffer, const UCArray &docTag, 318 TextElArray &doc, unsigned long &docLen, 319 bool compatMode) { 320 bool compatInPara = false; 321 bool foundDocEl = false; 322 TextEl el; 323 324 doc.erase (doc.begin(), doc.end()); 325 docLen = 0; 326 327 if (compatMode) { 328 // add <Document><Paragraph> 329 SetRecTag (el, OpenTagE); 330 AddTextEl (doc, docLen, el); 331 ToggleParaTag (el, compatInPara); 332 AddTextEl (doc, docLen, el); 333 334 } else { 335 // look for an opening docTag 336 do { 337 if (!ReadTextEl (buffer, el, compatMode, compatInPara)) return false; 338 } while (el.elType != OpenTagE || el.tagName != docTag); 339 340 AddTextEl (doc, docLen, el); 341 } 342 343 344 // get all elements until the closing docTag 345 while (ReadTextEl (buffer, el, compatMode, compatInPara)) { 346 foundDocEl = true; 347 AddTextEl (doc, docLen, el); 348 if (el.elType == CloseTagE && el.tagName == docTag) 349 return true; 350 } 351 352 353 if (compatMode) { 354 if (!foundDocEl) { // end of text 355 doc.erase (doc.begin(), doc.end()); 356 return false; 357 } 358 359 // if we got here then the eof was encountered before 360 // the closing document tag 361 if (compatInPara) { 362 // need to close this paragraph 363 ToggleParaTag (el, compatInPara); 364 AddTextEl (doc, docLen, el); 365 } 366 // close this document 367 SetRecTag (el, CloseTagE); 368 AddTextEl (doc, docLen, el); 369 } 370 371 return true; 372 } 373 374 -
trunk/mgpp/text/TextEl.h
r3365 r8946 32 32 protected: 33 33 bool ReadTextTag (FileBuf &buf, TextEl &el); 34 bool ReadTextTag (u_char ** buffer, TextEl &el); 34 35 public: 35 36 TextElType elType; … … 51 52 bool ReadTextEl (FileBuf &buf, TextEl &el, 52 53 bool compatMode, bool &compatInPara); 54 bool ReadTextEl (u_char **buffer, TextEl &el, 55 bool compatMode, bool &compatInPara); 53 56 54 57 // looks for an opening docTag and reads all text elements up … … 60 63 bool compatMode=false); 61 64 65 bool ReadDoc(u_char **buffer, const UCArray &docTag, 66 TextElArray &doc, unsigned long &docLen, 67 bool compatMode=false); 68 62 69 #endif
Note:
See TracChangeset
for help on using the changeset viewer.