/**************************************************************************
 *
 * TextEl.cpp -- Data structures for parsed documents
 * Copyright (C) 1999 Rodger McNab
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 **************************************************************************/

#include "TextEl.h"
#include "ctype.h"

// Single-byte terminator constants (octal 002 and 003).  Their use is not
// visible in the code below; judging by the names they presumably mark the
// end of a record and of a paragraph -- confirm against their users.
#define REC_TERM '\002'
#define PARA_TERM '\003'

// Resets this element: the type goes back to TextE and both the tag name
// and the accumulated text are emptied.
void TextEl::Clear () {
  elType = TextE;
  tagName.erase (tagName.begin(), tagName.end());
  text.erase (text.begin(), text.end());
}

// Reads one tag ('<', optional '/', alphanumeric name, anything else up to
// and including '>') from buf into el.
//
// The characters consumed are appended to el.text and the name characters
// to el.tagName; el is not cleared first, so that is left to the caller.
// el.elType is set to CloseTagE or OpenTagE depending on the '/', and is
// switched to TextE at the end if no name characters were found.
//
// Returns false if the current character is not '<' or if buf.Peek/buf.Next
// report failure before the closing '>' has been seen; in that case el may
// hold a partially accumulated tag.  Returns true otherwise.
// (Presumably Peek fetches the current byte without consuming it and Next
// advances by one byte -- FileBuf is declared elsewhere.)
static bool ReadTextTag (FileBuf &buf, TextEl &el) {
  // we must have some sort of tag
  unsigned char c;
  if (!buf.Peek(c)) return false;

  // get the '<'
  if (c != '<') return false;
  el.text.push_back (c);
  if (!buf.Next() || !buf.Peek (c)) return false;

  // see if we have a '/'
  if (c == '/') {
    // we have a closing tag
    el.text.push_back (c);
    if (!buf.Next() || !buf.Peek (c)) return false;
    el.elType = CloseTagE;
  } else {
    // we have an opening tag
    el.elType = OpenTagE;
  }

  // get the tag name
  while (isalnum(c)) {
    el.text.push_back (c);
    el.tagName.push_back (c);
    if (!buf.Next() || !buf.Peek (c)) return false;
  }

  // get everything up to the closing '>'
  while (c != '>') {
    el.text.push_back (c);
    if (!buf.Next() || !buf.Peek (c)) return false;
  }

  // get the closing '>'
  el.text.push_back (c);
  // step past the '>'; the result of Next() is deliberately not checked here
  buf.Next();

  // if there is no tag name then make this a text element
  if (el.tagName.empty()) el.elType = TextE;
  return true;
}

// In-memory counterpart of the FileBuf overload above: reads one tag from
// the data at *buffer, treating a '\0' byte as end of input.
//
// *buffer is left untouched when the first byte is not '<'.  On every later
// exit visible here it is updated to the position reached.  As above, el is
// only appended to, never cleared.
//
// NOTE(review): two statements in this function are visibly damaged and the
// text after the second one is missing (see the notes below).  The code is
// kept exactly as found; restore the lost text from version control before
// relying on this function.
static bool ReadTextTag (u_char ** buffer, TextEl &el) {
  // we must have some sort of tag
  u_char * buffer_pos = *buffer;
  // the '\0' test is already implied by the != '<' test
  if (*buffer_pos == '\0' || *buffer_pos != '<') {
    return false;
  }
  el.text.push_back('<');
  buffer_pos ++;
  if (*buffer_pos == '\0') {
    *buffer = buffer_pos;
    return false;
  }

  // see if we have a '/'
  if (*buffer_pos == '/') {
    el.text.push_back('/');
    el.elType = CloseTagE;
    buffer_pos++;
    if (*buffer_pos == '\0') {
      *buffer = buffer_pos;
      return false;
    }
  } else {
    el.elType = OpenTagE;
  }

  // get the tag name
  while (isalnum(*buffer_pos)) {
    el.text.push_back (*buffer_pos);
    el.tagName.push_back (*buffer_pos);
    buffer_pos++;
    if (*buffer_pos == '\0') {
      *buffer = buffer_pos;
      return false;
    }
  }

  // NOTE(review): the debug-output statement below is truncated -- the rest
  // of the cout expression (and apparently a comment that preceded the loop)
  // is missing, leaving a stray quote before `while`.  Looks like text
  // between a '<' and the next '>' was stripped; kept verbatim.
  cout << "tag name = "<' while (*buffer_pos != '>') {
    el.text.push_back(*buffer_pos);
    buffer_pos++;
    if (*buffer_pos == '\0') {
      *buffer = buffer_pos;
      return false;
    }
  }

  // get the closing '>'
  el.text.push_back('>');
  buffer_pos++;
  *buffer = buffer_pos;

  // NOTE(review): the statement below is truncated as well, and everything
  // between it and the SetRecTag call is missing: the remainder of this
  // function and the beginning of the function that the following code
  // belongs to.  What follows appears to be the tail of a ReadDoc-style
  // function that reads from `buf` (it uses doc, docLen, docTag, compatMode,
  // compatInPara and foundDocEl, none of which are declared in the visible
  // text).  Kept verbatim.
  cout << "at end of readtexttag, buffer is "<<*buffer< SetRecTag (el, OpenTagE);
    AddTextEl (doc, docLen, el);
    ToggleParaTag (el, compatInPara);
    AddTextEl (doc, docLen, el);

  } else {
    // look for an opening docTag
    do {
      if (!ReadTextEl (buf, el, compatMode, compatInPara)) return false;
    } while (el.elType != OpenTagE || el.tagName != docTag);
    AddTextEl (doc, docLen, el);
  }

  // get all elements until the closing docTag
  while (ReadTextEl (buf, el, compatMode, compatInPara)) {
    foundDocEl = true;
    AddTextEl (doc, docLen, el);
    if (el.elType == CloseTagE && el.tagName == docTag)
      return true;
  }

  if (compatMode) {
    if (!foundDocEl) { // end of text
      doc.erase (doc.begin(), doc.end());
      return false;
    }

    // if we got here then the eof was encountered before
    // the closing document tag
    if (compatInPara) {
      // need to close this paragraph
      ToggleParaTag (el, compatInPara);
      AddTextEl (doc, docLen, el);
    }
    // close this document
    SetRecTag (el, CloseTagE);
    AddTextEl (doc, docLen, el);
  }

  return true;
}

//
copy of readdoc to get TextElArray from u_char buffer bool ReadDoc(u_char ** buffer, const UCArray &docTag, TextElArray &doc, mg_u_long &docLen, bool compatMode) { bool compatInPara = false; bool foundDocEl = false; TextEl el; doc.erase (doc.begin(), doc.end()); docLen = 0; if (compatMode) { // add SetRecTag (el, OpenTagE); AddTextEl (doc, docLen, el); ToggleParaTag (el, compatInPara); AddTextEl (doc, docLen, el); } else { // look for an opening docTag do { if (!ReadTextEl (buffer, el, compatMode, compatInPara)) return false; } while (el.elType != OpenTagE || el.tagName != docTag); AddTextEl (doc, docLen, el); } // get all elements until the closing docTag while (ReadTextEl (buffer, el, compatMode, compatInPara)) { foundDocEl = true; AddTextEl (doc, docLen, el); if (el.elType == CloseTagE && el.tagName == docTag) return true; } if (compatMode) { if (!foundDocEl) { // end of text doc.erase (doc.begin(), doc.end()); return false; } // if we got here then the eof was encountered before // the closing document tag if (compatInPara) { // need to close this paragraph ToggleParaTag (el, compatInPara); AddTextEl (doc, docLen, el); } // close this document SetRecTag (el, CloseTagE); AddTextEl (doc, docLen, el); } return true; }