/**************************************************************************
 *
 * TextEl.cpp -- Data structures for parsed documents
 * Copyright (C) 1999  Rodger McNab
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 * $Id: TextEl.cpp 855 2000-01-14 02:17:52Z sjboddie $
 *
 **************************************************************************/

#include "TextEl.h"
#include "ctype.h"

// In compatibility mode these two control characters stand in for the
// end of a document record and the end of a paragraph respectively.
#define REC_TERM '\002'
#define PARA_TERM '\003'


// Reset the element to its blank state: a text element (TextE) with an
// empty tag name and empty text.
void TextEl::Clear () {
  text.erase (text.begin(), text.end());
  tagName.erase (tagName.begin(), tagName.end());
  elType = TextE;
}


// Advance buf by one character and then peek at the character that is
// current afterwards, storing it in ch.  Returns false as soon as either
// step fails (Peek is not attempted if Next fails).
static bool StepAndPeek (FileBuf &buf, unsigned char &ch) {
  return buf.Next() && buf.Peek (ch);
}


// True when ch may not be appended to a run of plain text: a '<' always
// stops the run, and in compatibility mode so do the paragraph and
// record terminators.
static bool EndsTextRun (unsigned char ch, bool compatMode) {
  if (ch == '<') return true;
  return compatMode && (ch == PARA_TERM || ch == REC_TERM);
}


// Read one "<...>" construct from buf into el.
//
// The complete source text of the construct, including '<' and '>', is
// appended to el.text.  The leading alphanumeric characters (after an
// optional '/') are appended to el.tagName.  el.elType becomes CloseTagE
// when a '/' follows the '<', otherwise OpenTagE; if no tag name was
// found the element is demoted to TextE.
//
// Returns false if the current character is not '<' or if the buffer
// runs out before the closing '>' is seen; in the latter case el is left
// partially filled.  el is expected to have been cleared by the caller.
static bool ReadTextTag (FileBuf &buf, TextEl &el) {
  unsigned char ch;

  // the construct has to begin with '<'
  if (!buf.Peek (ch) || ch != '<') return false;
  el.text.push_back (ch);
  if (!StepAndPeek (buf, ch)) return false;

  // a '/' directly after the '<' marks a closing tag
  if (ch != '/') {
    el.elType = OpenTagE;
  } else {
    el.text.push_back (ch);
    if (!StepAndPeek (buf, ch)) return false;
    el.elType = CloseTagE;
  }

  // the tag name is the leading run of alphanumeric characters; it is
  // recorded both as raw text and as the tag name
  while (isalnum (ch)) {
    el.text.push_back (ch);
    el.tagName.push_back (ch);
    if (!StepAndPeek (buf, ch)) return false;
  }

  // whatever lies between the name and the '>' is kept as raw text only
  while (ch != '>') {
    el.text.push_back (ch);
    if (!StepAndPeek (buf, ch)) return false;
  }

  // store the '>' itself and step past it; the result of this final
  // Next() is intentionally not checked, the tag is already complete
  el.text.push_back (ch);
  buf.Next();

  // something like "<>" or "<!...>" carries no name: treat it as text
  if (el.tagName.empty()) el.elType = TextE;

  return true;
}


// Turn el into a synthetic "Paragraph" tag and flip compatInPara.
//
// If a paragraph is currently open (compatInPara is true) el becomes a
// closing tag whose text is the single character PARA_TERM; otherwise el
// becomes an opening tag with empty text.
static void ToggleParaTag (TextEl &el, bool &compatInPara) {
  const bool closing = compatInPara;

  SetCStr (el.tagName, "Paragraph");
  el.text.erase (el.text.begin(), el.text.end());

  if (closing) {
    el.elType = CloseTagE;
    el.text.push_back (PARA_TERM);
  } else {
    el.elType = OpenTagE;
  }

  compatInPara = !closing;
}


// Turn el into a synthetic "Document" tag of the requested type.  The
// text is emptied, except that a closing tag carries the single
// character REC_TERM as its text.
static void SetRecTag (TextEl &el, TextElType elType) {
  SetCStr (el.tagName, "Document");
  el.text.erase (el.text.begin(), el.text.end());
  el.elType = elType;

  if (elType == CloseTagE) {
    el.text.push_back (REC_TERM);
  }
}


// Read the next element (a tag or a run of text) from buf into el.
//
// el is cleared first.  Returns false if no character can be peeked from
// buf, or if a tag was started but could not be read to its end.
//
// In compatibility mode the PARA_TERM and REC_TERM characters are
// translated into synthetic Paragraph / Document tags, and compatInPara
// tracks whether a synthetic paragraph is currently open:
//   - PARA_TERM while a paragraph is open closes it and consumes the
//     character; while none is open it opens one and leaves the
//     character in the buffer for the next call.
//   - REC_TERM while a paragraph is open closes the paragraph first and
//     leaves the character in the buffer; otherwise the character is
//     consumed and a closing Document tag is produced.
//   - any other character while no paragraph is open produces an opening
//     Paragraph tag without consuming anything.
bool ReadTextEl (FileBuf &buf, TextEl &el,
                 bool compatMode, bool &compatInPara) {
  el.Clear();

  unsigned char ch;
  if (!buf.Peek (ch)) return false;

  if (compatMode) {
    switch (ch) {
    case PARA_TERM: {
      const bool wasInPara = compatInPara;
      ToggleParaTag (el, compatInPara);
      // the terminator is only used up when it actually closed a paragraph
      if (wasInPara) buf.Next();
      return true;
    }

    case REC_TERM:
      if (compatInPara) {
        // the open paragraph has to be closed before the document can be
        ToggleParaTag (el, compatInPara);
      } else {
        // nothing left open: consume the terminator and close the document
        buf.Next();
        SetRecTag (el, CloseTagE);
      }
      return true;

    default:
      if (!compatInPara) {
        // ordinary content must live inside a paragraph, so open one first
        ToggleParaTag (el, compatInPara);
        return true;
      }
      break;
    }
  }

  // a '<' starts some sort of tag
  if (ch == '<') return ReadTextTag (buf, el);

  // otherwise collect plain text up to the next tag or terminator, or
  // until the buffer has nothing more to give
  el.elType = TextE;
  while (!EndsTextRun (ch, compatMode)) {
    el.text.push_back (ch);
    if (!StepAndPeek (buf, ch)) break;
  }

  return true;
}


// Append a copy of el to doc and add the length of its text to docLen.
static void AddTextEl (TextElArray &doc, unsigned long &docLen,
                       const TextEl &el) {
  doc.push_back (el);
  docLen += el.text.size();

  // debugging aid, left disabled:
  //  cout << "elType: " << el.elType << "\n";
  //  cout << "tagName: " << el.tagName << "\n";
  //  cout << "text: \"" << el.text << "\"\n\n";
}


// Read one document from buf into doc, and set docLen to the summed
// text length of all elements stored.
//
// doc and docLen are reset on entry.
//
// Normal mode: elements are skipped until an opening tag named docTag is
// found (returns false if the input ends first).  That tag and all
// following elements are stored up to and including the matching closing
// docTag.  If the input ends before the closing tag, the elements read
// so far are kept and true is returned.
//
// Compatibility mode: an opening Document tag and an opening Paragraph
// tag are synthesised up front, then elements are read as in normal
// mode.  If the input ends before a closing docTag was seen and nothing
// at all was read, doc is emptied and false is returned; otherwise any
// open paragraph and the document are closed with synthetic tags and
// true is returned.
bool ReadDoc (FileBuf &buf, const UCArray &docTag,
              TextElArray &doc, unsigned long &docLen,
              bool compatMode) {
  bool inPara = false;
  bool sawElement = false;
  TextEl cur;

  doc.erase (doc.begin(), doc.end());
  docLen = 0;

  if (!compatMode) {
    // skip ahead to the opening docTag
    for (;;) {
      if (!ReadTextEl (buf, cur, compatMode, inPara)) return false;
      if (cur.elType == OpenTagE && cur.tagName == docTag) break;
    }
    AddTextEl (doc, docLen, cur);

  } else {
    // start the document with synthetic opening Document and
    // Paragraph tags
    SetRecTag (cur, OpenTagE);
    AddTextEl (doc, docLen, cur);
    ToggleParaTag (cur, inPara);
    AddTextEl (doc, docLen, cur);
  }

  // store every element up to and including the closing docTag
  while (ReadTextEl (buf, cur, compatMode, inPara)) {
    sawElement = true;
    AddTextEl (doc, docLen, cur);
    if (cur.elType == CloseTagE && cur.tagName == docTag) return true;
  }

  // reading stopped before a closing docTag was seen
  if (!compatMode) return true;

  if (!sawElement) {
    // nothing but the synthetic opening tags: end of text
    doc.erase (doc.begin(), doc.end());
    return false;
  }

  // the input ended inside a document, so close whatever is still open
  if (inPara) {
    ToggleParaTag (cur, inPara);
    AddTextEl (doc, docLen, cur);
  }
  SetRecTag (cur, CloseTagE);
  AddTextEl (doc, docLen, cur);

  return true;
}