/************************************************************************** * * FText.h -- File structures for text compression * Copyright (C) 1999 Rodger McNab * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. * **************************************************************************/ #ifndef FTEXT_H #define FTEXT_H #if defined(GSDL_USE_OBJECTSPACE) # include #elif defined(GSDL_USE_STL_H) # include #else # include #endif #include #include "UCArray.h" // holds a bit address class BitAddr { public: unsigned long byte; unsigned char bit; BitAddr (); void Clear (); bool Read (FILE *f); bool Write (FILE *f) const; }; class TextLevelInfo { public: UCArray levelTag; unsigned long textIdxPtr; unsigned long numEntries; TextLevelInfo (); void Clear (); bool Read (FILE *f); bool Write (FILE *f) const; }; // stream output for debugging purposes ostream &operator<<(ostream &s, const TextLevelInfo &l); typedef map TextLevelInfoMap; class FTextLevel { public: TextLevelInfoMap levelInfo; FTextLevel (); void Clear (); bool Read (FILE *f); bool Write (FILE *f) const; }; // stream output for debugging purposes ostream &operator<<(ostream &s, const FTextLevel &l); // the text.idx file points into the compressed text class TextIdx { public: BitAddr start; // the first bit of the compressed text BitAddr end; // the first bit past the end of the text unsigned char which; // 0 = non-word, 1 = word TextIdx (); void Clear (); // does a seek and reads the appropriate record bool Read (FILE *f, const TextLevelInfo &levelInfo, unsigned long docNum); bool Read (FILE *f); bool Write (FILE *f) const; }; // stream output for debugging purposes ostream &operator<<(ostream &s, const TextIdx &t); // note: document numbers start at 1 bool SeekTextIdx (FILE *f, const TextLevelInfo &levelInfo, unsigned long docNum); // used to store the pointers while compressing the text typedef vector TextIdxArray; bool ReadTextIdxArray (FILE *f, TextIdxArray &a, unsigned long arrSize); bool WriteTextIdxArray (FILE *f, const TextIdxArray &a); // used to store information about a level while compressing the text class CompressTextInfo { public: bool inDoc; BitAddr start; unsigned char which; TextIdxArray docPtrs; CompressTextInfo (); void Clear (); // clears inDoc, start, and which (not the docPtrs) void ResetStart (); // set the start of a level (closing off the last opening // tag if needed) void SetStart (unsigned long startPos, unsigned char startBit, unsigned char startWhich); // if in a document, it will set the end, add the document to the // list of document ptrs, and then call ResetStart void SetEnd (unsigned long endPos, unsigned char endBit); }; typedef map CompressTextInfoMap; #endif