source: main/trunk/greenstone2/runtime-src/src/colservr/expat_document.cpp@ 24874

Last change on this file since 24874 was 22738, checked in by mdewsnip, 14 years ago

Added copyright header to runtime-src/src/colservr/expat_document.cpp, runtime-src/src/colservr/expat_document.h, runtime-src/src/colservr/expat_resultset.cpp, runtime-src/src/colservr/expat_resultset.h.

  • Property svn:keywords set to Author Date Id Revision
File size: 3.7 KB
Line 
1/**********************************************************************
2 *
3 * expat_document.cpp --
4 *
5 * Copyright (C) 2005-2010 The New Zealand Digital Library Project
6 *
7 * A component of the Greenstone digital library software
8 * from the New Zealand Digital Library Project at the
9 * University of Waikato, New Zealand.
10 *
11 * This program is free software; you can redistribute it and/or modify
12 * it under the terms of the GNU General Public License as published by
13 * the Free Software Foundation; either version 2 of the License, or
14 * (at your option) any later version.
15 *
16 * This program is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU General Public License for more details.
20 *
21 * You should have received a copy of the GNU General Public License
22 * along with this program; if not, write to the Free Software
23 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24 *
25 *********************************************************************/
26
27#include <stdio.h>
28#include <string.h>
29#include <expat.h>
30#include "gsdlunicode.h"
31
32#if defined(GSDL_USE_OBJECTSPACE)
33# include <ospace\std\iostream>
34#elif defined(GSDL_USE_IOS_H)
35# include <iostream.h>
36#else
37# include <iostream>
38using namespace std;
39#endif
40
41#include "queryinfo.h"
42
43#include "expat_document.h"
44
45#include "text_t.h"
46#include "fileutil.h"
47#include "expat_resultset.h"
48
49struct resultpack {
50 text_t *current_text;
51 text_t *section_num;
52 text_t *section_level;
53 bool store_text;
54};
55
56static void XMLCALL
57startElement(void *userData, const char *name, const char **attributes)
58{
59 resultpack * rpack_ptr = (resultpack*)userData;
60 text_t element_name = (char*)name;
61 if (element_name == *(rpack_ptr->section_level)) {
62 text_t id_att = (char *)get_attribute(attributes,"gs2:docOID");
63 if (id_att == *(rpack_ptr->section_num)) {
64 rpack_ptr->store_text = true;
65 }
66
67 }
68}
69
70static void XMLCALL
71endElement(void *userData, const char *name)
72{
73 resultpack * rpack_ptr = (resultpack*)userData;
74 text_t element_name = (char*)name;
75 if (element_name == *(rpack_ptr->section_level)) {
76 if (rpack_ptr->store_text == true) {
77 // we have finished now, can we quit this??
78 rpack_ptr->store_text = false;
79 }
80 }
81
82}
83
84static void XMLCALL
85characterData(void *userData, const char * text, int len) {
86 resultpack * rpack_ptr = (resultpack*)userData;
87 if (rpack_ptr->store_text) {
88 rpack_ptr->current_text->appendcarr(text, len);
89 }
90}
91
92
93int expat_document(const text_t &filename, const text_t &sec_level, const text_t &sec_num, text_t & doc_content)
94{
95 text_t current_text;
96 current_text.clear();
97 text_t section_num = sec_num;
98 text_t section_level = sec_level;
99
100 resultpack rpack = { &current_text, &section_num, &section_level, false};
101 //cerr << "sec num = "<<sec_num<<", sec level="<<sec_level<<", filename="<<filename<<endl;
102 text_t doc_text;
103 read_file(filename, doc_text);
104
105 char* c_doc_text = doc_text.getcstr();
106 XML_Parser parser = XML_ParserCreate(NULL);
107
108 XML_SetUserData(parser, &rpack);
109 XML_SetElementHandler(parser, startElement, endElement);
110 XML_SetCharacterDataHandler(parser, characterData);
111 int return_status = 0;
112 const int parse_status
113 = XML_Parse(parser, c_doc_text, strlen(c_doc_text), XML_TRUE);
114
115 if (parse_status == XML_STATUS_ERROR) {
116 cerr << "Error: " << XML_ErrorString(XML_GetErrorCode(parser)) << " at line " << XML_GetCurrentLineNumber(parser) << endl;
117 return_status = 1;
118 }
119
120 XML_ParserFree(parser);
121 delete []c_doc_text;
122
123 doc_content = to_uni(current_text); // Vital for non-ASCII documents
124 return return_status;
125}
126
127
Note: See TracBrowser for help on using the repository browser.