source: gsdl/trunk/src/colservr/expat_document.cpp@ 16445

Last change on this file since 16445 was 12427, checked in by mdewsnip, 18 years ago

Errors parsing the XML documents from Lucene never went anywhere useful... now they go to the error.txt file.

  • Property svn:keywords set to Author Date Id Revision
File size: 2.6 KB
Line 
1
2#include <stdio.h>
3#include <string.h>
4#include <expat.h>
5#include "gsdlunicode.h"
6
7#if defined(GSDL_USE_OBJECTSPACE)
8# include <ospace\std\iostream>
9#elif defined(GSDL_USE_IOS_H)
10# include <iostream.h>
11#else
12# include <iostream>
13using namespace std;
14#endif
15
16#include "queryinfo.h"
17
18#include "expat_document.h"
19
20#include "text_t.h"
21#include "fileutil.h"
22#include "expat_resultset.h"
23
24struct resultpack {
25 text_t *current_text;
26 text_t *section_num;
27 text_t *section_level;
28 bool store_text;
29};
30
31static void XMLCALL
32startElement(void *userData, const char *name, const char **attributes)
33{
34 resultpack * rpack_ptr = (resultpack*)userData;
35 text_t element_name = (char*)name;
36 if (element_name == *(rpack_ptr->section_level)) {
37 text_t id_att = (char *)get_attribute(attributes,"gs2:id");
38 if (id_att == *(rpack_ptr->section_num)) {
39 rpack_ptr->store_text = true;
40 }
41
42 }
43}
44
45static void XMLCALL
46endElement(void *userData, const char *name)
47{
48 resultpack * rpack_ptr = (resultpack*)userData;
49 text_t element_name = (char*)name;
50 if (element_name == *(rpack_ptr->section_level)) {
51 if (rpack_ptr->store_text == true) {
52 // we have finished now, can we quit this??
53 rpack_ptr->store_text = false;
54 }
55 }
56
57}
58
59static void XMLCALL
60characterData(void *userData, const char * text, int len) {
61 resultpack * rpack_ptr = (resultpack*)userData;
62 if (rpack_ptr->store_text) {
63 rpack_ptr->current_text->appendcarr(text, len);
64 }
65}
66
67
68int expat_document(const text_t &filename, const text_t &sec_level, const text_t &sec_num, text_t & doc_content)
69{
70 text_t current_text;
71 current_text.clear();
72 text_t section_num = sec_num;
73 text_t section_level = sec_level;
74
75 resultpack rpack = { &current_text, &section_num, &section_level, false};
76 //cerr << "sec num = "<<sec_num<<", sec level="<<sec_level<<", filename="<<filename<<endl;
77 text_t doc_text;
78 read_file(filename, doc_text);
79
80 char* c_doc_text = doc_text.getcstr();
81 XML_Parser parser = XML_ParserCreate(NULL);
82
83 XML_SetUserData(parser, &rpack);
84 XML_SetElementHandler(parser, startElement, endElement);
85 XML_SetCharacterDataHandler(parser, characterData);
86 int return_status = 0;
87 const int parse_status
88 = XML_Parse(parser, c_doc_text, strlen(c_doc_text), XML_TRUE);
89
90 if (parse_status == XML_STATUS_ERROR) {
91 cerr << "Error: " << XML_ErrorString(XML_GetErrorCode(parser)) << " at line " << XML_GetCurrentLineNumber(parser) << endl;
92 return_status = 1;
93 }
94
95 XML_ParserFree(parser);
96 free(c_doc_text);
97 doc_content = to_uni(current_text); // Vital for non-ASCII documents
98 return return_status;
99}
100
101
Note: See TracBrowser for help on using the repository browser.