1 | /**********************************************************************
|
---|
2 | *
|
---|
3 | * expat_document.cpp --
|
---|
4 | *
|
---|
5 | * Copyright (C) 2005-2010 The New Zealand Digital Library Project
|
---|
6 | *
|
---|
7 | * A component of the Greenstone digital library software
|
---|
8 | * from the New Zealand Digital Library Project at the
|
---|
9 | * University of Waikato, New Zealand.
|
---|
10 | *
|
---|
11 | * This program is free software; you can redistribute it and/or modify
|
---|
12 | * it under the terms of the GNU General Public License as published by
|
---|
13 | * the Free Software Foundation; either version 2 of the License, or
|
---|
14 | * (at your option) any later version.
|
---|
15 | *
|
---|
16 | * This program is distributed in the hope that it will be useful,
|
---|
17 | * but WITHOUT ANY WARRANTY; without even the implied warranty of
|
---|
18 | * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
---|
19 | * GNU General Public License for more details.
|
---|
20 | *
|
---|
21 | * You should have received a copy of the GNU General Public License
|
---|
22 | * along with this program; if not, write to the Free Software
|
---|
23 | * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
---|
24 | *
|
---|
25 | *********************************************************************/
|
---|
26 |
|
---|
27 | #include <stdio.h>
|
---|
28 | #include <string.h>
|
---|
29 | #include <expat.h>
|
---|
30 | #include "gsdlunicode.h"
|
---|
31 |
|
---|
32 | #if defined(GSDL_USE_OBJECTSPACE)
|
---|
33 | # include <ospace\std\iostream>
|
---|
34 | #elif defined(GSDL_USE_IOS_H)
|
---|
35 | # include <iostream.h>
|
---|
36 | #else
|
---|
37 | # include <iostream>
|
---|
38 | using namespace std;
|
---|
39 | #endif
|
---|
40 |
|
---|
41 | #include "queryinfo.h"
|
---|
42 |
|
---|
43 | #include "expat_document.h"
|
---|
44 |
|
---|
45 | #include "text_t.h"
|
---|
46 | #include "fileutil.h"
|
---|
47 | #include "expat_resultset.h"
|
---|
48 |
|
---|
49 | struct resultpack {
|
---|
50 | text_t *current_text;
|
---|
51 | text_t *section_num;
|
---|
52 | text_t *section_level;
|
---|
53 | bool store_text;
|
---|
54 | };
|
---|
55 |
|
---|
56 | static void XMLCALL
|
---|
57 | startElement(void *userData, const char *name, const char **attributes)
|
---|
58 | {
|
---|
59 | resultpack * rpack_ptr = (resultpack*)userData;
|
---|
60 | text_t element_name = (char*)name;
|
---|
61 | if (element_name == *(rpack_ptr->section_level)) {
|
---|
62 | text_t id_att = (char *)get_attribute(attributes,"gs2:docOID");
|
---|
63 | if (id_att == *(rpack_ptr->section_num)) {
|
---|
64 | rpack_ptr->store_text = true;
|
---|
65 | }
|
---|
66 |
|
---|
67 | }
|
---|
68 | }
|
---|
69 |
|
---|
70 | static void XMLCALL
|
---|
71 | endElement(void *userData, const char *name)
|
---|
72 | {
|
---|
73 | resultpack * rpack_ptr = (resultpack*)userData;
|
---|
74 | text_t element_name = (char*)name;
|
---|
75 | if (element_name == *(rpack_ptr->section_level)) {
|
---|
76 | if (rpack_ptr->store_text == true) {
|
---|
77 | // we have finished now, can we quit this??
|
---|
78 | rpack_ptr->store_text = false;
|
---|
79 | }
|
---|
80 | }
|
---|
81 |
|
---|
82 | }
|
---|
83 |
|
---|
84 | static void XMLCALL
|
---|
85 | characterData(void *userData, const char * text, int len) {
|
---|
86 | resultpack * rpack_ptr = (resultpack*)userData;
|
---|
87 | if (rpack_ptr->store_text) {
|
---|
88 | rpack_ptr->current_text->appendcarr(text, len);
|
---|
89 | }
|
---|
90 | }
|
---|
91 |
|
---|
92 |
|
---|
93 | int expat_document(const text_t &filename, const text_t &sec_level, const text_t &sec_num, text_t & doc_content)
|
---|
94 | {
|
---|
95 | text_t current_text;
|
---|
96 | current_text.clear();
|
---|
97 | text_t section_num = sec_num;
|
---|
98 | text_t section_level = sec_level;
|
---|
99 |
|
---|
100 | resultpack rpack = { ¤t_text, §ion_num, §ion_level, false};
|
---|
101 | //cerr << "sec num = "<<sec_num<<", sec level="<<sec_level<<", filename="<<filename<<endl;
|
---|
102 | text_t doc_text;
|
---|
103 | read_file(filename, doc_text);
|
---|
104 |
|
---|
105 | char* c_doc_text = doc_text.getcstr();
|
---|
106 | XML_Parser parser = XML_ParserCreate(NULL);
|
---|
107 |
|
---|
108 | XML_SetUserData(parser, &rpack);
|
---|
109 | XML_SetElementHandler(parser, startElement, endElement);
|
---|
110 | XML_SetCharacterDataHandler(parser, characterData);
|
---|
111 | int return_status = 0;
|
---|
112 | const int parse_status
|
---|
113 | = XML_Parse(parser, c_doc_text, strlen(c_doc_text), XML_TRUE);
|
---|
114 |
|
---|
115 | if (parse_status == XML_STATUS_ERROR) {
|
---|
116 | cerr << "Error: " << XML_ErrorString(XML_GetErrorCode(parser)) << " at line " << XML_GetCurrentLineNumber(parser) << endl;
|
---|
117 | return_status = 1;
|
---|
118 | }
|
---|
119 |
|
---|
120 | XML_ParserFree(parser);
|
---|
121 | delete []c_doc_text;
|
---|
122 |
|
---|
123 | doc_content = to_uni(current_text); // Vital for non-ASCII documents
|
---|
124 | return return_status;
|
---|
125 | }
|
---|
126 |
|
---|
127 |
|
---|