/**********************************************************************
 *
 * expat_document.cpp --
 *
 * Copyright (C) 2005-2010  The New Zealand Digital Library Project
 *
 * A component of the Greenstone digital library software
 * from the New Zealand Digital Library Project at the
 * University of Waikato, New Zealand.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *********************************************************************/
|
---|
[9406] | 26 |
|
---|
| 27 | #include <stdio.h>
|
---|
[9173] | 28 | #include <string.h>
|
---|
| 29 | #include <expat.h>
|
---|
[9984] | 30 | #include "gsdlunicode.h"
|
---|
[9173] | 31 |
|
---|
| 32 | #if defined(GSDL_USE_OBJECTSPACE)
|
---|
| 33 | # include <ospace\std\iostream>
|
---|
| 34 | #elif defined(GSDL_USE_IOS_H)
|
---|
| 35 | # include <iostream.h>
|
---|
| 36 | #else
|
---|
| 37 | # include <iostream>
|
---|
| 38 | using namespace std;
|
---|
| 39 | #endif
|
---|
| 40 |
|
---|
[9216] | 41 | #include "queryinfo.h"
|
---|
| 42 |
|
---|
| 43 | #include "expat_document.h"
|
---|
| 44 |
|
---|
[9173] | 45 | #include "text_t.h"
|
---|
| 46 | #include "fileutil.h"
|
---|
| 47 | #include "expat_resultset.h"
|
---|
| 48 |
|
---|
| 49 | struct resultpack {
|
---|
[9215] | 50 | text_t *current_text;
|
---|
[9216] | 51 | text_t *section_num;
|
---|
| 52 | text_t *section_level;
|
---|
[9215] | 53 | bool store_text;
|
---|
[9173] | 54 | };
|
---|
| 55 |
|
---|
| 56 | static void XMLCALL
|
---|
| 57 | startElement(void *userData, const char *name, const char **attributes)
|
---|
| 58 | {
|
---|
[9215] | 59 | resultpack * rpack_ptr = (resultpack*)userData;
|
---|
[9173] | 60 | text_t element_name = (char*)name;
|
---|
[9216] | 61 | if (element_name == *(rpack_ptr->section_level)) {
|
---|
[20730] | 62 | text_t id_att = (char *)get_attribute(attributes,"gs2:docOID");
|
---|
[9216] | 63 | if (id_att == *(rpack_ptr->section_num)) {
|
---|
[9215] | 64 | rpack_ptr->store_text = true;
|
---|
[9173] | 65 | }
|
---|
| 66 |
|
---|
| 67 | }
|
---|
| 68 | }
|
---|
| 69 |
|
---|
| 70 | static void XMLCALL
|
---|
| 71 | endElement(void *userData, const char *name)
|
---|
| 72 | {
|
---|
[9215] | 73 | resultpack * rpack_ptr = (resultpack*)userData;
|
---|
[9173] | 74 | text_t element_name = (char*)name;
|
---|
[9216] | 75 | if (element_name == *(rpack_ptr->section_level)) {
|
---|
[9215] | 76 | if (rpack_ptr->store_text == true) {
|
---|
[9173] | 77 | // we have finished now, can we quit this??
|
---|
[9215] | 78 | rpack_ptr->store_text = false;
|
---|
[9173] | 79 | }
|
---|
| 80 | }
|
---|
| 81 |
|
---|
| 82 | }
|
---|
| 83 |
|
---|
| 84 | static void XMLCALL
|
---|
| 85 | characterData(void *userData, const char * text, int len) {
|
---|
[9215] | 86 | resultpack * rpack_ptr = (resultpack*)userData;
|
---|
| 87 | if (rpack_ptr->store_text) {
|
---|
[9744] | 88 | rpack_ptr->current_text->appendcarr(text, len);
|
---|
[9173] | 89 | }
|
---|
| 90 | }
|
---|
| 91 |
|
---|
[9984] | 92 |
|
---|
| 93 | int expat_document(const text_t &filename, const text_t &sec_level, const text_t &sec_num, text_t & doc_content)
|
---|
| 94 | {
|
---|
[9215] | 95 | text_t current_text;
|
---|
[9173] | 96 | current_text.clear();
|
---|
[9215] | 97 | text_t section_num = sec_num;
|
---|
| 98 | text_t section_level = sec_level;
|
---|
| 99 |
|
---|
[9216] | 100 | resultpack rpack = { ¤t_text, §ion_num, §ion_level, false};
|
---|
[9215] | 101 | //cerr << "sec num = "<<sec_num<<", sec level="<<sec_level<<", filename="<<filename<<endl;
|
---|
[9173] | 102 | text_t doc_text;
|
---|
| 103 | read_file(filename, doc_text);
|
---|
| 104 |
|
---|
[9984] | 105 | char* c_doc_text = doc_text.getcstr();
|
---|
[9173] | 106 | XML_Parser parser = XML_ParserCreate(NULL);
|
---|
| 107 |
|
---|
| 108 | XML_SetUserData(parser, &rpack);
|
---|
| 109 | XML_SetElementHandler(parser, startElement, endElement);
|
---|
| 110 | XML_SetCharacterDataHandler(parser, characterData);
|
---|
| 111 | int return_status = 0;
|
---|
| 112 | const int parse_status
|
---|
| 113 | = XML_Parse(parser, c_doc_text, strlen(c_doc_text), XML_TRUE);
|
---|
[12427] | 114 |
|
---|
[9173] | 115 | if (parse_status == XML_STATUS_ERROR) {
|
---|
[12427] | 116 | cerr << "Error: " << XML_ErrorString(XML_GetErrorCode(parser)) << " at line " << XML_GetCurrentLineNumber(parser) << endl;
|
---|
[9173] | 117 | return_status = 1;
|
---|
| 118 | }
|
---|
| 119 |
|
---|
| 120 | XML_ParserFree(parser);
|
---|
[20761] | 121 | delete []c_doc_text;
|
---|
| 122 |
|
---|
[9984] | 123 | doc_content = to_uni(current_text); // Vital for non-ASCII documents
|
---|
[9173] | 124 | return return_status;
|
---|
| 125 | }
|
---|
| 126 |
|
---|
| 127 |
|
---|