source: gsdl/trunk/runtime-src/src/colservr/expat_resultset.cpp@ 16947

Last change on this file since 16947 was 16947, checked in by mdewsnip, 16 years ago

Changed the Lucene code to use the Greenstone document OIDs directly, instead of creating its own numeric IDs and then mapping them to the Greenstone OIDs in the GDBM file. As well as being simpler and more space and speed efficient (the mapping no longer needs to be stored in the GDBM file, and no lookup needs to be done for each search result), this is another important step along the road to true incremental building.

  • Property svn:keywords set to Author Date Id Revision
File size: 3.6 KB
Line 
1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <expat.h>
5
6#if defined(GSDL_USE_OBJECTSPACE)
7# include <ospace\std\iostream>
8#elif defined(GSDL_USE_IOS_H)
9# include <iostream.h>
10#else
11# include <iostream>
12using namespace std;
13#endif
14
15#include "text_t.h"
16#include "queryinfo.h"
17#include "gsdlunicode.h"
18
19struct queryresultpack {
20 queryresultsclass* queryresult_ptr;
21 int match_count;
22};
23
// Look up an attribute by name in an expat-style attribute array and return
// a heap-allocated copy of its value.
//
//   attr     - NULL-terminated array laid out as name, value, name, value, ...
//   att_name - attribute name to search for (exact match via strcmp)
//
// Returns a newly allocated copy of the value of the first attribute whose
// name matches, or NULL when no attribute matches, when either argument is
// NULL, or when the allocation fails.  The caller owns the returned buffer
// and must release it with free().
char* get_attribute(const char** attr, char* att_name)
{
  if (attr == NULL || att_name == NULL) {
    return NULL;
  }

  for (int i = 0; attr[i] != NULL; i += 2) {
    if (strcmp(attr[i], att_name) != 0) {
      continue;
    }

    const char* value = attr[i + 1];
    if (value == NULL) {
      return NULL;
    }

    // Copy with malloc/memcpy rather than strdup: strdup is not part of
    // ISO C/C++, and the result stays compatible with the callers' free().
    size_t value_size = strlen(value) + 1;
    char* copy = (char*) malloc(value_size);
    if (copy != NULL) {
      memcpy(copy, value, value_size);
    }

    // Stop at the first match so that a repeated name can never overwrite
    // (and leak) an earlier copy.
    return copy;
  }

  return NULL;
}
36
37static void XMLCALL
38startElement(void *userData, const char *name, const char **attributes)
39{
40 queryresultpack* qrpack_ptr = (queryresultpack*)userData;
41 queryresultsclass* queryresult_ptr = qrpack_ptr->queryresult_ptr;
42
43 text_t element_name = (char*)name;
44
45 if (element_name == "ResultSet") {
46 char* cached_attribute_str = get_attribute(attributes, "cached");
47 if (cached_attribute_str != NULL) {
48 cerr << "Cached: " << cached_attribute_str << endl;
49 }
50 }
51
52 if (element_name == "Error") {
53 char* error_type_str = get_attribute(attributes, "type");
54 queryresult_ptr->error_message = error_type_str;
55 if ((text_t) error_type_str == "PARSE_EXCEPTION") {
56 queryresult_ptr->syntax_error = true;
57 }
58 }
59
60 if (element_name == "Term") {
61 char* term_value_str = get_attribute(attributes, "value");
62 char* term_frequency_str = get_attribute(attributes, "freq");
63
64 termfreqclass termfreqobj;
65 termfreqobj.termstr = to_uni(term_value_str);
66
67 termfreqobj.termfreq = atoi(term_frequency_str);
68 queryresult_ptr->orgterms.push_back(termfreqobj);
69 }
70
71 if (element_name == "StopWord") {
72 char* stop_word_value_str = get_attribute(attributes, "value");
73 queryresult_ptr->stopwords.insert((text_t) stop_word_value_str);
74 }
75
76 if (element_name=="MatchingDocsInfo") {
77 char* num_match_docs_str = get_attribute(attributes,"num");
78 if (num_match_docs_str != NULL) {
79 int num_match_docs = atoi(num_match_docs_str);
80
81 queryresult_ptr->docs_matched = num_match_docs;
82 free(num_match_docs_str);
83 }
84 else {
85 queryresult_ptr->docs_matched = 0;
86 }
87 }
88
89
90 if (element_name=="Match") {
91
92 char* id = get_attribute(attributes,"id");
93 if (id != NULL)
94 {
95 docresultclass doc;
96 doc.clear();
97 doc.docid = id;
98 doc.docweight = qrpack_ptr->match_count;
99
100 char* termfreq = get_attribute(attributes, "termfreq");
101 if (termfreq != NULL)
102 {
103 doc.num_query_terms_matched = atoi(termfreq);
104 }
105
106 queryresult_ptr->docs.docset[doc.docid] = doc;
107 queryresult_ptr->docs.docorder.push_back(doc.docid);
108 ++qrpack_ptr->match_count;
109
110 free(id);
111 }
112
113 }
114}
115
116static void XMLCALL
117endElement(void *userData, const char *name)
118{
119 // no need to do anything
120}
121
122
123
124
125int expat_resultset(text_t& xml_text, queryresultsclass& queryresult)
126{
127 queryresult.clear();
128 queryresultpack qrpack = { &queryresult, 0 };
129
130 char * xml_text_cstr = xml_text.getcstr();
131
132 XML_Parser parser = XML_ParserCreate(NULL);
133
134 XML_SetUserData(parser, &qrpack);
135 XML_SetElementHandler(parser, startElement, endElement);
136
137 int return_status = 0;
138 const int parse_status
139 = XML_Parse(parser, xml_text_cstr, strlen(xml_text_cstr), XML_TRUE);
140
141 if (parse_status == XML_STATUS_ERROR) {
142 fprintf(stderr,
143 "%s at line %d\n",
144 XML_ErrorString(XML_GetErrorCode(parser)),
145 XML_GetCurrentLineNumber(parser));
146 return_status = 1;
147 }
148
149 XML_ParserFree(parser);
150 free(xml_text_cstr);
151
152 return return_status;
153}
Note: See TracBrowser for help on using the repository browser.