source: gsdl/trunk/runtime-src/src/colservr/expat_resultset.cpp@ 20939

Last change on this file since 20939 was 20939, checked in by mdewsnip, 11 years ago

Now uses the "rank" attributes returned from Lucene searches for the [DocRank] values.

  • Property svn:keywords set to Author Date Id Revision
File size: 3.6 KB
Line 
1
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <expat.h>
5
6#if defined(GSDL_USE_OBJECTSPACE)
7# include <ospace\std\iostream>
8#elif defined(GSDL_USE_IOS_H)
9# include <iostream.h>
10#else
11# include <iostream>
12using namespace std;
13#endif
14
15#include "text_t.h"
16#include "queryinfo.h"
17#include "gsdlunicode.h"
18
19struct queryresultpack {
20 queryresultsclass* queryresult_ptr;
21 int match_count;
22};
23
/*
 * Looks up an attribute by name in an expat-style attribute array.
 *
 * attr     - NULL-terminated array of alternating name/value strings
 *            (name at even indices, its value at the following index).
 * att_name - attribute name to search for (exact strcmp match).
 *
 * Returns a heap-allocated copy of the matching value, which the caller
 * must release with free(), or NULL when there is no match, when either
 * argument is NULL, or when the allocation fails.  If the name occurs more
 * than once the last occurrence wins.
 */
char* get_attribute(const char** attr, char* att_name)
{
  if (attr == NULL || att_name == NULL) {
    return NULL;
  }

  // Only remember where the match is; copying once at the end means a
  // repeated name cannot leak an earlier copy.  Stopping when a name has
  // no paired value keeps the scan inside a malformed (odd-length) array.
  const char* match = NULL;
  for (int i = 0; attr[i] != NULL && attr[i+1] != NULL; i += 2) {
    if (strcmp(attr[i], att_name) == 0) {
      match = attr[i+1];
    }
  }

  if (match == NULL) {
    return NULL;
  }

  // malloc + memcpy instead of strdup: same free()-compatible result, but
  // only relies on ISO C/C++ library functions.
  size_t size = strlen(match) + 1;
  char* copy = (char*) malloc(size);
  if (copy != NULL) {
    memcpy(copy, match, size);
  }

  return copy;
}
36
37static void XMLCALL
38startElement(void *userData, const char *name, const char **attributes)
39{
40 queryresultpack* qrpack_ptr = (queryresultpack*)userData;
41 queryresultsclass* queryresult_ptr = qrpack_ptr->queryresult_ptr;
42
43 text_t element_name = (char*)name;
44
45 if (element_name == "ResultSet") {
46 char* cached_attribute_str = get_attribute(attributes, "cached");
47 if (cached_attribute_str != NULL) {
48 cerr << "Cached: " << cached_attribute_str << endl;
49 }
50 }
51
52 if (element_name == "Error") {
53 char* error_type_str = get_attribute(attributes, "type");
54 queryresult_ptr->error_message = error_type_str;
55 if ((text_t) error_type_str == "PARSE_EXCEPTION") {
56 queryresult_ptr->syntax_error = true;
57 }
58 }
59
60 if (element_name == "Term") {
61 char* term_value_str = get_attribute(attributes, "value");
62 char* term_frequency_str = get_attribute(attributes, "freq");
63
64 termfreqclass termfreqobj;
65 termfreqobj.termstr = to_uni(term_value_str);
66
67 termfreqobj.termfreq = atoi(term_frequency_str);
68 queryresult_ptr->orgterms.push_back(termfreqobj);
69 }
70
71 if (element_name == "StopWord") {
72 char* stop_word_value_str = get_attribute(attributes, "value");
73 queryresult_ptr->stopwords.insert((text_t) stop_word_value_str);
74 }
75
76 if (element_name=="MatchingDocsInfo") {
77 char* num_match_docs_str = get_attribute(attributes,"num");
78 if (num_match_docs_str != NULL) {
79 int num_match_docs = atoi(num_match_docs_str);
80
81 queryresult_ptr->docs_matched = num_match_docs;
82 free(num_match_docs_str);
83 }
84 else {
85 queryresult_ptr->docs_matched = 0;
86 }
87 }
88
89
90 if (element_name=="Match") {
91
92 char* id = get_attribute(attributes,"id");
93 if (id != NULL)
94 {
95 docresultclass doc;
96 doc.clear();
97 doc.docid = id;
98
99 char* docrank = get_attribute(attributes, "rank");
100 if (docrank != NULL)
101 {
102 doc.docweight = atof(docrank);
103 }
104
105 char* termfreq = get_attribute(attributes, "termfreq");
106 if (termfreq != NULL)
107 {
108 doc.num_query_terms_matched = atoi(termfreq);
109 }
110
111 queryresult_ptr->docs.docset[doc.docid] = doc;
112 queryresult_ptr->docs.docorder.push_back(doc.docid);
113 ++qrpack_ptr->match_count;
114
115 free(id);
116 }
117
118 }
119}
120
121static void XMLCALL
122endElement(void *userData, const char *name)
123{
124 // no need to do anything
125}
126
127
128
129
130int expat_resultset(text_t& xml_text, queryresultsclass& queryresult)
131{
132 queryresult.clear();
133 queryresultpack qrpack = { &queryresult, 0 };
134
135 char * xml_text_cstr = xml_text.getcstr();
136
137 XML_Parser parser = XML_ParserCreate(NULL);
138
139 XML_SetUserData(parser, &qrpack);
140 XML_SetElementHandler(parser, startElement, endElement);
141
142 int return_status = 0;
143 const int parse_status
144 = XML_Parse(parser, xml_text_cstr, strlen(xml_text_cstr), XML_TRUE);
145
146 if (parse_status == XML_STATUS_ERROR) {
147 fprintf(stderr,
148 "%s at line %d\n",
149 XML_ErrorString(XML_GetErrorCode(parser)),
150 XML_GetCurrentLineNumber(parser));
151 return_status = 1;
152 }
153
154 XML_ParserFree(parser);
155 delete []xml_text_cstr;
156
157 return return_status;
158}
Note: See TracBrowser for help on using the repository browser.