source: trunk/gsdl/src/colservr/expat_resultset.cpp@ 12380

Last change on this file since 12380 was 12380, checked in by mdewsnip, 18 years ago

Now shows the stopwords removed by Lucene, many thanks to Me and DL Consulting Ltd.

  • Property svn:keywords set to Author Date Id Revision
File size: 3.0 KB
Line 
1
2#include <stdio.h>
3#include <string.h>
4#include <expat.h>
5
6#if defined(GSDL_USE_OBJECTSPACE)
7# include <ospace\std\iostream>
8#elif defined(GSDL_USE_IOS_H)
9# include <iostream.h>
10#else
11# include <iostream>
12using namespace std;
13#endif
14
15#include "text_t.h"
16#include "queryinfo.h"
17#include "gsdlunicode.h"
18
19struct queryresultpack {
20 queryresultsclass* queryresult_ptr;
21 int match_count;
22};
23
/*
 * Look up an attribute by name in an expat-style attribute array.
 *
 * attr     - NULL-terminated array laid out as name, value, name, value, ...
 * att_name - name of the attribute to look for
 *
 * Returns a freshly strdup()'d copy of the attribute's value, which the
 * caller owns and must release with free(), or NULL when the attribute is
 * not present (or when strdup() itself fails).  If the name occurs more than
 * once, the value of the last occurrence is returned.
 */
char* get_attribute(const char** attr, const char* att_name)
{
  if (attr == NULL || att_name == NULL) {
    return NULL;
  }

  // Remember where the match lives and duplicate it only once at the end, so
  // that a repeated name cannot leak the copies made for earlier matches.
  const char* found = NULL;

  // Also stop on a name that has no value slot rather than stepping past
  // the terminator of a malformed (odd-length) array.
  for (int i = 0; attr[i] != NULL && attr[i + 1] != NULL; i += 2) {
    if (strcmp(attr[i], att_name) == 0) {
      found = attr[i + 1];
    }
  }

  return (found != NULL) ? strdup(found) : NULL;
}
36
37static void XMLCALL
38startElement(void *userData, const char *name, const char **attributes)
39{
40 queryresultpack* qrpack_ptr = (queryresultpack*)userData;
41 queryresultsclass* queryresult_ptr = qrpack_ptr->queryresult_ptr;
42
43 text_t element_name = (char*)name;
44
45 if (element_name == "Term") {
46 char* term_value_str = get_attribute(attributes, "value");
47 char* term_frequency_str = get_attribute(attributes, "freq");
48
49 termfreqclass termfreqobj;
50 termfreqobj.termstr = to_uni(term_value_str);
51 termfreqobj.termfreq = atoi(term_frequency_str);
52 queryresult_ptr->orgterms.push_back(termfreqobj);
53 }
54
55 if (element_name == "StopWord") {
56 char* stop_word_value_str = get_attribute(attributes, "value");
57 queryresult_ptr->stopwords.insert((text_t) stop_word_value_str);
58 }
59
60 if (element_name=="MatchingDocsInfo") {
61 char* num_match_docs_str = get_attribute(attributes,"num");
62 if (num_match_docs_str != NULL) {
63 int num_match_docs = atoi(num_match_docs_str);
64
65 queryresult_ptr->docs_matched = num_match_docs;
66 free(num_match_docs_str);
67 }
68 else {
69 queryresult_ptr->docs_matched = 0;
70 }
71 }
72
73
74 if (element_name=="Match") {
75
76 char* id = get_attribute(attributes,"id");
77 if (id!=NULL) {
78 int id_num = atoi(id);
79
80 docresultclass doc;
81 doc.clear();
82 doc.docnum = id_num;
83 doc.docweight = qrpack_ptr->match_count;
84 queryresult_ptr->docs.docset[doc.docnum] = doc;
85 queryresult_ptr->docs.docorder.push_back(doc.docnum);
86 ++qrpack_ptr->match_count;
87
88 free(id);
89 }
90
91 }
92}
93
94static void XMLCALL
95endElement(void *userData, const char *name)
96{
97 // no need to do anything
98}
99
100
101
102
103int expat_resultset(text_t& xml_text, queryresultsclass& queryresult)
104{
105 queryresult.clear();
106 queryresultpack qrpack = { &queryresult, 0 };
107
108 char * xml_text_cstr = xml_text.getcstr();
109
110 cerr << "**** xml_text = " << xml_text.getcstr() << endl;
111
112 XML_Parser parser = XML_ParserCreate(NULL);
113
114 XML_SetUserData(parser, &qrpack);
115 XML_SetElementHandler(parser, startElement, endElement);
116
117 int return_status = 0;
118 const int parse_status
119 = XML_Parse(parser, xml_text_cstr, strlen(xml_text_cstr), XML_TRUE);
120
121 if (parse_status == XML_STATUS_ERROR) {
122 fprintf(stderr,
123 "%s at line %d\n",
124 XML_ErrorString(XML_GetErrorCode(parser)),
125 XML_GetCurrentLineNumber(parser));
126 return_status = 1;
127 }
128
129 XML_ParserFree(parser);
130 free(xml_text_cstr);
131
132 return return_status;
133}
Note: See TracBrowser for help on using the repository browser.