source: main/trunk/greenstone3/src/java/org/greenstone/gsdl3/service/GoogleNgramMGPPSearch.java@ 28181

Last change on this file since 28181 was 28181, checked in by kjdon, 11 years ago

making search param defaults able to be set in config file. uses <paramDefault name=xx value=yy> element. Now all defaults are set in paramDefaults HashMap instead of individual variables. have left index etc ones for now as they are more complicated.

File size: 4.9 KB
Line 
1/*
2 * GoogleNgramMGPPSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify it
6 * under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or
7 * (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20
21// Greenstone classes
22import org.greenstone.mgpp.*;
23import org.greenstone.gsdl3.util.*;
24import org.w3c.dom.Element;
25
26import java.util.Vector;
27import java.util.ArrayList;
28import java.util.Collections;
29import org.apache.log4j.*;
30
31/**
32 *
33 * @author <a href="mailto:[email protected]">Shaoqun Wu</a>
34 */
35
36public class GoogleNgramMGPPSearch
37 extends GS2MGPPSearch {
38 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GoogleNgramMGPPSearch.class.getName());
39
40 protected String default_max_docs = "-1";
41 protected String default_hits_per_page = "30";
42 /** constructor */
43 public GoogleNgramMGPPSearch(){
44 super();
45 }
46
47 /** configure this service */
48 public boolean configure(Element info, Element extra_info) {
49 if (!super.configure(info, extra_info)){
50 return false;
51 }
52
53 this.default_max_docs = "-1";
54 this.default_hits_per_page = "30";
55 this.does_stem = false;
56 this.does_paging = true;
57 return true;
58 }
59
60 // sort the doc_nums by their frequency
61 protected String [] getDocIDs(Object query_result) {
62 try{
63 Vector docs = ((MGPPQueryResult)query_result).getDocs();
64 //ArrayList docList_past = new ArrayList();
65 //ArrayList docList_future = new ArrayList();
66 //ArrayList docList_present = new ArrayList();
67
68 ArrayList<DocWrapper> docList = new ArrayList<DocWrapper>();
69
70 for (int d = 0; d < docs.size(); d++) {
71 String num = Long.toString((((MGPPDocInfo) docs.elementAt(d)).num_));
72 String doc_id = internalNum2OID(num);
73 DBInfo dbInfo = this.gs_doc_db.getInfo(doc_id);
74 String fre = (String)dbInfo.getInfo("Frequency");
75 String tense = (String)dbInfo.getInfo("Tense");
76
77 if(!fre.equals("")){
78 // if (tense.equals("past")){
79 // docList_past.add(new DocWrapper(num,Integer.parseInt(fre),tense));
80 // }
81 // else{
82 // if (tense.equals("future")){
83 // docList_future.add(new DocWrapper(num,Integer.parseInt(fre),tense));
84 // }
85 // else{
86 // if(tense.equals("present")){
87 // docList_present.add(new DocWrapper(num,Integer.parseInt(fre),tense));
88 // }
89 // }
90 //}
91 docList.add(new DocWrapper(num,Integer.parseInt(fre),tense));
92 }
93
94 }
95
96
97 //Collections.sort(docList_past);
98 //Collections.sort(docList_future);
99 //Collections.sort(docList_present);
100
101 Collections.sort(docList);
102 int i_pa = 0;
103 int i_f = 0;
104 int i_pre = 0;
105
106 //String [] doc_nums = new String [docList_past.size()+docList_future.size()+docList_present.size()];
107 String [] doc_nums = new String [docList.size()];
108 int interval = 10;
109
110 for(int d = 0; d < doc_nums.length; d++){
111
112 // for(;i_pre < docList_present.size() && interval > 0;i_pre++){
113// doc_nums[d] = ((DocWrapper)docList_present.get(i_pre)).num;
114// d++;
115// interval--;
116// }
117
118// interval = 10+interval;
119
120// for(;i_pa < docList_past.size() && interval > 0;i_pa++){
121// doc_nums[d] = ((DocWrapper)docList_past.get(i_pa)).num;
122// d++;
123// interval--;
124// }
125
126
127// interval = 10+interval;
128
129// for(;i_f < docList_future.size() && interval > 0;i_f++){
130// doc_nums[d] = ((DocWrapper)docList_future.get(i_f)).num;
131// d++;
132// interval--;
133// }
134
135// interval = 10;
136
137 doc_nums[d] = docList.get(d).num;
138
139 }
140
141 return doc_nums;
142 }
143 catch(Exception e){
144 e.printStackTrace();
145 }
146
147 return null;
148 }
149
150 static class DocWrapper implements Comparable{
151 public int fre = 0;
152 public String num = "";
153 public String tense = "";
154
155
156 public DocWrapper(String num, int fre, String tense){
157 this.fre = fre;
158 this.num = num;
159 this.tense = tense;
160 }
161
162 public int compareTo(Object o){
163
164 if (!(o instanceof DocWrapper)) return -1;
165 DocWrapper docIn = (DocWrapper)o;
166 if (num.equals(docIn.num)){
167 return 0;
168 }
169
170 if (fre > docIn.fre) return -1;
171 return 1;
172 }
173
174 public boolean equals(Object o){
175 if (!(o instanceof DocWrapper)) return false;
176 DocWrapper docIn = (DocWrapper)o;
177 if (num.equals(docIn.num)){
178 return true;
179 }
180 return false;
181 }
182
183
184 }
185
186
187}
188
189
Note: See TracBrowser for help on using the repository browser.