source: greenstone3/branches/customizingGreenstone3/src/java/org/greenstone/gsdl3/service/GoogleNgramMGPPSearch.java@ 15191

Last change on this file since 15191 was 15191, checked in by dmn, 16 years ago

updating branch from trunk

File size: 5.2 KB
Line 
1/*
2 * GoogleNgramMGPPSearch.java
3 * Copyright (C) 2002 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify it under
6 * the terms of the GNU General Public License as published by the Free Software
7 * Foundation; either version 2 of the License, or (at your option) any later version.
8 *
9 * This program is distributed in the hope that it will be useful,
10 * but WITHOUT ANY WARRANTY; without even the implied warranty of
11 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
12 * GNU General Public License for more details.
13 *
14 * You should have received a copy of the GNU General Public License
15 * along with this program; if not, write to the Free Software
16 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
17 */
18package org.greenstone.gsdl3.service;
19
20
21// Greenstone classes
22import org.greenstone.mgpp.*;
23import org.greenstone.gsdl3.util.*;
24import org.w3c.dom.Element;
25
26import java.util.Vector;
27import java.util.ArrayList;
28import java.util.Collections;
29import org.apache.log4j.*;
30
31/**
32 *
33 * @author <a href="mailto:[email protected]">Shaoqun Wu</a>
34 */
35
36public class GoogleNgramMGPPSearch
37 extends GS2MGPPSearch {
38 static Logger logger = Logger.getLogger(org.greenstone.gsdl3.service.GoogleNgramMGPPSearch.class.getName());
39 protected GDBMWrapper gdbm_src = null;
40
41 /** constructor */
42 public GoogleNgramMGPPSearch(){
43 super();
44 gdbm_src = new GDBMWrapper();
45
46 }
47
48 /** configure this service */
49 public boolean configure(Element info, Element extra_info) {
50 if (!super.configure(info, extra_info)){
51 return false;
52 }
53
54 // Open GDBM database for querying
55 String gdbm_db_file = GSFile.GDBMDatabaseFile(this.site_home, this.cluster_name, this.index_stem);
56 if (!this.gdbm_src.openDatabase(gdbm_db_file, GDBMWrapper.READER)) {
57 logger.error("Could not open GDBM database!");
58 return false;
59 }
60 this.default_max_docs = "-1";
61 this.default_hits_per_page = "30";
62 this.does_stem = false;
63 this.does_paging = true;
64 return true;
65 }
66
67 // sort the doc_nums by their frequency
68 protected String [] getDocIDs(Object query_result) {
69 try{
70 Vector docs = ((MGPPQueryResult)query_result).getDocs();
71 //ArrayList docList_past = new ArrayList();
72 //ArrayList docList_future = new ArrayList();
73 //ArrayList docList_present = new ArrayList();
74
75 ArrayList docList = new ArrayList();
76
77 for (int d = 0; d < docs.size(); d++) {
78 String num = Long.toString((((MGPPDocInfo) docs.elementAt(d)).num_));
79 String doc_id = internalNum2OID(num);
80 DBInfo gdbmInfo = this.gdbm_src.getInfo(doc_id);
81 String fre = (String)gdbmInfo.getInfo("Frequency");
82 String tense = (String)gdbmInfo.getInfo("Tense");
83
84 if(!fre.equals("")){
85 // if (tense.equals("past")){
86 // docList_past.add(new DocWrapper(num,Integer.parseInt(fre),tense));
87 // }
88 // else{
89 // if (tense.equals("future")){
90 // docList_future.add(new DocWrapper(num,Integer.parseInt(fre),tense));
91 // }
92 // else{
93 // if(tense.equals("present")){
94 // docList_present.add(new DocWrapper(num,Integer.parseInt(fre),tense));
95 // }
96 // }
97 //}
98 docList.add(new DocWrapper(num,Integer.parseInt(fre),tense));
99 }
100
101 }
102
103
104 //Collections.sort(docList_past);
105 //Collections.sort(docList_future);
106 //Collections.sort(docList_present);
107
108 Collections.sort(docList);
109 int i_pa = 0;
110 int i_f = 0;
111 int i_pre = 0;
112
113 //String [] doc_nums = new String [docList_past.size()+docList_future.size()+docList_present.size()];
114 String [] doc_nums = new String [docList.size()];
115 int interval = 10;
116
117 for(int d = 0; d < doc_nums.length; d++){
118
119 // for(;i_pre < docList_present.size() && interval > 0;i_pre++){
120// doc_nums[d] = ((DocWrapper)docList_present.get(i_pre)).num;
121// d++;
122// interval--;
123// }
124
125// interval = 10+interval;
126
127// for(;i_pa < docList_past.size() && interval > 0;i_pa++){
128// doc_nums[d] = ((DocWrapper)docList_past.get(i_pa)).num;
129// d++;
130// interval--;
131// }
132
133
134// interval = 10+interval;
135
136// for(;i_f < docList_future.size() && interval > 0;i_f++){
137// doc_nums[d] = ((DocWrapper)docList_future.get(i_f)).num;
138// d++;
139// interval--;
140// }
141
142// interval = 10;
143
144 doc_nums[d] = ((DocWrapper)docList.get(d)).num;
145
146 }
147
148 return doc_nums;
149 }
150 catch(Exception e){
151 e.printStackTrace();
152 }
153
154 return null;
155 }
156
157 static class DocWrapper implements Comparable{
158 public int fre = 0;
159 public String num = "";
160 public String tense = "";
161
162
163 public DocWrapper(String num, int fre, String tense){
164 this.fre = fre;
165 this.num = num;
166 this.tense = tense;
167 }
168
169 public int compareTo(Object o){
170
171 if (!(o instanceof DocWrapper)) return -1;
172 DocWrapper docIn = (DocWrapper)o;
173 if (num.equals(docIn.num)){
174 return 0;
175 }
176
177 if (fre > docIn.fre) return -1;
178 return 1;
179 }
180
181 public boolean equals(Object o){
182 if (!(o instanceof DocWrapper)) return false;
183 DocWrapper docIn = (DocWrapper)o;
184 if (num.equals(docIn.num)){
185 return true;
186 }
187 return false;
188 }
189
190
191 }
192
193
194}
195
196
Note: See TracBrowser for help on using the repository browser.