source: gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/util/WekaFindInstanceKNN.java@ 36859

Last change on this file since 36859 was 36859, checked in by davidb, 19 months ago

Coding developments which mean that the arousal and valence values passed as parameters are not used; query_results_ capped to max_docs_

File size: 5.4 KB
Line 
1/*
2 * WekaFindInstanceKNN.java
3 * Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.util;
20
21
22import weka.core.DenseInstance;
23import weka.core.Instance;
24import weka.core.Instances;
25import weka.core.converters.ConverterUtils.DataSource;
26
27import weka.core.neighboursearch.LinearNNSearch;
28import weka.core.neighboursearch.NearestNeighbourSearch;
29//import weka.core.neighboursearch.KDTree;
30
31// Based on StackOverflow:
32// https://stackoverflow.com/questions/31350506/how-to-calculate-the-nearest-neighbors-using-weka-from-the-command-line
33
34// The following was also useful as a reference:
35// https://waikato.github.io/weka-blog/posts/2018-10-08-making-a-weka-classifier/
36
37public class WekaFindInstanceKNN
38{
39
40 public static Instances loadDataset(String input_filename)
41 {
42 Instances instances = null;
43
44 try {
45 DataSource source = new DataSource(input_filename);
46 instances = source.getDataSet();
47 }
48 catch (Exception e) {
49 e.printStackTrace();
50 }
51
52 return instances;
53 }
54
55 public static NearestNeighbourSearch initKNN(Instances instances)
56 {
57 LinearNNSearch knn = new LinearNNSearch(instances);
58
59 return knn;
60 }
61
62 public static void printNearestKNN(Instance sample_instance, Instances nearest_instances,
63 int k_nearest)
64 {
65 try {
66 //cycle through the instances and printout the nearestneighbors
67
68 System.err.println("\n" + sample_instance);
69 for(int i =0; i<k_nearest; i++) {
70 System.err.println("\t" + nearest_instances.instance(i));
71 }
72 }
73 catch (Exception e) {
74 e.printStackTrace();
75 }
76
77 }
78
79 static Instances input_instances_ = null;
80 static NearestNeighbourSearch knn_ = null;
81
82 public static void init(String input_filename)
83 {
84 if (input_instances_ == null) {
85 input_instances_ = loadDataset(input_filename);
86 }
87 if (knn_ == null) {
88 knn_ = initKNN(input_instances_);
89 }
90
91 }
92
93 public static Instances kNearestNeighbours(String doc_id_segment, double arousal_val,double valence_val, int k_nearest_num)
94 {
95 System.err.println("**** wekaFindInstnaceKNN::kNearestNeighbours() called with:");
96 System.err.print( "**** doc_id_segment = " + doc_id_segment);
97 System.err.print( " arousal_val = " + arousal_val);
98 System.err.print( " valence_val = " + valence_val);
99 System.err.println(" k_nearest_num = " + k_nearest_num);
100
101 Instance sample_instance = new DenseInstance(3);
102 sample_instance.setDataset(input_instances_);
103
104 // sample sample:
105 // ds_22716_5743-6,-0.549489,-0.118439
106 //sample_instance.setValue(0, "ds_22716_5743-6");
107 //sample_instance.setValue(1, -0.549489);
108 //sample_instance.setValue(2, -0.118439);
109
110
111 sample_instance.setValue(0, doc_id_segment);
112
113 sample_instance.setValue(1, arousal_val);
114 sample_instance.setValue(2, valence_val);
115
116 Instances nearest_instances = null;
117 try {
118 nearest_instances = knn_.kNearestNeighbours(sample_instance, k_nearest_num);
119 System.err.println("**** nearest_instances len = " + nearest_instances.numInstances());
120
121 //printNearestKNN(sample_instance,nearest_instances, k_nearest_num);
122
123 }
124 catch (Exception e) {
125 e.printStackTrace();
126 }
127
128 return nearest_instances;
129
130 }
131
132 public static void main(String[] args)
133 {
134 // First example output, when working through the instances specified in the CSV file
135 // (looking for similaries amongst all the instances in the CSV file)
136
137 // ds_22716_5743-6,-0.549489,-0.118439
138 // ds_22761_1171-12,-0.549489,-0.118439
139 // ds_21046_7743-30,-0.549489,-0.118439
140 // ds_24768_23507-6,-0.549489,-0.118439
141 // ds_22761_1171-15,-0.549489,-0.118439
142
143 if (args.length != 2) {
144 System.err.println("Usage: k-nearest-num file.{arff,csv}");
145 System.exit(1);
146 }
147
148 String k_nearest_str = args[0];
149 String input_filename = args[1];
150
151 int k_nearest = Integer.parseInt(k_nearest_str);
152
153 System.out.println("Weka Command Line Find Nearest " + k_nearest_str
154 + " Neighbors for each Instance in " + input_filename);
155
156 init(input_filename);
157
158 //Instances instances = loadDataset(input_filename);
159 //NearestNeighbourSearch knn = initKNN(instances);
160
161
162 Instance sample_instance = new DenseInstance(3);
163 sample_instance.setDataset(input_instances_);
164
165 // sample sample:
166 // ds_22716_5743-6,-0.549489,-0.118439
167 sample_instance.setValue(0, "ds_22716_5743-6");
168 sample_instance.setValue(1, -0.549489);
169 sample_instance.setValue(2, -0.118439);
170
171 try {
172 Instances nearest_instances = knn_.kNearestNeighbours(sample_instance, k_nearest);
173 System.out.println("**** nearest_instances len = " + nearest_instances.numInstances());
174
175 printNearestKNN(sample_instance,nearest_instances, k_nearest);
176
177 }
178 catch (Exception e) {
179 e.printStackTrace();
180 }
181
182
183
184 }
185}
Note: See TracBrowser for help on using the repository browser.