source: gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/util/WekaFindInstanceKNN.java@ 36853

Last change on this file since 36853 was 36853, checked in by davidb, 19 months ago

Logic that was being developed as a separate standalone Java project in mars-site brought over to be a util to support the main Service

File size: 5.2 KB
Line 
1/*
2 * WekaFindInstanceKNN.java
3 * Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
4 *
5 * This program is free software; you can redistribute it and/or modify
6 * it under the terms of the GNU General Public License as published by
7 * the Free Software Foundation; either version 2 of the License, or
8 * (at your option) any later version.
9 *
10 * This program is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 * GNU General Public License for more details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with this program; if not, write to the Free Software
17 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
18 */
19package org.greenstone.gsdl3.util;
20
21
22import weka.core.DenseInstance;
23import weka.core.Instance;
24import weka.core.Instances;
25import weka.core.converters.ConverterUtils.DataSource;
26
27import weka.core.neighboursearch.LinearNNSearch;
28import weka.core.neighboursearch.NearestNeighbourSearch;
29//import weka.core.neighboursearch.KDTree;
30
31// Based on StackOverflow:
32// https://stackoverflow.com/questions/31350506/how-to-calculate-the-nearest-neighbors-using-weka-from-the-command-line
33
34// The following was also useful as a reference:
35// https://waikato.github.io/weka-blog/posts/2018-10-08-making-a-weka-classifier/
36
37public class WekaFindInstanceKNN
38{
39
40 public static Instances loadDataset(String input_filename)
41 {
42 Instances instances = null;
43
44 try {
45 DataSource source = new DataSource(input_filename);
46 instances = source.getDataSet();
47 }
48 catch (Exception e) {
49 e.printStackTrace();
50 }
51
52 return instances;
53 }
54
55 public static NearestNeighbourSearch initKNN(Instances instances)
56 {
57 LinearNNSearch knn = new LinearNNSearch(instances);
58
59 return knn;
60 }
61
62 public static void printNearestKNN(Instance sample_instance, Instances nearest_instances,
63 int k_nearest)
64 {
65 try {
66 //cycle through the instances and printout the nearestneighbors
67
68 System.err.println("\n" + sample_instance);
69 for(int i =0; i<k_nearest; i++) {
70 System.err.println("\t" + nearest_instances.instance(i));
71 }
72 }
73 catch (Exception e) {
74 e.printStackTrace();
75 }
76
77 }
78
79 static Instances input_instances_ = null;
80 static NearestNeighbourSearch knn_ = null;
81
82 public static void init(String input_filename)
83 {
84 if (input_instances_ == null) {
85 input_instances_ = loadDataset(input_filename);
86 }
87 if (knn_ == null) {
88 knn_ = initKNN(input_instances_);
89 }
90
91 }
92
93 public static Instances kNearestNeighbours(String doc_id_segment, double arousal_val,double valence_val, int k_nearest_num)
94 {
95
96 Instance sample_instance = new DenseInstance(3);
97 sample_instance.setDataset(input_instances_);
98
99 // sample sample:
100 // ds_22716_5743-6,-0.549489,-0.118439
101 //sample_instance.setValue(0, "ds_22716_5743-6");
102 //sample_instance.setValue(1, -0.549489);
103 //sample_instance.setValue(2, -0.118439);
104
105 //String segment_str = Integer.toString(segment);
106 //sample_instance.setValue(0, doc_id +"-" + segment_str);
107
108 sample_instance.setValue(0, doc_id_segment);
109
110 sample_instance.setValue(1, arousal_val);
111 sample_instance.setValue(2, valence_val);
112
113 Instances nearest_instances = null;
114 try {
115 nearest_instances = knn_.kNearestNeighbours(sample_instance, k_nearest_num);
116 System.err.println("**** nearest_instances len = " + nearest_instances.numInstances());
117
118 //printNearestKNN(sample_instance,nearest_instances, k_nearest_num);
119
120 }
121 catch (Exception e) {
122 e.printStackTrace();
123 }
124
125 return nearest_instances;
126
127 }
128
129 public static void main(String[] args)
130 {
131 // First example output, when working through the instances specified in the CSV file
132 // (looking for similaries amongst all the instances in the CSV file)
133
134 // ds_22716_5743-6,-0.549489,-0.118439
135 // ds_22761_1171-12,-0.549489,-0.118439
136 // ds_21046_7743-30,-0.549489,-0.118439
137 // ds_24768_23507-6,-0.549489,-0.118439
138 // ds_22761_1171-15,-0.549489,-0.118439
139
140 if (args.length != 2) {
141 System.err.println("Usage: k-nearest-num file.{arff,csv}");
142 System.exit(1);
143 }
144
145 String k_nearest_str = args[0];
146 String input_filename = args[1];
147
148 int k_nearest = Integer.parseInt(k_nearest_str);
149
150 System.out.println("Weka Command Line Find Nearest " + k_nearest_str
151 + " Neighbors for each Instance in " + input_filename);
152
153 init(input_filename);
154
155 //Instances instances = loadDataset(input_filename);
156 //NearestNeighbourSearch knn = initKNN(instances);
157
158
159 Instance sample_instance = new DenseInstance(3);
160 sample_instance.setDataset(input_instances_);
161
162 // sample sample:
163 // ds_22716_5743-6,-0.549489,-0.118439
164 sample_instance.setValue(0, "ds_22716_5743-6");
165 sample_instance.setValue(1, -0.549489);
166 sample_instance.setValue(2, -0.118439);
167
168 try {
169 Instances nearest_instances = knn_.kNearestNeighbours(sample_instance, k_nearest);
170 System.out.println("**** nearest_instances len = " + nearest_instances.numInstances());
171
172 printNearestKNN(sample_instance,nearest_instances, k_nearest);
173
174 }
175 catch (Exception e) {
176 e.printStackTrace();
177 }
178
179
180
181 }
182}
Note: See TracBrowser for help on using the repository browser.