source: main/trunk/model-sites-dev/mars/src/java/org/greenstone/mars/WekaApplyValanceModel.java@ 34788

Last change on this file since 34788 was 34788, checked in by davidb, 3 years ago

Code refactored, and then valence version of training and applying model developed

File size: 2.5 KB
Line 
1package org.greenstone.mars;
2
3//import java.util.Random;
4
5import java.io.BufferedInputStream;
6import java.io.FileInputStream;
7
8import java.io.BufferedReader;
9import java.io.BufferedWriter;
10import java.io.FileReader;
11import java.io.FileWriter;
12
13import weka.core.converters.ConverterUtils.DataSource;
14import weka.core.Attribute;
15import weka.core.Instance;
16import weka.core.Instances;
17import weka.core.SerializationHelper;
18
19import weka.filters.Filter;
20import weka.filters.unsupervised.attribute.Remove;
21
22import weka.classifiers.Classifier;
23
24
25// Based on:
26// https://waikato.github.io/weka-wiki/use_weka_in_your_java_code/
27
28class WekaApplyValanceModel
29{
30
31 public static void main(String[] args)
32 {
33 WekaUtil.checkUsageApplyModel(args);
34
35 String classifier_input_filename = args[0];
36 String unclassified_data_input_filename = args[1];
37 String classified_data_output_filename = args[2];
38
39 Classifier classifier = WekaUtil.loadClassifierModel(classifier_input_filename);
40
41 Instances unlabeled_instances= WekaUtil.loadInstancesForClassification(unclassified_data_input_filename);
42
43 // It is permissible to run this code and supply it with a data file that includes groundtruth in it.
44 // In this situation, the 'unlabeled' instances:
45 // (i) need to be massaged to be in the same form as truly unlabeled data
46 // (ii) we also set up 'groundtruth_instances' as an alias (reference) to 'filtered_unlabeled_instanced'
47 // to trigger calculating the error on the predicted vaues
48
49 boolean has_groundtruth_data = WekaUtil.instancesHavePredictAttribute(unlabeled_instances,WekaUtil.VALANCE_ATTRIBUTE_NAME);
50
51 // The following deals with (i) internally, ensuring that what is returned is suitable for making predictions on
52 Instances filtered_unlabeled_instances
53 = WekaUtil.filterInstancesForApplying(unlabeled_instances,has_groundtruth_data,
54 WekaUtil.VALANCE_ATTRIBUTE_NAME,"471");
55
56 // The following deals with (ii)
57 Instances groundtruth_instances = (has_groundtruth_data) ? filtered_unlabeled_instances : null;
58
59 Instances labeled_instances = WekaUtil.makePredictions(classifier, filtered_unlabeled_instances, groundtruth_instances);
60
61 try {
62 // Save labeled data
63
64 System.out.println("Saving labeled instances: " + classified_data_output_filename);
65 FileWriter fw = new FileWriter(classified_data_output_filename);
66 BufferedWriter bw = new BufferedWriter(fw);
67
68 bw.write(labeled_instances.toString());
69 bw.newLine();
70 bw.flush();
71 bw.close();
72
73 }
74 catch (Exception e) {
75 e.printStackTrace();
76 }
77
78 }
79}
Note: See TracBrowser for help on using the repository browser.