source: main/trunk/model-sites-dev/mars/src/java/org/greenstone/mars/WekaApplyValenceModel.java@ 35207

Last change on this file since 35207 was 34802, checked in by davidb, 3 years ago

Saving instances changed to a DataSink, and located in WekaUtil

File size: 2.7 KB
Line 
1package org.greenstone.mars;
2
3/*
4import java.io.BufferedInputStream;
5import java.io.FileInputStream;
6
7import java.io.BufferedReader;
8import java.io.BufferedWriter;
9import java.io.FileReader;
10import java.io.FileWriter;
11
12import weka.core.converters.ConverterUtils.DataSource;
13import weka.core.Attribute;
14import weka.core.Instance;
15*/
16
17import weka.core.Instances;
18/*
19import weka.core.SerializationHelper;
20
21import weka.filters.Filter;
22import weka.filters.unsupervised.attribute.Remove;
23*/
24
25import weka.classifiers.Classifier;
26
27
28// Based on:
29// https://waikato.github.io/weka-wiki/use_weka_in_your_java_code/
30
31class WekaApplyValenceModel
32{
33
34 public static void main(String[] args)
35 {
36 WekaUtil.checkUsageApplyModel(args);
37
38 String classifier_input_filename = args[0];
39 String unclassified_data_input_filename = args[1];
40 String classified_data_output_filename = args[2];
41
42 Classifier classifier = WekaUtil.loadClassifierModel(classifier_input_filename);
43
44 Instances unlabeled_instances= WekaUtil.loadInstancesForClassification(unclassified_data_input_filename);
45
46 // It is permissible to run this code and supply it with a data file that includes groundtruth in it.
47 // In this situation, the 'unlabeled' instances:
48 // (i) need to be massaged to be in the same form as truly unlabeled data
49 // (ii) we also set up 'groundtruth_instances' as an alias (reference) to 'filtered_unlabeled_instanced'
50 // to trigger calculating the error on the predicted vaues
51
52 boolean has_groundtruth_data = WekaUtil.instancesHavePredictAttribute(unlabeled_instances,WekaUtil.VALENCE_ATTRIBUTE_NAME);
53
54 // The following deals with (i) internally, ensuring that what is returned is suitable for making predictions on
55 Instances filtered_unlabeled_instances
56 = WekaUtil.filterInstancesForApplying(unlabeled_instances,has_groundtruth_data,
57 WekaUtil.VALENCE_ATTRIBUTE_NAME,"471");
58
59 WekaUtil.checkDatasetInstancesCompatible(filtered_unlabeled_instances,"471");
60
61 // The following deals with (ii)
62 Instances groundtruth_instances = (has_groundtruth_data) ? filtered_unlabeled_instances : null;
63
64 System.out.println("Predicting valence:");
65 Instances labeled_instances = WekaUtil.makePredictions(classifier, filtered_unlabeled_instances, groundtruth_instances);
66
67 /*
68 try {
69 // Save labeled data
70
71 System.out.println("Saving labeled instances: " + classified_data_output_filename);
72 FileWriter fw = new FileWriter(classified_data_output_filename);
73 BufferedWriter bw = new BufferedWriter(fw);
74
75 bw.write(labeled_instances.toString());
76 bw.newLine();
77 bw.flush();
78 bw.close();
79
80 }
81 catch (Exception e) {
82 e.printStackTrace();
83 }
84 */
85
86 WekaUtil.saveInstancesAsDataSink(labeled_instances,classified_data_output_filename);
87 }
88}
Note: See TracBrowser for help on using the repository browser.