source: other-projects/the-macronizer/trunk/src/java/util/MacroniserLogFileProcessor.java@ 32757

Last change on this file since 32757 was 32757, checked in by ak19, 5 years ago

Minor improvements to code and correction to mysql.properties file path

File size: 10.7 KB
Line 
1package util;
2
3import java.io.BufferedReader;
4import java.io.FileReader;
5import java.io.IOException;
6import java.time.LocalDate;
7import java.time.LocalTime;
8import java.time.format.DateTimeFormatter;
9import java.util.ArrayList;
10import java.util.regex.Matcher;
11import java.util.regex.Pattern;
12
13//import MySQLAccess.Tuple;
14
15public class MacroniserLogFileProcessor {
16 static boolean debug = false;
17
18 /** ARGUMENTS:
19 * /home/wjkw1/RESEARCH_2018-19/bash_test/loggingtest.log
20 * /home/wjkw1/comp520/MacroniserLogs/Week02/macron.log
21 */
22
23 static final String directInputRegexp = "INFO : \\[\\d\\d\\d\\d\\-\\d\\d\\-\\d\\d \\d\\d:\\d\\d:\\d\\d\\] DirectInput.doPost\\(\\)\\s*";
24 static final String fileUploadRegexp = "ERROR: \\[\\d\\d\\d\\d\\-\\d\\d\\-\\d\\d \\d\\d:\\d\\d:\\d\\d\\] FileUpload.doPost\\(\\)\\s*";
25
26 static ArrayList<MacroniserLogFileData> extractedEntries;
27
28 public static void main(String[] args) {
29 checkArgs(args.length);
30 String filename = args[0];
31 //extracts using yesterdays date
32 extractFromLogFile(filename);
33
34
35 if(debug) {
36 int count = 0;
37 for (MacroniserLogFileData entry : extractedEntries) {
38 printMessage((++count) + ": " + entry.toString());
39 }
40 }
41
42 if(exportToDB()) {
43 System.out.println("Success. Move the log file " + filename + " to processed folder");
44 System.exit(0);
45 } else {
46 System.err.println("FAILED. Move the log file " + filename + " to reprocess folder");
47 System.exit(-1);
48 }
49 }
50
51 //Goal is to extract information from here to DB
52 private static boolean exportToDB() {
53
54 boolean success = false;
55 MySQLAccess sqlAccess = new MySQLAccess();
56
57 try {
58
59 sqlAccess.makeConnection();
60
61 System.out.println("Sending " + extractedEntries.size() + " entries to DB...");
62
63 //loop through all entries
64 for (MacroniserLogFileData entry : extractedEntries) {
65 //get the marked words from first entry
66 ArrayList<String> markedWords = getMarkedWordsFromOutput(entry.getOutputText());
67 // all these markedWords share the same date and time
68 LocalDate date = entry.getDate();
69 LocalTime time = entry.getTime();
70
71 if (markedWords == null) { // no words, enter NULL into db
72 if(debug) {
73 printMessage("word=NULL");
74 printMessage(date.toString());
75 printMessage(time.format(DateTimeFormatter.ofPattern("HH:mm:ss")));
76 printMessage(""); // newline
77 }
78
79 MySQLAccess.Tuple tuple = new MySQLAccess.Tuple(null,date,time);
80 // b. add to DB
81 sqlAccess.addNewEntry(tuple);
82 } else { // process all the words
83 for (String word : markedWords) {
84 if(debug) {
85 printMessage(word);
86 printMessage(date.toString());
87 printMessage(time.format(DateTimeFormatter.ofPattern("HH:mm:ss")));
88 printMessage(""); // newline
89 }
90
91 //insert all into database:
92 // a. create the tuple: always entering into db as lowercase so we don't consider tō different from Tō
93 // whether when inserting into DB or searching for the term
94 MySQLAccess.Tuple tuple = new MySQLAccess.Tuple(word.toLowerCase(),date,time);
95 // b. add to DB
96 sqlAccess.addNewEntry(tuple);
97 }
98
99 }
100 }
101 success = true;
102
103 } catch (Exception e) {
104 e.printStackTrace(); // goes to std.err, see https://stackoverflow.com/questions/12095378/difference-between-e-printstacktrace-and-system-out-printlne
105 success = false;
106
107 } finally {
108 sqlAccess.closeConnection();
109
110 }
111 return success;
112 }
113
114 //returns an array list of all marked words, null if there are none
115 private static ArrayList<String> getMarkedWordsFromOutput(String outputText) {
116 if(outputText == null) { return null; }
117
118 final Pattern TAG_REGEXP = Pattern.compile("<mark>(.+?)</mark>", Pattern.DOTALL);
119 final Matcher matcher = TAG_REGEXP.matcher(outputText);
120 ArrayList<String> markedWords = new ArrayList<>();
121 //find all values from matches
122 while (matcher.find()){
123 String s = matcher.group(1);
124 markedWords.add(s);
125 }
126 return markedWords;
127 }
128
129 //reads the log file and creates a list of data
130 private static void extractFromLogFile(String filename) {
131
132 ///System.out.println("** In extractFromLogFile: " + filename);
133
134 BufferedReader br = null;
135 try {
136 br = new BufferedReader(new FileReader(filename));
137 extractedEntries = new ArrayList<>();
138
139 boolean firstTagFound = false;
140 String line = "";
141 StringBuilder extractedSB = new StringBuilder();
142
143 while ((line = br.readLine())!= null){
144 ///System.out.println("@@@ Read line: " + line);
145
146 if(!firstTagFound){
147 //check for tag
148 firstTagFound = doesLineMatch(line);
149 //add to list if the first match
150 if(firstTagFound) {
151 ///System.out.println("Found first tag: " + line);
152 extractedSB.append(line);
153 continue;
154 }
155 }else {
156 //if line is a match, remove and keep
157 if(doesLineMatch(line)){
158 //turn the string builder text into MacroniserLogFileData object
159 ///System.out.println("LINE MATCHED: " + line);
160
161 MacroniserLogFileData entry = transformLogFileStringToObject(extractedSB.toString());
162 //perform operations if not null, else error
163 if(entry != null) {
164 extractedEntries.add(entry);
165 //remove old content and keep new tag
166 extractedSB.setLength(0);
167 extractedSB.append(line);
168 } else{
169 //printErrorMsg("Parsing of entry in log file found an error, continuing on next lines...");
170 extractedSB.setLength(0);
171 extractedSB.append(line);
172 }
173
174 }else {
175 //keep the line
176 extractedSB.append(line);
177 }
178 }
179 }
180
181 } catch (Exception ex) {
182 ex.printStackTrace();
183 } finally {
184 if(br != null){
185 try {
186 br.close();
187 } catch (IOException e) {
188 e.printStackTrace();
189 }
190 }
191 }
192
193 }
194
195 //Changes the string into a MacorniserLogFileData object
196 private static MacroniserLogFileData transformLogFileStringToObject(String extractedString) {
197 MacroniserLogFileData entry;
198 LocalDate entryDate;
199 LocalTime entryTime;
200 String entryInput, entryOutput;
201
202 //Get the input output portion of the string
203 String input_output = "";
204 if (extractedString.charAt(0) == 'I') {
205
206 //Get the date and time of entry
207 int INDEX_STARTOF_DATE = 8, INDEX_ENDOF_DATE = 18, INDEX_STARTOF_TIME = 19, INDEX_ENDOF_TIME = 27;
208
209 entryDate = LocalDate.parse(extractedString.substring(INDEX_STARTOF_DATE, INDEX_ENDOF_DATE));
210
211 entryTime = LocalTime.parse(extractedString.substring(INDEX_STARTOF_TIME, INDEX_ENDOF_TIME));
212
213 //Get the input and output
214 input_output = extractedString.replaceAll(directInputRegexp, "").trim();
215
216 int INDEX_STARTOF_INPUT = 6;
217 int INDEX_ENDOF_INPUT = getEndofInputIndex(input_output);
218 int INDEX_STARTOF_OUTPUT = INDEX_ENDOF_INPUT + 8;
219 int INDEX_ENDOF_OUTPUT = input_output.length();
220 if (INDEX_ENDOF_INPUT == -1) {
221 entryInput = null;
222 entryOutput = null;
223
224 } else {
225 entryInput = input_output.substring(INDEX_STARTOF_INPUT, INDEX_ENDOF_INPUT);
226 entryOutput = input_output.substring(INDEX_STARTOF_OUTPUT, INDEX_ENDOF_OUTPUT);
227 }
228 entry = new MacroniserLogFileData(entryDate, entryTime, entryInput, entryOutput);
229
230 return entry;
231
232 } else if (extractedString.charAt(0) == 'E') {
233 input_output = extractedString.replaceAll(fileUploadRegexp, "");
234 return null;
235 } else {
236 printErrorMsg("Unknown entry in log file.");
237 return null;
238 }
239 }
240
241 //gets the end of input index, protects against log file 'input' that includes the tag
242 private static int getEndofInputIndex(String str) {
243 ArrayList<Integer> indexes = new ArrayList<>();
244 String outputTag = "Output: ";
245
246 //loop through and find all "Output: " Strings
247 int i = str.indexOf(outputTag);
248 while(i >= 0){
249 indexes.add(i);
250 i = str.indexOf(outputTag, i+1);
251 }
252
253 //return the correct index
254 if(indexes.size() >= 1){
255 int middle_index = (indexes.size() / 2);
256 return indexes.get(middle_index);
257 } else {
258 printMessage("Warning: No output tag could be found. Probably NULL input.");
259 return -1;
260 }
261 }
262
263 //Does line match the regexp
264 private static boolean doesLineMatch(String line) {
265 return (line.matches(directInputRegexp) || line.matches(fileUploadRegexp));
266 }
267
268 //checks the arguments
269 private static void checkArgs(int numArgs) {
270 if( numArgs != 1){
271 printErrorMsg("Incorrect number of arguments given - "+numArgs);
272 printUsage();
273 }
274 }
275
276 //Prints a message to std output
277 private static void printMessage(String msg){
278 System.out.println(msg);
279 }
280
281 //Prints the error message to "std error"
282 private static void printErrorMsg(String msg){
283 System.err.println("ERROR: "+ msg);
284 }
285
286 //Prints the usage of this program to "std error"
287 private static void printUsage(){
288 System.err.println("Usage: ");
289 System.err.println("\tjava LogFileExtraction <log file>");
290 System.err.println("\tWhere 'log file' is the log file of the Macroniser - macron.log");
291 }
292}
Note: See TracBrowser for help on using the repository browser.