source: other-projects/the-macronizer/trunk/src/java/util/MacroniserLogFileProcessor.java@ 32749

Last change on this file since 32749 was 32749, checked in by ak19, 5 years ago

More Western Wilson stuff: finally got the bash script working. Cron task file is still a sample at present

File size: 10.5 KB
Line 
1package util;
2
import java.io.BufferedReader;
import java.io.FileInputStream;
import java.io.FileReader;
import java.io.IOException;
import java.io.InputStreamReader;
import java.nio.charset.StandardCharsets;
import java.time.LocalDate;
import java.time.LocalTime;
import java.time.format.DateTimeFormatter;
import java.time.format.DateTimeParseException;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
12
13//import MySQLAccess.Tuple;
14
15
16public class MacroniserLogFileProcessor {
17 static boolean debug = false;
18 /** ARGUMENTS:
19 * /home/wjkw1/RESEARCH_2018-19/bash_test/loggingtest.log
20 * /home/wjkw1/comp520/MacroniserLogs/Week02/macron.log
21 */
22
23 static final String directInputRegexp = "INFO : \\[\\d\\d\\d\\d\\-\\d\\d\\-\\d\\d \\d\\d:\\d\\d:\\d\\d\\] DirectInput.doPost\\(\\)";
24 static final String fileUploadRegexp = "ERROR: \\[\\d\\d\\d\\d\\-\\d\\d\\-\\d\\d \\d\\d:\\d\\d:\\d\\d\\] FileUpload.doPost\\(\\)";
25
26 static ArrayList<MacroniserLogFileData> extractedEntries;
27
28 public static void main(String[] args) {
29 checkArgs(args.length);
30 String filename = args[0];
31 //extracts using yesterdays date
32 extractFromLogFile(filename);
33
34
35 if(debug) {
36 int count = 0;
37 for (MacroniserLogFileData entry : extractedEntries) {
38 printMessage((++count) + ": " + entry.toString());
39 }
40 }
41
42 if(exportToDB()) {
43 System.out.println("Success. Move the log file " + filename + " to processed folder");
44 System.exit(0);
45 } else {
46 System.err.println("FAILED. Move the log file " + filename + " to reprocess folder");
47 System.exit(-1);
48 }
49 }
50
51 //Goal is to extract information from here to DB
52 private static boolean exportToDB() {
53
54 boolean success = false;
55 MySQLAccess sqlAccess = new MySQLAccess();
56
57 try {
58
59 sqlAccess.makeConnection();
60
61 System.out.println("Sending " + extractedEntries.size() + " entries to DB...");
62
63 //loop through all entries
64 for (MacroniserLogFileData entry : extractedEntries) {
65 //get the marked words from first entry
66 ArrayList<String> markedWords = getMarkedWordsFromOutput(entry.getOutputText());
67 // all these markedWords share the same date and time
68 LocalDate date = entry.getDate();
69 LocalTime time = entry.getTime();
70
71 if (markedWords == null) { // no words, enter NULL into db
72 if(debug) {
73 printMessage("word=NULL");
74 printMessage(date.toString());
75 printMessage(time.format(DateTimeFormatter.ofPattern("HH:mm:ss")));
76 printMessage(""); // newline
77 }
78
79 MySQLAccess.Tuple tuple = new MySQLAccess.Tuple(null,date,time);
80 // b. add to DB
81 sqlAccess.addNewEntry(tuple);
82 } else { // process all the words
83 for (String word : markedWords) {
84 if(debug) {
85 printMessage(word);
86 printMessage(date.toString());
87 printMessage(time.format(DateTimeFormatter.ofPattern("HH:mm:ss")));
88 printMessage(""); // newline
89 }
90
91 //insert all into database:
92 // a. create the tuple: always entering into db as lowercase so we don't consider tō different from Tō
93 // whether when inserting into DB or searching for the term
94 MySQLAccess.Tuple tuple = new MySQLAccess.Tuple(word.toLowerCase(),date,time);
95 // b. add to DB
96 sqlAccess.addNewEntry(tuple);
97 }
98
99 }
100 }
101 success = true;
102
103 } catch (Exception e) {
104 e.printStackTrace(); // goes to std.err, see https://stackoverflow.com/questions/12095378/difference-between-e-printstacktrace-and-system-out-printlne
105 success = false;
106
107 } finally {
108 sqlAccess.closeConnection();
109
110 }
111 return success;
112 }
113
114 //returns an array list of all marked words, null if there are none
115 private static ArrayList<String> getMarkedWordsFromOutput(String outputText) {
116 if(outputText == null) { return null; }
117
118 final Pattern TAG_REGEXP = Pattern.compile("<mark>(.+?)</mark>", Pattern.DOTALL);
119 final Matcher matcher = TAG_REGEXP.matcher(outputText);
120 ArrayList<String> markedWords = new ArrayList<>();
121 //find all values from matches
122 while (matcher.find()){
123 String s = matcher.group(1);
124 markedWords.add(s);
125 }
126 return markedWords;
127 }
128
129 //reads the log file and creates a list of data
130 private static void extractFromLogFile(String filename) {
131
132 BufferedReader br = null;
133 try {
134 br = new BufferedReader(new FileReader(filename));
135 extractedEntries = new ArrayList<>();
136
137 boolean firstTagFound = false;
138 String line = "";
139 StringBuilder extractedSB = new StringBuilder();
140
141 while ((line = br.readLine())!= null){
142 if(!firstTagFound){
143 //check for tag
144 firstTagFound = doesLineMatch(line);
145 //add to list if the first match
146 if(firstTagFound) {
147 extractedSB.append(line);
148 continue;
149 }
150 }else {
151 //if line is a match, remove and keep
152 if(doesLineMatch(line)){
153 //turn the string builder text into MacroniserLogFileData object
154 MacroniserLogFileData entry = transformLogFileStringToObject(extractedSB.toString());
155 //perform operations if not null, else error
156 if(entry != null) {
157 extractedEntries.add(entry);
158 //remove old content and keep new tag
159 extractedSB.setLength(0);
160 extractedSB.append(line);
161 } else{
162 //printErrorMsg("Parsing of entry in log file found an error, continuing on next lines...");
163 extractedSB.setLength(0);
164 extractedSB.append(line);
165 }
166
167 }else {
168 //keep the line
169 extractedSB.append(line);
170 }
171 }
172 }
173
174 } catch (Exception ex) {
175 ex.printStackTrace();
176 } finally {
177 if(br != null){
178 try {
179 br.close();
180 } catch (IOException e) {
181 e.printStackTrace();
182 }
183 }
184 }
185
186 }
187
188 //Changes the string into a MacorniserLogFileData object
189 private static MacroniserLogFileData transformLogFileStringToObject(String extractedString) {
190 MacroniserLogFileData entry;
191 LocalDate entryDate;
192 LocalTime entryTime;
193 String entryInput, entryOutput;
194
195 //Get the input output portion of the string
196 String input_output = "";
197 if (extractedString.charAt(0) == 'I') {
198
199 //Get the date and time of entry
200 int INDEX_STARTOF_DATE = 8, INDEX_ENDOF_DATE = 18, INDEX_STARTOF_TIME = 19, INDEX_ENDOF_TIME = 27;
201
202 entryDate = LocalDate.parse(extractedString.substring(INDEX_STARTOF_DATE, INDEX_ENDOF_DATE));
203
204 entryTime = LocalTime.parse(extractedString.substring(INDEX_STARTOF_TIME, INDEX_ENDOF_TIME));
205
206 //Get the input and output
207 input_output = extractedString.replaceAll(directInputRegexp, "").trim();
208
209 int INDEX_STARTOF_INPUT = 6;
210 int INDEX_ENDOF_INPUT = getEndofInputIndex(input_output);
211 int INDEX_STARTOF_OUTPUT = INDEX_ENDOF_INPUT + 8;
212 int INDEX_ENDOF_OUTPUT = input_output.length();
213 if (INDEX_ENDOF_INPUT == -1) {
214 entryInput = null;
215 entryOutput = null;
216
217 } else {
218 entryInput = input_output.substring(INDEX_STARTOF_INPUT, INDEX_ENDOF_INPUT);
219 entryOutput = input_output.substring(INDEX_STARTOF_OUTPUT, INDEX_ENDOF_OUTPUT);
220 }
221 entry = new MacroniserLogFileData(entryDate, entryTime, entryInput, entryOutput);
222
223 return entry;
224
225 } else if (extractedString.charAt(0) == 'E') {
226 input_output = extractedString.replaceAll(fileUploadRegexp, "");
227 return null;
228 } else {
229 printErrorMsg("Unknown entry in log file.");
230 return null;
231 }
232 }
233
234 //gets the end of input index, protects against log file 'input' that includes the tag
235 private static int getEndofInputIndex(String str) {
236 ArrayList<Integer> indexes = new ArrayList<>();
237 String outputTag = "Output: ";
238
239 //loop through and find all "Output: " Strings
240 int i = str.indexOf(outputTag);
241 while(i >= 0){
242 indexes.add(i);
243 i = str.indexOf(outputTag, i+1);
244 }
245
246 //return the correct index
247 if(indexes.size() >= 1){
248 int middle_index = (indexes.size() / 2);
249 return indexes.get(middle_index);
250 } else {
251 printMessage("Warning: No output tag could be found. Probably NULL input.");
252 return -1;
253 }
254 }
255
256 //Does line match the regexp
257 private static boolean doesLineMatch(String line) {
258 return (line.matches(directInputRegexp) || line.matches(fileUploadRegexp));
259 }
260
261 //checks the arguments
262 private static void checkArgs(int numArgs) {
263 if( numArgs != 1){
264 printErrorMsg("Incorrect number of arguments given - "+numArgs);
265 printUsage();
266 }
267 }
268
269 //Prints a message to std output
270 private static void printMessage(String msg){
271 System.out.println(msg);
272 }
273
274 //Prints the error message to "std error"
275 private static void printErrorMsg(String msg){
276 System.err.println("ERROR: "+ msg);
277 }
278
279 //Prints the usage of this program to "std error"
280 private static void printUsage(){
281 System.err.println("Usage: ");
282 System.err.println("\tjava LogFileExtraction <log file>");
283 System.err.println("\tWhere 'log file' is the log file of the Macroniser - macron.log");
284 }
285}
Note: See TracBrowser for help on using the repository browser.