Ignore:
Timestamp:
2019-02-05T23:03:16+13:00 (5 years ago)
Author:
ak19
Message:

More Western Wilson stuff. 1. Major changes to fix the handling of UTF-8 text in the DB so that uniqueness actually works: finding (selecting) exact matches now works, and the code no longer triggers unique-constraint violations on insert. Words are now lowercased before being inserted, since only macrons matter and case doesn't. 2. The SQL DB's MarkedWords table needs to specify the uniqueness of its UTF-8 marked_word column differently for the UTF-8 handling to work.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/the-macronizer/trunk/src/java/util/MacroniserLogFileProcessor.java

    r32742 r32745  
    1515
    1616public class MacroniserLogFileProcessor {
     17    static boolean debug = false;
    1718    /** ARGUMENTS:
    1819     *              /home/wjkw1/RESEARCH_2018-19/bash_test/loggingtest.log
     
    2627
    2728    public static void main(String[] args) {
    28         //TODO: re enable this method and add text to command line interface
    29         //checkArgs(args.length);
     29        checkArgs(args.length);
    3030        String filename = args[0];
    3131        //extracts using yesterdays date
    3232        extractFromLogFile(filename);
    3333
    34         //TODO: REMOVE
    35         int count = 0;
    36         for (MacroniserLogFileData entry: extractedEntries
    37              ) {
    38             printMessage((++count)+": "+entry.toString());
    39         }
     34
     35        if(debug) {
     36            int count = 0;
     37            for (MacroniserLogFileData entry : extractedEntries) {
     38                printMessage((++count) + ": " + entry.toString());
     39            }
     40        }
     41
    4042        if(exportToDB()) {
    41             // TODO: move the log file to processed folder
    42 
     43            System.out.println("Success. Move the log file " + filename + " to processed folder");
     44            System.exit(0);
    4345        } else {
    44             // TODO: move the log file to reprocess folder
     46            System.err.println("FAILED. Move the log file " + filename + " to reprocess folder");
     47            System.exit(-1);
    4548        }
    4649    }
     
    5760
    5861            //loop through all entries
    59             for (MacroniserLogFileData entry : extractedEntries
    60                     ) {
     62            for (MacroniserLogFileData entry : extractedEntries) {
    6163                //get the marked words from first entry
    6264                ArrayList<String> markedWords = getMarkedWordsFromOutput(entry.getOutputText());
    63                 if (markedWords != null) {
     65                // all these markedWords share the same date and time
     66                LocalDate date = entry.getDate();
     67                LocalTime time = entry.getTime();
     68
     69                if (markedWords == null) { // no words, enter NULL into db
     70                    if(debug) {
     71                        printMessage("word=NULL");
     72                        printMessage(date.toString());
     73                        printMessage(time.format(DateTimeFormatter.ofPattern("HH:mm:ss")));
     74                        printMessage(""); // newline
     75                    }
     76
     77                    MySQLAccess.Tuple tuple = new MySQLAccess.Tuple(null,date,time);
     78                    // b. add to DB
     79                    sqlAccess.addNewEntry(tuple);
     80                } else { // process all the words
    6481                    for (String word : markedWords) {
    65                         // create the tuple
    66                         MySQLAccess.Tuple tuple = new MySQLAccess.Tuple(word,entry.getDate(),entry.getTime());
    67                         //TODO: insert all into database
    68                         //insert all into database
    69                         printMessage(word);
    70                         printMessage(entry.getDate().toString());
    71                         printMessage(entry.getTime().format(DateTimeFormatter.ofPattern("HH:mm:ss")));
    72                         printMessage("");
    73 
     82                        if(debug) {
     83                            printMessage(word);
     84                            printMessage(date.toString());
     85                            printMessage(time.format(DateTimeFormatter.ofPattern("HH:mm:ss")));
     86                            printMessage(""); // newline
     87                        }
     88
     89                        //insert all into database:
     90                        // a. create the tuple: always entering into db as lowercase so we don't consider tō different from Tō
     91                        // whether when inserting into DB or searching for the term
     92                        MySQLAccess.Tuple tuple = new MySQLAccess.Tuple(word.toLowerCase(),date,time);
     93                        // b. add to DB
    7494                        sqlAccess.addNewEntry(tuple);
    7595                    }
     96
    7697                }
    7798            }
    7899            success = true;
    79100
    80         }catch (Exception e) {
    81             e.printStackTrace();
     101        } catch (Exception e) {
     102            e.printStackTrace(); // goes to std.err, see https://stackoverflow.com/questions/12095378/difference-between-e-printstacktrace-and-system-out-printlne
    82103            success = false;
    83104
     
    91112    //returns an array list of all marked words, null if there are none
    92113    private static ArrayList<String> getMarkedWordsFromOutput(String outputText) {
     114        if(outputText == null) { return null; }
     115
    93116        final Pattern TAG_REGEXP = Pattern.compile("<mark>(.+?)</mark>", Pattern.DOTALL);
    94117        final Matcher matcher = TAG_REGEXP.matcher(outputText);
     
    104127    //reads the log file and creates a list of data
    105128    private static void extractFromLogFile(String filename) {
    106         //TODO: change the date that is used
    107 //        LocalDate yesterday = LocalDate.now().minusDays(1L);
    108         LocalDate specifiedDate = LocalDate.of(2018,11,06);
    109129
    110130        BufferedReader br = null;
     
    133153                        //perform operations if not null, else error
    134154                        if(entry != null) {
    135                             ////check if entry date is after specified date
    136                             if(entry.getDate().isAfter(specifiedDate)){
    137                                 //stop processing
    138                                 break;
    139                             } else if (entry.getDate().isEqual(specifiedDate)){
    140                                 extractedEntries.add(entry);
    141                             }
     155                            extractedEntries.add(entry);
    142156                            //remove old content and keep new tag
    143157                            extractedSB.setLength(0);
    144158                            extractedSB.append(line);
    145159                        } else{
    146                             printErrorMsg("Parsing of entry in log file found an error, continuing on next lines...");
     160                            //printErrorMsg("Parsing of entry in log file found an error, continuing on next lines...");
    147161                            extractedSB.setLength(0);
    148162                            extractedSB.append(line);
     
    179193        //Get the input output portion of the string
    180194        String input_output = "";
    181         if(extractedString.charAt(0)=='I'){
     195        if (extractedString.charAt(0) == 'I') {
    182196
    183197            //Get the date and time of entry
     
    191205            input_output = extractedString.replaceAll(directInputRegexp, "").trim();
    192206
    193             int INDEX_STARTOF_INPUT = 6, INDEX_ENDOF_INPUT = getEndofInputIndex(input_output),
    194                     INDEX_STARTOF_OUTPUT = INDEX_ENDOF_INPUT + 8, INDEX_ENDOF_OUTPUT = input_output.length();
    195             entryInput = input_output.substring(INDEX_STARTOF_INPUT, INDEX_ENDOF_INPUT);
    196             entryOutput = input_output.substring(INDEX_STARTOF_OUTPUT, INDEX_ENDOF_OUTPUT);
    197 
     207            int INDEX_STARTOF_INPUT = 6;
     208            int INDEX_ENDOF_INPUT = getEndofInputIndex(input_output);
     209            int INDEX_STARTOF_OUTPUT = INDEX_ENDOF_INPUT + 8;
     210            int INDEX_ENDOF_OUTPUT = input_output.length();
     211            if (INDEX_ENDOF_INPUT == -1) {
     212                entryInput = null;
     213                entryOutput = null;
     214
     215            } else {
     216                entryInput = input_output.substring(INDEX_STARTOF_INPUT, INDEX_ENDOF_INPUT);
     217                entryOutput = input_output.substring(INDEX_STARTOF_OUTPUT, INDEX_ENDOF_OUTPUT);
     218            }
    198219            entry = new MacroniserLogFileData(entryDate, entryTime, entryInput, entryOutput);
     220
    199221            return entry;
    200222
    201         } else if (extractedString.charAt(0)=='E') {
     223        } else if (extractedString.charAt(0) == 'E') {
    202224            input_output = extractedString.replaceAll(fileUploadRegexp, "");
    203225            return null;
     
    225247            return indexes.get(middle_index);
    226248        } else {
    227             printErrorMsg("No output tag could be found, error in log file.");
     249            printMessage("Warning: No output tag could be found. Probably NULL input.");
    228250            return -1;
    229251        }
Note: See TracChangeset for help on using the changeset viewer.