Ignore:
Timestamp:
2019-11-15T00:21:31+13:00 (4 years ago)
Author:
ak19
Message:

Changes to support the top 5 predicted langcodes and their confidence values per sentence/overlapping sentence (storing all 103 made some documents, such as those of site 00006, too big to go into MongoDB). Have re-run NutchTextDumpToMongoDB to send the new form of the docs into MongoDB.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • other-projects/maori-lang-detection/src/org/greenstone/atea/morphia/SentenceInfo.java

    r33653 r33674  
    11package org.greenstone.atea.morphia;
     2
     3import java.util.Map;
     4import java.util.HashMap;
    25
    36import dev.morphia.annotations.*;
    47
     8
     // SentenceInfo: Morphia entity (mapped with @Entity("Sentences")) that holds one
     // sentence's text together with its predicted languages. The predictions are kept
     // twice: as the LanguageInfo array handed to the constructor, and as a
     // langCode -> confidence lookup map that the constructor builds from that array.
     // All three fields are public and final; they are assigned once, in the constructor.
     9@Entity("Sentences")
     10public class SentenceInfo {
     11
     // The sentence text, stored exactly as passed to the constructor.
     12    public final String sentence;
     // Lookup from langCode to confidence, populated from languagesInfo in the constructor.
     13    public final Map<String, Double> languageToConfidenceMap;
     14    @Embedded
     15    public final LanguageInfo[] languagesInfo; // array of langCode and confidence value pairs
     16
     17   
     // Builds a SentenceInfo from the sentence text and its (langCode, confidence) entries.
     //   sentence  - the sentence text to store
     //   languages - the LanguageInfo entries for this sentence; the array reference is
     //               stored as-is (no defensive copy is made)
     // NOTE(review): a null 'languages' array, or a null element inside it, results in a
     // NullPointerException in the loop below - confirm callers never pass either.
     18    public SentenceInfo(String sentence, LanguageInfo[] languages) {
     19    this.sentence = sentence;
     20    this.languagesInfo = languages;
     21
     22    // let's store (langCode -> confidence) lookup in Map:
     23    this.languageToConfidenceMap = new HashMap<String, Double>();   
     24    for(LanguageInfo li : languages) {
     25        String langCode = li.langCode;
     // NOTE(review): the Double(double) constructor is deprecated since Java 9;
     // Double.valueOf(...) is the documented replacement.
     26        Double confidence = new Double(li.confidenceLevel);
     // If two entries share a langCode, the later entry's confidence replaces the earlier one.
     27        languageToConfidenceMap.put(langCode, confidence);
     28    }
     29    }
     30   
     31}
     32
     33// BACK WHEN WE ONLY STORED THE BEST PREDICTED LANGUAGE META FOR EACH SENTENCE:
     34/*
    535@Entity("Sentences")
    636public class SentenceInfo {
    737    public final double confidenceLevel;
    8     /** 3 letter lang code */
     38    // 3 letter lang code
    939    public final String langCode;
    1040    public final String sentence;
     
    1646    }
    1747}
     48*/
Note: See TracChangeset for help on using the changeset viewer.