Changeset 36859


Ignore:
Timestamp:
2022-10-22T15:41:14+13:00 (18 months ago)
Author:
davidb
Message:

Coding developments that mean the arousal and valence values passed in as params are now used; query_result_ is capped to max_docs_

Location:
gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3
Files:
3 edited

Legend:

Unmodified
Added
Removed
  • gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/service/GS2WekaDBSearch.java

    r36857 r36859  
    5151    protected static final String RADIUS_PARAM  = "radius";
    5252    protected static final String MAXDOCS_PARAM = "maxDocs";
     53    protected static final String AROUSAL_PARAM  = "arousal";
     54    protected static final String VALENCE_PARAM  = "valence";
    5355
    5456    //protected static final String WEKA_MODEL_DEFAULT_DIRECTORY = "weka-model";
     
    115117        setStandardQueryParams (params); // ****
    116118       
    117         this.wekadb_src.runQuery (weka_db_index_dir, KNN_FEATURES_FILENAME, assoc_index_dir, query);
    118         Vector docs = this.wekadb_src.getQueryResult ();
     119        this.wekadb_src.runQuery(weka_db_index_dir, KNN_FEATURES_FILENAME, assoc_index_dir, query);
     120        Vector docs = this.wekadb_src.getQueryResult();
    119121
    120122        if (docs.isEmpty()) {
     
    145147        result.appendChild (document_list);
    146148        for (int d = 0; d < docs.size (); d++) {
    147             WekaDBDocInfo adb_doc = (WekaDBDocInfo) docs.elementAt(d);
    148 
    149             String doc_id  = adb_doc.getDocID();
    150             double rank    = adb_doc.getTopRank();
    151             String offsets = adb_doc.getOffsetList();
     149            WekaDBDocInfo wdb_doc = (WekaDBDocInfo) docs.elementAt(d);
     150
     151            String doc_id  = wdb_doc.getDocID();
     152            double rank    = wdb_doc.getTopRank();
     153            String offsets = wdb_doc.getOffsetList();
    152154
    153155            Element doc_node = createDocNode (result_doc, doc_id, Double.toString (rank));
     
    191193        int docs = Integer.parseInt(value);
    192194        this.wekadb_src.setMaxDocs(docs);
     195        }
     196        else if (name.equals(AROUSAL_PARAM)) {
     197        double arousal = Double.parseDouble(value);
     198        this.wekadb_src.setArousal(arousal);
     199        }       
     200        else if (name.equals(VALENCE_PARAM)) {
     201        double valence = Double.parseDouble(value);
     202        this.wekadb_src.setValence(valence);       
    193203        } // ignore any others
    194204    }
  • gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/util/WekaDBWrapper.java

    r36857 r36859  
    4545
    4646    protected int offset_ = 100;
    47     protected int length_ = 20;
     47    protected int length_ = 20; // **** Unused
    4848   
    4949    // Approximate matching not yet utilized
    50     protected double radius_;
     50    protected double radius_;   // **** Unused
    5151
    5252    protected int max_docs_;
    5353
     54    protected double arousal_;
     55    protected double valence_;
     56   
    5457    static Logger logger = Logger.getLogger (org.greenstone.gsdl3.util.WekaDBWrapper.class.getName ());
    5558
     
    7679    radius_ = radius;
    7780    }
    78 
     81   
    7982    public void setMaxDocs(int max_docs) {
    8083    max_docs_ = max_docs;
    8184    }
    8285
     86    public void setArousal(double arousal) {
     87    arousal_ = arousal;
     88    }
     89    public void setValence(double valence) {
     90    valence_ = valence;
     91    }
     92   
    8393    /** returns a string with all the current query param settings */
    8494    // the following was in MG version, do we need this in WekaDB version? // ****
     
    142152    WekaFindInstanceKNN.init(full_knn_model_filename);
    143153
    144     //Instances nearest_instances = WekaFindInstanceKNN.kNearestNeighbours("ds_22716_5743",6,-0.549489,-0.118439,3);
    145     String doc_id = query_string;
    146 
    147     int segment = 6;
    148     //String segment_str = Integer.toString(segment);
    149     String doc_id_segment =  doc_id +"-" + segment;
    150 
    151     double arousal_val = -0.549489;
    152     double valence_val =  -0.118439;
    153     int k_nearest_num = 3;
    154    
    155     System.err.println("doc_id_segment = " + doc_id_segment);
    156 
    157    
    158     Instances nearest_instances = WekaFindInstanceKNN.kNearestNeighbours(doc_id_segment,arousal_val,valence_val,k_nearest_num);
    159 
    160        
    161     query_result_ = new Vector();
    162 
    163     /*
    164     WekaDBDocInfo wekaDB_doc_info1 = new WekaDBDocInfo("ds_20415_2337",0.9,4);
    165     query_result_.add(wekaDB_doc_info1);
    166 
    167     WekaDBDocInfo wekaDB_doc_info2 = new WekaDBDocInfo("ds_51017_15513",0.87,1);
    168     query_result_.add(wekaDB_doc_info2);
    169 
    170     WekaDBDocInfo wekaDB_doc_info3 = new WekaDBDocInfo("ds_20415_2337",0.82,6);
    171     query_result_.add(wekaDB_doc_info3);
    172     */
    173 
    174     int clamped_k_nearest_num = Math.min(k_nearest_num,nearest_instances.size());
    175    
    176     for (int i=0; i<clamped_k_nearest_num; i++) {
    177         Instance instance = nearest_instances.instance(i);
     154    String doc_id  = query_string;
     155    int    segment = offset_;
     156
     157    String query_doc_id_segment =  doc_id + "-" + segment;
     158
     159    double query_arousal_val = arousal_;
     160    double query_valence_val =  valence_;
     161
     162    int k_nearest_num = max_docs_;
     163    int expanded_k_nearest_num = max_docs_ * 5; // * internally get more matches, then sift through to arrive at the best 'max_docs_'
     164   
     165    Pattern doc_seg_re = Pattern.compile("^(\\w+)-(\\d+)$");
     166    //Matcher query_doc_seg_match = doc_seq_re.matcher(query_doc_id_segment);
     167   
     168    Instances nearest_instances
     169        = WekaFindInstanceKNN.kNearestNeighbours(query_doc_id_segment,query_arousal_val,query_valence_val,k_nearest_num);
     170
     171       
     172    Vector expanded_query_result = new Vector();
     173
     174    int nearest_instances_len = nearest_instances.size();
     175   
     176    int clamped_expanded_k_nearest_num = Math.min(expanded_k_nearest_num,nearest_instances_len);
     177
     178    double pos_penalty = 0.1;
     179   
     180    for (int ei=0; ei<clamped_expanded_k_nearest_num; ei++) {
     181        Instance instance = nearest_instances.instance(ei);
    178182        logger.info("\tProcessing returned instance: " + instance);
    179183       
    180         //Attribute doc_id_segment_att = instance.attribute(0);
    181         //String matching_doc_id_segment = instance.attribute(0).value(0);
    182184        String matching_doc_id_segment = instance.stringValue(0);
    183         //double arousal_val    = instance.attribute(1);
    184         //double valence_val    = instance.attribute(2);
    185 
    186 
    187         Pattern p = Pattern.compile("^(\\w+)-(\\d+)$");
    188         Matcher m = p.matcher(matching_doc_id_segment);
     185
     186       
     187        //Pattern p = Pattern.compile("^(\\w+)-(\\d+)$");
     188        Matcher m = doc_seg_re.matcher(matching_doc_id_segment);
    189189        if (m.matches()) {
    190190
    191191        String matching_doc_id = m.group(1);
    192192        int matching_segment_offset = Integer.parseInt(m.group(2));
    193        
    194         double matching_rank = 0.9;
     193
     194        if (matching_doc_id.equals(doc_id)) {
     195            continue;
     196        }
     197       
     198        double matching_arousal_val = instance.value(1);
     199        double matching_valence_val = instance.value(2);
     200       
     201        double matching_diff = (Math.abs(query_arousal_val - matching_arousal_val)
     202                    + Math.abs(query_valence_val - matching_valence_val))/4.0;
     203        double matching_rank = 1.0 - matching_diff - (pos_penalty * (double)ei);
    195204
    196205        logger.info("\tAdding in: matching_doc_id = " + matching_doc_id);
    197206        WekaDBDocInfo wekaDB_doc_info = new WekaDBDocInfo(matching_doc_id,matching_rank,matching_segment_offset);
    198         query_result_.add(wekaDB_doc_info);
     207        expanded_query_result.add(wekaDB_doc_info);
    199208        }
    200209        else {
    201         logger.error("Returned AV k-nearest neighbour match '"+doc_id_segment+"' could not be parsed as <doc-id>-<segment>" );
     210        logger.error("Returned AV k-nearest neighbour match '"+matching_doc_id_segment+"' could not be parsed as <doc-id>-<segment>" );
     211        }         
     212    }
     213   
     214    query_result_ = new Vector();
     215
     216    int i = 0;
     217    while (i < k_nearest_num) {
     218        if (i >= expanded_query_result.size()) {
     219        break;
    202220        }
    203     }
    204    
    205     /*
    206     int num_matches_within_track = 6;
    207 
    208 
    209     first_entry = addQueryResult(first_entry,root_doc_id,rankVector,offsetVector);
    210     // and now reset vectors to empty to be ready for next chain of values
    211     rankVector = new Vector<Double>();
    212     offsetVector = new Vector<Integer>();
    213 
    214 
    215     rankVector.add(rank);
    216     offsetVector.add(target_frame);
    217    
    218     addQueryResult(first_entry,root_doc_id,rankVector,offsetVector);
    219     */
    220 
     221       
     222        query_result_.add(expanded_query_result.get(i));
     223        i++;
     224    }
     225
     226    Collections.sort(query_result_);   
    221227    }
    222228   
  • gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/util/WekaFindInstanceKNN.java

    r36853 r36859  
    9393    public static Instances kNearestNeighbours(String doc_id_segment, double arousal_val,double valence_val, int k_nearest_num)
    9494    {
     95    System.err.println("**** wekaFindInstnaceKNN::kNearestNeighbours() called with:");
     96    System.err.print(  "**** doc_id_segment = " + doc_id_segment);
     97    System.err.print(  " arousal_val = "   + arousal_val);
     98    System.err.print(  " valence_val = "   + valence_val);
     99    System.err.println(" k_nearest_num = " + k_nearest_num);
    95100   
    96101    Instance sample_instance = new DenseInstance(3);
     
    103108    //sample_instance.setValue(2, -0.118439);
    104109
    105     //String segment_str = Integer.toString(segment);
    106     //sample_instance.setValue(0, doc_id +"-" + segment_str);
    107110
    108111    sample_instance.setValue(0, doc_id_segment);
    109 
     112   
    110113    sample_instance.setValue(1, arousal_val);
    111114    sample_instance.setValue(2, valence_val);
Note: See TracChangeset for help on using the changeset viewer.