Changeset 36857 for gs3-extensions


Ignore:
Timestamp:
2022-10-22T10:09:15+13:00 (18 months ago)
Author:
davidb
Message:

Service now showing some basic functionality to retrieve songs

Location:
gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3
Files:
1 edited
1 moved

Legend:

Unmodified
Added
Removed
  • gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/service/GS2WekaDBSearch.java

    r36856 r36857  
    11/*
    2  *    GS2WekaAVRecommender.java
     2 *    GS2WekaDBSearch.java
    33 *    Copyright (C) 2011 New Zealand Digital Library, http://www.nzdl.org
    44 *
     
    4343// 'Search'
    4444
    45 public class GS2WekaAVRecommender extends AbstractGS2AudioSearch {
    46 
    47     static Logger logger = Logger.getLogger (org.greenstone.gsdl3.service.GS2WekaAVRecommender.class.getName ());
     45public class GS2WekaDBSearch extends AbstractGS2AudioSearch {
     46
     47    static Logger logger = Logger.getLogger (org.greenstone.gsdl3.service.GS2WekaDBSearch.class.getName ());
    4848
    4949    protected static final String OFFSET_PARAM  = "offset";
     
    5252    protected static final String MAXDOCS_PARAM = "maxDocs";
    5353
    54     protected static final String WEKA_MODEL_DEFAULT_DIRECTORY = "weka-model";
    55     protected static final String KNN_MODEL_FILENAME = "av-and-features-knn.ser";
     54    //protected static final String WEKA_MODEL_DEFAULT_DIRECTORY = "weka-model";
     55    //protected static final String KNN_MODEL_FILENAME = "av-and-features-knn.ser";
     56
     57    protected static final String WEKA_DB_DEFAULT_DIRECTORY = "wekaDB";
     58    protected static final String KNN_FEATURES_FILENAME = "av-features.csv";
     59
    5660   
    5761    protected WekaDBWrapper wekadb_src = null;
    5862
    59     public GS2WekaAVRecommender() {
    60    
     63    public GS2WekaDBSearch() {
     64
    6165    if(this.wekadb_src == null) {
     66        logger.info("Initializing WekaDBWrapper");
    6267        this.wekadb_src = new WekaDBWrapper();
    6368        }
     
    6671    /** do the actual query */
    6772    protected Element processAudioQuery (Element request) {
    68     // MG needs to be synchronized (this inspiration for this class)
    69     // Since it is not known how concurrent Weka can be, play it safe for
    70     // now and also require synchronization
    71 
     73
     74    // As the MG version needs to be java-synchronized (the inspiration for this class)
     75    // And since it is not known how concurrent (thread-safe) Weka can be ...
     76    // => Play it safe for now and restrict access to 'wekadb_src' using synchronized also
    7277        synchronized(this.wekadb_src) {
    7378        // Create a new (empty) result message ('doc' is in ServiceRack.java)
     
    99104        String index = (String) params.get (INDEX_PARAM);
    100105        if (index == null) {
    101         index = WEKA_MODEL_DEFAULT_DIRECTORY;
    102         }
    103        
    104         // The location of the Weka model index
     106        index = WEKA_DB_DEFAULT_DIRECTORY;
     107        }
     108       
     109        // The location of the Weka db index
    105110        String toplevel_index_dir = GSFile.collectionIndexDir (this.site_home, this.cluster_name);
    106         String weka_model_index_dir = toplevel_index_dir + File.separatorChar + index;
     111        String weka_db_index_dir = toplevel_index_dir + File.separatorChar + index;
    107112        String assoc_index_dir = toplevel_index_dir + File.separatorChar + "assoc"; // ****
    108113
    109         // set the Weka model query parameters to the values the user has specified
     114        // set the Weka DB query parameters to the values the user has specified
    110115        setStandardQueryParams (params); // ****
    111116       
    112         this.wekadb_src.runQuery (weka_model_index_dir, KNN_MODEL_FILENAME, assoc_index_dir, query);
     117        this.wekadb_src.runQuery (weka_db_index_dir, KNN_FEATURES_FILENAME, assoc_index_dir, query);
    113118        Vector docs = this.wekadb_src.getQueryResult ();
    114119
    115120        if (docs.isEmpty()) {
    116121        // something has gone wrong
    117         GSXML.addError (result, "Couldn't query the Weka model", GSXML.ERROR_TYPE_SYSTEM);
     122        GSXML.addError (result, "Couldn't query the Weka DB", GSXML.ERROR_TYPE_SYSTEM);
    118123        return result;
    119124        }
  • gs3-extensions/mars-src/trunk/src/java/org/greenstone/gsdl3/util/WekaDBWrapper.java

    r35177 r36857  
    2222import java.util.Vector;
    2323import java.util.Collections;
     24import java.util.regex.Pattern;
     25import java.util.regex.Matcher;
     26
     27import weka.core.Attribute;
     28import weka.core.Instance;
     29import weka.core.Instances;
    2430
    2531import org.apache.log4j.*;
     32
     33import org.greenstone.gsdl3.util.WekaFindInstanceKNN;
    2634
    2735/** Java wrapper class for access to the Weka
     
    107115     * base_dir must end with a file separator (OS dependent)
    108116     */
     117
     118
    109119    public void runQuery(String wekaDB_index_dir, String knn_model_file,
     120             String assoc_index_dir, String query_string) {
     121
     122    // combine index_dir with the knn model filename
     123
     124    String full_knn_model_filename  = wekaDB_index_dir + File.separatorChar + knn_model_file;
     125
     126    //String full_chr12_filename = assoc_index_dir + File.separatorChar
     127    //    + query_string + File.separatorChar + "doc.chr12";
     128
     129    System.err.println("**** full knn model filename  = " + full_knn_model_filename);
     130
     131    // Example returned result from Weka KNN
     132    // => first line is the input instance ('filename+segment',Arousal,Valence)
     133    //    following (indented lines) nearest neighbour matches in same format
     134    //
     135    // ds_22716_5743-6,-0.549489,-0.118439
     136    //  ds_22716_5743-6,-0.549489,-0.118439
     137    //  ds_31008_6550-30,-0.549489,-0.118439
     138    //  ds_72651_26831-6,-0.549489,-0.118439
     139    //  ds_26196_9214-18,-0.549489,-0.118439
     140
     141
     142    WekaFindInstanceKNN.init(full_knn_model_filename);
     143
     144    //Instances nearest_instances = WekaFindInstanceKNN.kNearestNeighbours("ds_22716_5743",6,-0.549489,-0.118439,3);
     145    String doc_id = query_string;
     146
     147    int segment = 6;
     148    //String segment_str = Integer.toString(segment);
     149    String doc_id_segment =  doc_id +"-" + segment;
     150
     151    double arousal_val = -0.549489;
     152    double valence_val =  -0.118439;
     153    int k_nearest_num = 3;
     154   
     155    System.err.println("doc_id_segment = " + doc_id_segment);
     156
     157   
     158    Instances nearest_instances = WekaFindInstanceKNN.kNearestNeighbours(doc_id_segment,arousal_val,valence_val,k_nearest_num);
     159
     160       
     161    query_result_ = new Vector();
     162
     163    /*
     164    WekaDBDocInfo wekaDB_doc_info1 = new WekaDBDocInfo("ds_20415_2337",0.9,4);
     165    query_result_.add(wekaDB_doc_info1);
     166
     167    WekaDBDocInfo wekaDB_doc_info2 = new WekaDBDocInfo("ds_51017_15513",0.87,1);
     168    query_result_.add(wekaDB_doc_info2);
     169
     170    WekaDBDocInfo wekaDB_doc_info3 = new WekaDBDocInfo("ds_20415_2337",0.82,6);
     171    query_result_.add(wekaDB_doc_info3);
     172    */
     173
     174    int clamped_k_nearest_num = Math.min(k_nearest_num,nearest_instances.size());
     175   
     176    for (int i=0; i<clamped_k_nearest_num; i++) {
     177        Instance instance = nearest_instances.instance(i);
     178        logger.info("\tProcessing returned instance: " + instance);
     179       
     180        //Attribute doc_id_segment_att = instance.attribute(0);
     181        //String matching_doc_id_segment = instance.attribute(0).value(0);
     182        String matching_doc_id_segment = instance.stringValue(0);
     183        //double arousal_val    = instance.attribute(1);
     184        //double valence_val    = instance.attribute(2);
     185
     186
     187        Pattern p = Pattern.compile("^(\\w+)-(\\d+)$");
     188        Matcher m = p.matcher(matching_doc_id_segment);
     189        if (m.matches()) {
     190
     191        String matching_doc_id = m.group(1);
     192        int matching_segment_offset = Integer.parseInt(m.group(2));
     193       
     194        double matching_rank = 0.9;
     195
     196        logger.info("\tAdding in: matching_doc_id = " + matching_doc_id);
     197        WekaDBDocInfo wekaDB_doc_info = new WekaDBDocInfo(matching_doc_id,matching_rank,matching_segment_offset);
     198        query_result_.add(wekaDB_doc_info);
     199        }
     200        else {
     201        logger.error("Returned AV k-nearest neighbour match '"+doc_id_segment+"' could not be parsed as <doc-id>-<segment>" );
     202        }
     203    }
     204   
     205    /*
     206    int num_matches_within_track = 6;
     207
     208
     209    first_entry = addQueryResult(first_entry,root_doc_id,rankVector,offsetVector);
     210    // and now reset vectors to empty to be ready for next chain of values
     211    rankVector = new Vector<Double>();
     212    offsetVector = new Vector<Integer>();
     213
     214
     215    rankVector.add(rank);
     216    offsetVector.add(target_frame);
     217   
     218    addQueryResult(first_entry,root_doc_id,rankVector,offsetVector);
     219    */
     220
     221    }
     222   
     223    public void runQueryOLD(String wekaDB_index_dir, String knn_model_file,
    110224             String assoc_index_dir, String query_string) {
    111225
     
    120234    // ****
    121235    String [] cmd_array = new String[] {
    122         "java -jar weka.jar",
     236        "java", "-jar", "weka.jar",
    123237        "-d", full_knn_model_filename,
    124238        "-Q", "nsequence",
Note: See TracChangeset for help on using the changeset viewer.