Changeset 33377
- Timestamp:
- 2019-07-31T19:04:00+12:00 (5 years ago)
- Location:
- gs3-extensions/maori-lang-detection
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/maori-lang-detection/README.txt
r33358 r33377 262 262 https://stackoverflow.com/questions/36516363/sentence-detection-with-opennlp 263 263 264 -
gs3-extensions/maori-lang-detection/gen_SentenceDetection_model.sh
r33357 r33377 12 12 # because the 2011 one appears to have fewer accidentally incorporated English sentences 13 13 14 15 # Need to run this script from the top level folder of this extension 14 16 15 17 if [ ! -z $1 ]; then … … 68 70 #tail -100 $infile 69 71 72 # Ensure OPENNLP_HOME is set 73 if [ "x$OPENNLP_HOME" = "x" ]; then 74 echo "OPENNLP_HOME not set, attempting to set it to the local apache-opennlp (v1.9.1). ENSURE THIS EXISTS OR SET OPENNLP_HOME YOURSELF!" 75 #if [ -d apache-opennlp-* ]; then 76 cd apache-opennlp-* 77 if [ "x$?" = "x0" ]; then 78 export OPENNLP_HOME=`pwd` 79 cd .. 80 else 81 echo "No OPENNLP_HOME set and could not find a subfolder 'apache-opennlp-...' to set it to." 82 echo "Set OPENNLP_HOME yourself before running this script. Exitting..." 83 exit 84 fi 85 fi 70 86 71 87 mkdir -p $OPENNLP_HOME/training_data … … 100 116 # Note that I tried manually inserting \t, after copying the original line with tabspacing had no effect. Still no difference. 101 117 # Note 2: echo doesn't appear to preserve copied tab spaces. 102 118 # Answer: echo doesn't treat \n as newline and \t as tab and so on, unless the -e flag is passed in: 119 # echo -e "100000\tYWCA Boarding house : ĀwhinaServices and support Kei te pūmanawa o Tāmaki Makaurau a YMCA." | awk -F "\t" '{ print $2 }' 103 120 104 121 # 2. Create mri sentences model from training sentences file 105 122 #$OPENNLP_HOME/bin/opennlp SentenceDetectorTrainer -model mri-sent_trained.bin -lang en -data mri-sent.train -encoding UTF-8 106 123 107 if [ "x$OPENNLP_HOME" = "x" ]; then108 echo "OPENNLP_HOME not set, attempting to set it to apache-opennlp-1.9.1 (ENSURE THIS EXISTS OR SET OPENNLP_HOME YOURSELF!)"109 if [ -d apache-opennlp-* ]; then110 cd apache-opennlp-*111 export OPENNLP_HOME=`pwd`112 cd ..113 else114 echo "No OPENNLP_HOME set and could not find a subfolder 'apache-opennlp-...' to set it to."115 echo "Set OPENNLP_HOME yourself before running this script. 
Exitting..."116 fi117 fi118 124 119 125 mkdir -p $OPENNLP_HOME/models … … 132 138 echo "****************************" 133 139 echo "" 140
Note:
See TracChangeset
for help on using the changeset viewer.