- Timestamp:
- 2019-10-14T22:40:22+13:00 (4 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs3-extensions/maori-lang-detection/hdfs-cc-work/scripts/batchcrawl.sh
r33566 r33567 81 81 crawlId=$2 82 82 83 echo "Processing crawlId: $crawlId"83 echo "Processing siteDir $siteDir with crawlId: $crawlId" 84 84 85 85 if [ -d "$outputDir/$crawlId" ]; then … … 111 111 112 112 prepareForCrawl $siteDir $crawlId 113 break113 #break 114 114 done 115 115 } … … 141 141 crawlId=`printf %05d $COUNTER` 142 142 #echo $COUNTER - $crawlId 143 siteDir=$sitesDir/$crawlId 144 echo "siteDir $siteDir (crawlId $crawlId)" 143 # $siteDir needs / at end to work with existing code 144 siteDir=$sitesDir/$crawlId/ 145 #echo "siteDir $siteDir (crawlId $crawlId)" 146 prepareForCrawl $siteDir $crawlId 145 147 done 146 148 147 149 } 148 150 151 function printUsage() { 152 echo "Run as:" 153 echo " $0 -all|<ids>" 154 echo " where an id is a folder name in to_crawl/sites" 155 echo " and ids can be a comma separated list of" 156 echo " individual ids and/or ranges" 157 echo " Examples:" 158 echo " $0 00008-00022,00025,00026,00028-00034" 159 echo " $0 -all" 160 echo " $0 00312" 161 162 } 163 149 164 function parseArgs() { 165 150 166 # for i in $*; do 151 167 # echo "Pinky" … … 154 170 155 171 args="$1" 156 echo "Got arg string: $args" 157 172 #echo "Got arg string: $args" 173 174 if [ "x$args" = "x" ]; then 175 printUsage 176 exit 0 177 fi 178 158 179 # works - split args on comma 159 180 # https://stackoverflow.com/questions/918886/how-do-i-split-a-string-on-a-delimiter-in-bash 160 181 IFS=', ' read -ra IDS <<< "$args" 161 182 for id in "${IDS[@]}"; do 162 163 if [[ $id == *"-"* ]]; then 183 echo "id: |$id|" 184 if [ "x$id" = "x-all" ]; then 185 echo "crawlAll" 186 crawlAll 187 elif [[ $id == *"-"* ]]; then 188 # https://stackoverflow.com/questions/229551/how-to-check-if-a-string-contains-a-substring-in-bash 164 189 echo "$id is a range" 165 190 startCrawlId=${id%%-*} 166 191 endCrawlId=${id##*-} 167 192 echo "crawlRange $startCrawlId $endCrawlId" 168 crawlRange $startCrawlId $endCrawlId 193 crawlRange $startCrawlId $endCrawlId 194 169 195 else 170 196 echo "$id 
is singleton" 171 197 crawlId=$id 172 siteDir=$sitesDir/$crawlId 198 # $siteDir needs / at end to work with existing code 199 siteDir=$sitesDir/$crawlId/ 173 200 echo "prepareForCrawl $siteDir $crawlId" 174 #prepareForCrawl $siteDir $crawlId201 prepareForCrawl $siteDir $crawlId 175 202 fi 176 203 done 177 178 179 180 } 181 204 } 205 206 207 208 # Passing as string instead of individual arguments 209 # Because one can only have 9 individual args without using shift, see 210 # https://www.unix.com/shell-programming-and-scripting/57225-there-limit-no-arguments-shell-script.html 211 212 parseArgs "$*" 213 214 # old. testing 182 215 #crawlRange "00010" "00015" 183 216 #parseArgs "00008-00022,00025,00026,00028-00034" 184 parseArgs "$*"185 # can only have 9 args without using shift, see186 # https://stackoverflow.com/questions/229551/how-to-check-if-a-string-contains-a-substring-in-bash187 #parseArgs horse cow dog cat
Note: See TracChangeset for help on using the changeset viewer.