source: gs2-extensions/ngramj/src/wiki/wiki2xml_all.sh@ 25141

Last change on this file since 25141 was 25141, checked in by papitha, 12 years ago

NGRAMJ PERL MODULE ADDED /MAORI LANGUAGE GUESSING WORKING WELL!!

File size: 548 bytes
Line 
#!/bin/sh
# Runs wiki2xml_command on all the files extracted by xmldump2files.py
#
# Usage: wiki2xml_all.sh [directory]
#
# Every regular file below [directory] whose name ends in ".txt" is handed to
# the PHP converter; the result is written next to the input, with the ".txt"
# suffix replaced by ".xml". Each input path is printed before it is converted.
# Exits with status 1 (after printing the usage line) when no directory is given.
#
# Evan Jones <[email protected]>
# April, 2008
# Released under a BSD licence.
# http://evanjones.ca/software/wikipedia2text.html

# Converter script, resolved relative to the current working directory.
WIKI2XML="wiki2xml/php/wiki2xml_command.php"

if [ -z "$1" ]; then
  # Quoted so that "[directory]" is never treated as a glob pattern.
  echo 'wiki2xml_all.sh [directory]'
  exit 1
fi

# "Infinite" loops can happen in the parser: limit it to 2 minutes per file.
# The CPU-time limit is inherited by every process started below.
ulimit -t 120

# Let find hand the paths over as real arguments instead of word-splitting its
# output, so names containing whitespace or glob characters survive intact.
# The converter path travels as the first argument of the inline script.
find "$1" -type f -name '*.txt' -exec sh -c '
  converter=$1
  shift
  for src do
    # Strip only the trailing ".txt"; an earlier ".txt" in the path is kept.
    out=${src%.txt}.xml
    printf "%s\n" "$src"
    php "$converter" "$src" "$out"
  done
' sh "$WIKI2XML" {} +
Note: See TracBrowser for help on using the repository browser.