Changeset 28312 for gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest2.java
- Timestamp:
- 2013-09-26T11:13:14+12:00 (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/HadoopGreenstoneIngest2.java
r28192 r28312 13 13 import java.net.InetAddress; 14 14 import java.nio.channels.FileChannel; 15 import java.util.Collection; 15 16 import java.util.Iterator; 17 import java.util.List; 16 18 import java.util.Map; 19 import java.util.StringTokenizer; 17 20 import java.util.regex.Matcher; 18 21 import java.util.regex.Pattern; … … 21 24 import org.apache.hadoop.conf.*; 22 25 import org.apache.hadoop.io.*; 26 import org.apache.hadoop.mapred.ClusterStatus; 27 import org.apache.hadoop.mapred.JobClient; 28 import org.apache.hadoop.mapred.JobConf; 23 29 import org.apache.hadoop.mapreduce.*; 24 30 import org.apache.hadoop.mapreduce.Mapper.Context; … … 127 133 extends FileInputFormat<Text, Text> 128 134 { 135 136 private String[] getActiveServersList(JobContext context) 137 { 138 String [] servers = null; 139 try 140 { 141 JobClient jc = new JobClient((JobConf)context.getConfiguration()); 142 ClusterStatus status = jc.getClusterStatus(true); 143 Collection<String> atc = status.getActiveTrackerNames(); 144 servers = new String[atc.size()]; 145 int s = 0; 146 for (String serverInfo : atc) 147 { 148 StringTokenizer st = new StringTokenizer(serverInfo, ":"); 149 String trackerName = st.nextToken(); 150 StringTokenizer st1 = new StringTokenizer(trackerName, "_"); 151 st1.nextToken(); 152 servers[s++] = st1.nextToken(); 153 } 154 } 155 catch (IOException e) 156 { 157 e.printStackTrace(); 158 } 159 System.err.print("Servers: "); 160 String sep = ""; 161 for (Object obj : servers) 162 { 163 System.err.print(sep + obj.toString()); 164 sep = ", "; 165 } 166 System.err.println(""); 167 return servers; 168 } 169 /** getActiveServersList() **/ 170 171 /** 172 */ 173 public List<InputSplit> getSplits(JobContext job) 174 throws IOException 175 { 176 System.err.println("GSFileInputFormat::getSplits()"); 177 // get splits 178 List<InputSplit> original_splits = super.getSplits(job); 179 // Get active servers 180 String[] servers = getActiveServersList(job); 181 if(servers == null) 182 { 183 
return null; 184 } 185 // done 186 System.err.println("Splits: "); 187 for (InputSplit obj : original_splits) 188 { 189 System.err.println(obj.toString()); 190 } 191 return original_splits; 192 } 193 /** getSplits() **/ 194 129 195 /** 130 196 * Don't split the files
Note: See TracChangeset for help on using the changeset viewer.