source: gs2-extensions/parallel-building/trunk/src/src/java/org/nzdl/gsdl/GSPartitioner.java@ 27910

Last change on this file since 27910 was 27910, checked in by jmt12, 11 years ago

Extended the existing HadoopGreenstoneIngest with a proper Reduce phase. This also required the creation of several new classes to handle the partitioning and grouping of Map phase output, and a class to wrap access to the txt2(t)db processes.

File size: 1.9 KB
Line 
1/******************************************************************************
2 *
3 * A component of the Greenstone digital library software from the New Zealand
4 * Digital Library Project at the University of Waikato, New Zealand.
5 * Copyright (C) 2006 New Zealand Digital Library Project
6 *
7 * This program is free software; you can redistribute it and/or modify it
8 * under the terms of the GNU General Public License as published by the Free
9 * Software Foundation; either version 2 of the License, or (at your option)
10 * any later version.
11 *
12 * This program is distributed in the hope that it will be useful, but
13 * WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 * or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
15 * for more details.
16 *
17 * You should have received a copy of the GNU General Public License along
18 * with this program; if not, write to the Free Software Foundation, Inc.,
19 * 675 Mass Ave, Cambridge, MA 02139, USA.
20 *****************************************************************************/
21/** @author jmt12, GSDL **/
22package org.nzdl.gsdl;
23
24import org.apache.hadoop.io.Text;
25import org.apache.hadoop.mapreduce.Partitioner;
26
27/** @class GSPartitioner
28 */
29public class GSPartitioner
30 extends Partitioner<Text, Text>
31{
32
33 /** @function getPartition()
34 */
35 public int getPartition(Text key, Text value, int num_reduce_tasks)
36 {
37 int index = 0;
38 String key_string = key.toString();
39 String[] key_parts = key_string.split(" ");
40 String type = key_parts[0];
41 if (type.equals("datestamp"))
42 {
43 index = 1;
44 }
45 if (type.equals("doc"))
46 {
47 index = 2;
48 }
49 if (type.equals("rss"))
50 {
51 index = 3;
52 }
53 if (type.equals("src"))
54 {
55 index = 4;
56 }
57 return index % num_reduce_tasks;
58 }
59 /** getPartition(Text, Text, int) **/
60
61}
62/** class GSPartitioner **/
Note: See TracBrowser for help on using the repository browser.