/** *######################################################################### * * A component of the Gatherer application, part of the Greenstone digital * library suite from the New Zealand Digital Library Project at the * University of Waikato, New Zealand. * *

* * Author: John Thompson, Greenstone Digital Library, University of Waikato * *

* * Copyright (C) 1999 New Zealand Digital Library Project * *

* * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation; either version 2 of the License, or * (at your option) any later version. * *

* * This program is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. * *

* * You should have received a copy of the GNU General Public License * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. *######################################################################## */ /* GPL_HEADER */ package org.greenstone.gatherer.util; /************************************************************************************** * Title: Gatherer * Description: The Gatherer: a tool for gathering and enriching a digital collection. * Company: The University of Waikato * Written: 28/08/02 * Revised: * @author John Thompson, 9826509 * @author Michael Gilleland, Merriam Park Software * @version 2.3 **************************************************************************************/ /** Determines the MED between two metadata element names.
* Adapted from code by Michael Gilleland, Merriam Park Software, as detailed in a short essay called "Levenshtein Distance, in Three Flavors" available at http://www.merriampark.com/ld.htm. */ public class MED { //**************************** // Get minimum of three values //**************************** static private int Minimum (int a, int b, int c) { int mi = a; if (b < mi) { mi = b; } if (c < mi) { mi = c; } return mi; } //***************************** // Compute Levenshtein distance //***************************** static public int LD (String s, String t) { int d[][]; // matrix int n; // length of s int m; // length of t int i; // iterates through s int j; // iterates through t char s_i; // ith character of s char t_j; // jth character of t int cost; // cost // Step 1 // Set n to be the length of s. // Set m to be the length of t. // If n = 0, return m and exit. // If m = 0, return n and exit. // Construct a matrix containing 0..m rows and 0..n columns. n = s.length (); m = t.length (); if (n == 0) { return m; } if (m == 0) { return n; } d = new int[n+1][m+1]; // Step 2 // Initialize the first row to 0..n. // Initialize the first column to 0..m. for (i = 0; i <= n; i++) { d[i][0] = i; } for (j = 0; j <= m; j++) { d[0][j] = j; } // Step 3 // Examine each character of s (i from 1 to n). for (i = 1; i <= n; i++) { s_i = s.charAt (i - 1); // Step 4 // Examine each character of t (j from 1 to m). for (j = 1; j <= m; j++) { t_j = t.charAt (j - 1); // Step 5 // If s[i] equals t[j], the cost is 0. // If s[i] doesn't equal t[j], the cost is 1. if (s_i == t_j) { cost = 0; } else { cost = 1; } // Step 6 // Set cell d[i,j] of the matrix equal to the minimum of: // a. The cell immediately above plus 1: d[i-1,j] + 1. // b. The cell immediately to the left plus 1: d[i,j-1] + 1. // c. The cell diagonally above and to the left plus the cost: d[i-1,j-1] + cost. d[i][j] = Minimum (d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1] + cost); } } // Step 7 // After the iteration steps (3, 4, 5, 6) are complete, the distance is found in cell d[n,m]. int result = d[n][m]; d = null; return result; } }