/**
*#########################################################################
*
* A component of the Gatherer application, part of the Greenstone digital
* library suite from the New Zealand Digital Library Project at the
* University of Waikato, New Zealand.
*
*
*
* Author: John Thompson, Greenstone Digital Library, University of Waikato
*
*
*
* Copyright (C) 1999 New Zealand Digital Library Project
*
*
*
* This program is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
*
*
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
*
*
* You should have received a copy of the GNU General Public License
* along with this program; if not, write to the Free Software
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
*########################################################################
*/
/* GPL_HEADER */
package org.greenstone.gatherer.util;
/**************************************************************************************
* Title: Gatherer
* Description: The Gatherer: a tool for gathering and enriching a digital collection.
* Company: The University of Waikato
* Written: 28/08/02
* Revised:
* @author John Thompson, 9826509
* @author Michael Gilleland, Merriam Park Software
* @version 2.3
**************************************************************************************/
/** Determines the MED between two metadata element names.
* Adapted from code by Michael Gilleland, Merriam Park Software, as detailed in a short essay called "Levenshtein Distance, in Three Flavors" available at http://www.merriampark.com/ld.htm.
*/
public class MED {
//****************************
// Get minimum of three values
//****************************
static private int Minimum (int a, int b, int c) {
int mi = a;
if (b < mi) {
mi = b;
}
if (c < mi) {
mi = c;
}
return mi;
}
//*****************************
// Compute Levenshtein distance
//*****************************
static public int LD (String s, String t) {
int d[][]; // matrix
int n; // length of s
int m; // length of t
int i; // iterates through s
int j; // iterates through t
char s_i; // ith character of s
char t_j; // jth character of t
int cost; // cost
// Step 1
// Set n to be the length of s.
// Set m to be the length of t.
// If n = 0, return m and exit.
// If m = 0, return n and exit.
// Construct a matrix containing 0..m rows and 0..n columns.
n = s.length ();
m = t.length ();
if (n == 0) {
return m;
}
if (m == 0) {
return n;
}
d = new int[n+1][m+1];
// Step 2
// Initialize the first row to 0..n.
// Initialize the first column to 0..m.
for (i = 0; i <= n; i++) {
d[i][0] = i;
}
for (j = 0; j <= m; j++) {
d[0][j] = j;
}
// Step 3
// Examine each character of s (i from 1 to n).
for (i = 1; i <= n; i++) {
s_i = s.charAt (i - 1);
// Step 4
// Examine each character of t (j from 1 to m).
for (j = 1; j <= m; j++) {
t_j = t.charAt (j - 1);
// Step 5
// If s[i] equals t[j], the cost is 0.
// If s[i] doesn't equal t[j], the cost is 1.
if (s_i == t_j) {
cost = 0;
}
else {
cost = 1;
}
// Step 6
// Set cell d[i,j] of the matrix equal to the minimum of:
// a. The cell immediately above plus 1: d[i-1,j] + 1.
// b. The cell immediately to the left plus 1: d[i,j-1] + 1.
// c. The cell diagonally above and to the left plus the cost: d[i-1,j-1] + cost.
d[i][j] = Minimum (d[i-1][j]+1, d[i][j-1]+1, d[i-1][j-1] + cost);
}
}
// Step 7
// After the iteration steps (3, 4, 5, 6) are complete, the distance is found in cell d[n,m].
int result = d[n][m];
d = null;
return result;
}
}