1 | #!/usr/bin/env ruby
|
---|
2 | =begin
|
---|
3 | #
|
---|
4 | # Copyright (c) 2001,2003 Akinori MUSHA <[email protected]>
|
---|
5 | #
|
---|
6 | # All rights reserved. You can redistribute and/or modify it under
|
---|
7 | # the same terms as Ruby.
|
---|
8 | #
|
---|
9 | # $Idaemons: /home/cvs/rb/abbrev.rb,v 1.2 2001/05/30 09:37:45 knu Exp $
|
---|
10 | # $RoughId: abbrev.rb,v 1.4 2003/10/14 19:45:42 knu Exp $
|
---|
11 | # $Id: abbrev.rb 11708 2007-02-12 23:01:19Z shyouhei $
|
---|
12 | =end
|
---|
13 |
|
---|
# Calculate the set of unique abbreviations for a given set of strings.
#
#   require 'abbrev'
#   require 'pp'
#
#   pp Abbrev::abbrev(['ruby', 'rules']).sort
#
# <i>Generates:</i>
#
#   [["rub", "ruby"],
#    ["ruby", "ruby"],
#    ["rul", "rules"],
#    ["rule", "rules"],
#    ["rules", "rules"]]
#
# Also adds an +abbrev+ method to class +Array+.
|
---|
30 |
|
---|
module Abbrev

  # Given a set of strings, calculate the set of unambiguous
  # abbreviations for those strings, and return a hash where the keys
  # are all the possible abbreviations and the values are the full
  # strings.  Thus, given input of "car" and "cone", the keys pointing
  # to "car" would be "ca" and "car", while those pointing to "cone"
  # would be "co", "con", and "cone".
  #
  # The optional +pattern+ parameter is a pattern or a string.  Only
  # those input strings matching the pattern, or beginning with the
  # string, are considered for inclusion in the output hash.
  #
  # A String +pattern+ is treated as a literal prefix and is anchored
  # at the very start of the candidate, so a word containing a newline
  # is not accepted merely because one of its later lines starts with
  # the prefix.  A Regexp +pattern+ is used exactly as given.
  #
  # Every word that passes the filter is always mapped to itself, even
  # if it is also a prefix of another word.
  def abbrev(words, pattern = nil)
    table = {}
    seen = Hash.new(0) # how many words have offered each prefix so far

    if pattern.is_a?(String)
      # Regard a string as a prefix.  \A (not ^) so that only the start
      # of the string matches, never the start of an embedded line.
      pattern = /\A#{Regexp.quote(pattern)}/
    end

    words.each do |word|
      next if word.empty?

      # Walk the proper prefixes from longest to shortest; the full
      # word itself is handled by the second pass below.
      (word.size - 1).downto(1) do |len|
        prefix = word[0, len]

        next if pattern && pattern !~ prefix

        case seen[prefix] += 1
        when 1
          table[prefix] = word
        when 2
          # A second word shares this prefix: it is ambiguous.
          table.delete(prefix)
        else
          # Seen three or more times: the two earlier words already
          # marked every shorter prefix as ambiguous, so stop here.
          break
        end
      end
    end

    # A complete word always stands for itself.
    words.each do |word|
      next if pattern && pattern !~ word

      table[word] = word
    end

    table
  end

  module_function :abbrev
end
|
---|
81 |
|
---|
class Array
  # Returns the hash of unambiguous abbreviations for the strings held
  # in +self+, as computed by Abbrev.abbrev.  When a pattern or a string
  # is given, only the strings matching the pattern, or starting with
  # the string, are considered.
  #
  #   %w{ car cone }.abbrev   #=> { "ca" => "car", "car" => "car",
  #                                 "co" => "cone", "con" => "cone",
  #                                 "cone" => "cone" }
  def abbrev(pattern = nil)
    Abbrev.abbrev(self, pattern)
  end
end
|
---|
94 |
|
---|
# When this file is run directly as a script: for every input line,
# split it into whitespace-separated words and print that line's
# abbreviation table, sorted, one "abbreviation => word" pair per line.
if __FILE__ == $PROGRAM_NAME
  while (input = gets)
    input.split.abbrev.sort.each do |short, full|
      puts "#{short} => #{full}"
    end
  end
end
|
---|