1   /*
2    * CharacterCounter
3    * Copyright (C) 2008 Christian Schenk
4    *
5    * This program is free software; you can redistribute it and/or
6    * modify it under the terms of the GNU General Public License
7    * as published by the Free Software Foundation; either version 2
8    * of the License, or (at your option) any later version.
9    * 
10   * This program is distributed in the hope that it will be useful,
11   * but WITHOUT ANY WARRANTY; without even the implied warranty of
12   * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
13   * GNU General Public License for more details.
14   * 
15   * You should have received a copy of the GNU General Public License
16   * along with this program; if not, write to the Free Software
17   * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA
18   */
19  package org.christianschenk.cc;
20  
21  import java.util.HashMap;
22  import java.util.Map;
23  import java.util.Set;
24  
25  import org.christianschenk.cc.util.StringIterator;
26  
/**
 * Counts how often each character occurs in the strings added to it.
 * <p>
 * In case-insensitive mode (the default) all input is folded to lower case
 * before it is counted, and lookups fold the queried character as well, so
 * {@code 'A'} and {@code 'a'} share a single count. An optional alphabet
 * restricts which characters are counted at all.
 * <p>
 * This class performs no synchronization.
 * 
 * @author Christian Schenk
 */
public class CharacterCounter {

	/**
	 * Locale used for case folding. A fixed locale keeps the result independent of the JVM's
	 * default locale (with a Turkish default, "I" would otherwise fold to a dotless "ı").
	 */
	private static final java.util.Locale FOLDING_LOCALE = java.util.Locale.ENGLISH;

	/** char -> count */
	private final Map<Character, Integer> counter;
	/** number of all characters counted so far (sum of all values in {@link #counter}) */
	private int overallCharacterCount;
	/** false if input and lookups are folded to lower case */
	private final boolean casesensitive;
	/** only the chars in here will be counted; empty means "count everything" */
	private String alphabet;

	/**
	 * Creates a case-insensitive counter without an alphabet restriction.
	 */
	public CharacterCounter() {
		this(false);
	}

	/**
	 * Creates a counter without an alphabet restriction.
	 * 
	 * @param casesensitive
	 *            true if characters should be handled case sensitive, otherwise false
	 */
	public CharacterCounter(final boolean casesensitive) {
		this.counter = new HashMap<Character, Integer>();
		this.overallCharacterCount = 0;
		this.casesensitive = casesensitive;
		this.alphabet = "";
	}

	/**
	 * Counts every character of the given string that is part of the alphabet. In
	 * case-insensitive mode the string is folded to lower case first.
	 * 
	 * @param str
	 *            the text to count; {@code null} is ignored
	 */
	public void addString(final String str) {
		if (str == null) {
			return;
		}
		final String text = this.normalize(str);
		// Walk the string by index: no per-character boxing and no helper object needed.
		for (int i = 0; i < text.length(); i++) {
			final char c = text.charAt(i);
			if (this.inAlphabet(c)) {
				this.addCharacter(c);
			}
		}
	}

	/**
	 * Increments the count of an already normalized character.
	 */
	private void addCharacter(final char c) {
		// Access the map directly; the character has been folded by the caller already.
		final Integer previous = this.counter.get(c);
		this.counter.put(c, previous == null ? 1 : previous + 1);
		this.overallCharacterCount++;
	}

	/**
	 * Folds the string to lower case unless this counter is case sensitive.
	 */
	private String normalize(final String s) {
		return this.casesensitive ? s : s.toLowerCase(FOLDING_LOCALE);
	}

	/**
	 * Returns true if the character may be counted, i.e. the alphabet is empty or contains it.
	 */
	private boolean inAlphabet(final char c) {
		return this.alphabet.length() == 0 || this.alphabet.indexOf(c) >= 0;
	}

	/**
	 * Returns the set of distinct characters that were counted so far.
	 * 
	 * @return a read-only view of the counted characters; it reflects later additions but
	 *         rejects modification, so callers cannot bring the counts out of sync
	 */
	public Set<Character> getCharacters() {
		return java.util.Collections.unmodifiableSet(this.counter.keySet());
	}

	/**
	 * Returns the count for a given character.
	 * 
	 * @param c
	 *            the character to look up; folded to lower case in case-insensitive mode
	 * @return how often the character was counted, 0 if it was never counted
	 */
	public int getCount(final char c) {
		final char key = this.casesensitive ? c : Character.toLowerCase(c);
		final Integer charCount = this.counter.get(key);
		return charCount == null ? 0 : charCount;
	}

	/**
	 * Returns the relative frequency of the given character among all counted characters.
	 * 
	 * @param c
	 *            the character to look up; folded to lower case in case-insensitive mode
	 * @return count of the character divided by the overall count, or 0 if nothing has been
	 *         counted yet
	 */
	public double getProbability(final char c) {
		return this.relativeFrequency(this.getCount(c));
	}

	/**
	 * Divides a count by the overall count, guarding against an empty counter.
	 */
	private double relativeFrequency(final int count) {
		if (this.overallCharacterCount == 0) {
			return 0;
		}
		return (double) count / (double) this.overallCharacterCount;
	}

	/**
	 * Sets the alphabet - only these characters will be counted from now on. Characters that
	 * were counted before the call keep their counts.
	 * <p>
	 * In case-insensitive mode the alphabet is folded to lower case. Leading and trailing
	 * whitespace is trimmed; an alphabet that ends up empty removes the restriction.
	 * 
	 * @param alphabet
	 *            the characters to count; {@code null} is treated like the empty string
	 */
	public void setAlphabet(final String alphabet) {
		if (alphabet == null) {
			this.alphabet = "";
			return;
		}
		this.alphabet = this.normalize(alphabet).trim();
	}

	/**
	 * Prints one line per counted character to standard output: the character, its count and
	 * its relative frequency. The order of the lines is that of the underlying hash map.
	 */
	public void printStats() {
		for (final Map.Entry<Character, Integer> entry : this.counter.entrySet()) {
			final char c = entry.getKey();
			final int count = entry.getValue();
			System.out.println(c + ": " + count + " (" + this.relativeFrequency(count) + ")");
		}
	}
}