001    /*
002     * Licensed to the Apache Software Foundation (ASF) under one or more
003     * contributor license agreements.  See the NOTICE file distributed with
004     * this work for additional information regarding copyright ownership.
005     * The ASF licenses this file to You under the Apache License, Version 2.0
006     * (the "License"); you may not use this file except in compliance with
007     * the License.  You may obtain a copy of the License at
008     *
009     *      http://www.apache.org/licenses/LICENSE-2.0
010     *
011     * Unless required by applicable law or agreed to in writing, software
012     * distributed under the License is distributed on an "AS IS" BASIS,
013     * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014     * See the License for the specific language governing permissions and
015     * limitations under the License.
016     */
017    package org.apache.commons.math.stat;
018    
019    import java.io.Serializable;
020    import java.text.NumberFormat;
021    import java.util.Iterator;
022    import java.util.Comparator;
023    import java.util.TreeMap;
024    
025    import org.apache.commons.math.MathRuntimeException;
026    import org.apache.commons.math.exception.util.LocalizedFormats;
027    
028    /**
029     * Maintains a frequency distribution.
030     * <p>
031     * Accepts int, long, char or Comparable values.  New values added must be
032     * comparable to those that have been added, otherwise the add method will
033     * throw an IllegalArgumentException.</p>
034     * <p>
035     * Integer values (int, long, Integer, Long) are not distinguished by type --
036     * i.e. <code>addValue(Long.valueOf(2)), addValue(2), addValue(2l)</code> all have
037     * the same effect (similarly for arguments to <code>getCount,</code> etc.).</p>
038     * <p>
039     * char values are converted by <code>addValue</code> to Character instances.
040     * As such, these values are not comparable to integral values, so attempts
041     * to combine integral types with chars in a frequency distribution will fail.
042     * </p>
043     * <p>
044     * The values are ordered using the default (natural order), unless a
045     * <code>Comparator</code> is supplied in the constructor.</p>
046     *
047     * @version $Revision: 1054186 $ $Date: 2011-01-01 03:28:46 +0100 (sam. 01 janv. 2011) $
048     */
049    public class Frequency implements Serializable {
050    
051        /** Serializable version identifier */
052        private static final long serialVersionUID = -3845586908418844111L;
053    
054        /** underlying collection */
055        private final TreeMap<Comparable<?>, Long> freqTable;
056    
057        /**
058         * Default constructor.
059         */
060        public Frequency() {
061            freqTable = new TreeMap<Comparable<?>, Long>();
062        }
063    
064        /**
065         * Constructor allowing values Comparator to be specified.
066         *
067         * @param comparator Comparator used to order values
068         */
069        @SuppressWarnings("unchecked") // TODO is the cast OK?
070        public Frequency(Comparator<?> comparator) {
071            freqTable = new TreeMap<Comparable<?>, Long>((Comparator<? super Comparable<?>>) comparator);
072        }
073    
074        /**
075         * Return a string representation of this frequency
076         * distribution.
077         *
078         * @return a string representation.
079         */
080        @Override
081        public String toString() {
082            NumberFormat nf = NumberFormat.getPercentInstance();
083            StringBuilder outBuffer = new StringBuilder();
084            outBuffer.append("Value \t Freq. \t Pct. \t Cum Pct. \n");
085            Iterator<Comparable<?>> iter = freqTable.keySet().iterator();
086            while (iter.hasNext()) {
087                Comparable<?> value = iter.next();
088                outBuffer.append(value);
089                outBuffer.append('\t');
090                outBuffer.append(getCount(value));
091                outBuffer.append('\t');
092                outBuffer.append(nf.format(getPct(value)));
093                outBuffer.append('\t');
094                outBuffer.append(nf.format(getCumPct(value)));
095                outBuffer.append('\n');
096            }
097            return outBuffer.toString();
098        }
099    
100        /**
101         * Adds 1 to the frequency count for v.
102         * <p>
103         * If other objects have already been added to this Frequency, v must
104         * be comparable to those that have already been added.
105         * </p>
106         *
107         * @param v the value to add.
108         * @throws IllegalArgumentException if <code>v</code> is not Comparable,
109         *         or is not comparable with previous entries
110         * @deprecated use {@link #addValue(Comparable)} instead
111         */
112        @Deprecated
113        public void addValue(Object v) {
114            if (v instanceof Comparable<?>){
115                addValue((Comparable<?>) v);
116            } else {
117                throw MathRuntimeException.createIllegalArgumentException(
118                      LocalizedFormats.CLASS_DOESNT_IMPLEMENT_COMPARABLE,
119                      v.getClass().getName());
120            }
121        }
122    
123        /**
124         * Adds 1 to the frequency count for v.
125         * <p>
126         * If other objects have already been added to this Frequency, v must
127         * be comparable to those that have already been added.
128         * </p>
129         *
130         * @param v the value to add.
131         * @throws IllegalArgumentException if <code>v</code> is not comparable with previous entries
132         */
133        public void addValue(Comparable<?> v){
134            Comparable<?> obj = v;
135            if (v instanceof Integer) {
136               obj = Long.valueOf(((Integer) v).longValue());
137            }
138            try {
139                Long count = freqTable.get(obj);
140                if (count == null) {
141                    freqTable.put(obj, Long.valueOf(1));
142                } else {
143                    freqTable.put(obj, Long.valueOf(count.longValue() + 1));
144                }
145            } catch (ClassCastException ex) {
146                //TreeMap will throw ClassCastException if v is not comparable
147                throw MathRuntimeException.createIllegalArgumentException(
148                      LocalizedFormats.INSTANCES_NOT_COMPARABLE_TO_EXISTING_VALUES,
149                      v.getClass().getName());
150            }
151        }
152    
153        /**
154         * Adds 1 to the frequency count for v.
155         *
156         * @param v the value to add.
157         */
158        public void addValue(int v) {
159            addValue(Long.valueOf(v));
160        }
161    
162        /**
163         * Adds 1 to the frequency count for v.
164         *
165         * @param v the value to add.
166         * @deprecated to be removed in math 3.0
167         */
168        @Deprecated
169        public void addValue(Integer v) {
170            addValue(Long.valueOf(v.longValue()));
171        }
172    
173        /**
174         * Adds 1 to the frequency count for v.
175         *
176         * @param v the value to add.
177         */
178        public void addValue(long v) {
179            addValue(Long.valueOf(v));
180        }
181    
182        /**
183         * Adds 1 to the frequency count for v.
184         *
185         * @param v the value to add.
186         */
187        public void addValue(char v) {
188            addValue(Character.valueOf(v));
189        }
190    
191        /** Clears the frequency table */
192        public void clear() {
193            freqTable.clear();
194        }
195    
196        /**
197         * Returns an Iterator over the set of values that have been added.
198         * <p>
199         * If added values are integral (i.e., integers, longs, Integers, or Longs),
200         * they are converted to Longs when they are added, so the objects returned
201         * by the Iterator will in this case be Longs.</p>
202         *
203         * @return values Iterator
204         */
205        public Iterator<Comparable<?>> valuesIterator() {
206            return freqTable.keySet().iterator();
207        }
208    
209        //-------------------------------------------------------------------------
210    
211        /**
212         * Returns the sum of all frequencies.
213         *
214         * @return the total frequency count.
215         */
216        public long getSumFreq() {
217            long result = 0;
218            Iterator<Long> iterator = freqTable.values().iterator();
219            while (iterator.hasNext())  {
220                result += iterator.next().longValue();
221            }
222            return result;
223        }
224    
225        /**
226         * Returns the number of values = v.
227         * Returns 0 if the value is not comparable.
228         *
229         * @param v the value to lookup.
230         * @return the frequency of v.
231         * @deprecated replaced by {@link #getCount(Comparable)} as of 2.0
232         */
233        @Deprecated
234        public long getCount(Object v) {
235            return getCount((Comparable<?>) v);
236        }
237    
238        /**
239         * Returns the number of values = v.
240         * Returns 0 if the value is not comparable.
241         *
242         * @param v the value to lookup.
243         * @return the frequency of v.
244         */
245        public long getCount(Comparable<?> v) {
246            if (v instanceof Integer) {
247                return getCount(((Integer) v).longValue());
248            }
249            long result = 0;
250            try {
251                Long count =  freqTable.get(v);
252                if (count != null) {
253                    result = count.longValue();
254                }
255            } catch (ClassCastException ex) {
256                // ignore and return 0 -- ClassCastException will be thrown if value is not comparable
257            }
258            return result;
259        }
260    
261        /**
262         * Returns the number of values = v.
263         *
264         * @param v the value to lookup.
265         * @return the frequency of v.
266         */
267        public long getCount(int v) {
268            return getCount(Long.valueOf(v));
269        }
270    
271        /**
272         * Returns the number of values = v.
273         *
274         * @param v the value to lookup.
275         * @return the frequency of v.
276         */
277        public long getCount(long v) {
278            return getCount(Long.valueOf(v));
279        }
280    
281        /**
282         * Returns the number of values = v.
283         *
284         * @param v the value to lookup.
285         * @return the frequency of v.
286         */
287        public long getCount(char v) {
288            return getCount(Character.valueOf(v));
289        }
290    
291        /**
292         * Returns the number of values in the frequency table.
293         *
294         * @return the number of unique values that have been added to the frequency table.
295         * @see #valuesIterator()
296         */
297        public int getUniqueCount(){
298            return freqTable.keySet().size();
299        }
300    
301        //-------------------------------------------------------------
302    
303        /**
304          * Returns the percentage of values that are equal to v
305         * (as a proportion between 0 and 1).
306         * <p>
307         * Returns <code>Double.NaN</code> if no values have been added.</p>
308         *
309         * @param v the value to lookup
310         * @return the proportion of values equal to v
311         * @deprecated replaced by {@link #getPct(Comparable)} as of 2.0
312         */
313        @Deprecated
314        public double getPct(Object v) {
315            return getPct((Comparable<?>) v);
316        }
317    
318        /**
319         * Returns the percentage of values that are equal to v
320         * (as a proportion between 0 and 1).
321         * <p>
322         * Returns <code>Double.NaN</code> if no values have been added.</p>
323         *
324         * @param v the value to lookup
325         * @return the proportion of values equal to v
326         */
327        public double getPct(Comparable<?> v) {
328            final long sumFreq = getSumFreq();
329            if (sumFreq == 0) {
330                return Double.NaN;
331            }
332            return (double) getCount(v) / (double) sumFreq;
333        }
334    
335        /**
336         * Returns the percentage of values that are equal to v
337         * (as a proportion between 0 and 1).
338         *
339         * @param v the value to lookup
340         * @return the proportion of values equal to v
341         */
342        public double getPct(int v) {
343            return getPct(Long.valueOf(v));
344        }
345    
346        /**
347         * Returns the percentage of values that are equal to v
348         * (as a proportion between 0 and 1).
349         *
350         * @param v the value to lookup
351         * @return the proportion of values equal to v
352         */
353        public double getPct(long v) {
354            return getPct(Long.valueOf(v));
355        }
356    
357        /**
358         * Returns the percentage of values that are equal to v
359         * (as a proportion between 0 and 1).
360         *
361         * @param v the value to lookup
362         * @return the proportion of values equal to v
363         */
364        public double getPct(char v) {
365            return getPct(Character.valueOf(v));
366        }
367    
368        //-----------------------------------------------------------------------------------------
369    
370        /**
371         * Returns the cumulative frequency of values less than or equal to v.
372         * <p>
373         * Returns 0 if v is not comparable to the values set.</p>
374         *
375         * @param v the value to lookup.
376         * @return the proportion of values equal to v
377         * @deprecated replaced by {@link #getCumFreq(Comparable)} as of 2.0
378         */
379        @Deprecated
380        public long getCumFreq(Object v) {
381            return getCumFreq((Comparable<?>) v);
382        }
383    
384        /**
385         * Returns the cumulative frequency of values less than or equal to v.
386         * <p>
387         * Returns 0 if v is not comparable to the values set.</p>
388         *
389         * @param v the value to lookup.
390         * @return the proportion of values equal to v
391         */
392        public long getCumFreq(Comparable<?> v) {
393            if (getSumFreq() == 0) {
394                return 0;
395            }
396            if (v instanceof Integer) {
397                return getCumFreq(((Integer) v).longValue());
398            }
399            @SuppressWarnings("unchecked") // OK, freqTable is Comparable<?>
400            Comparator<Comparable<?>> c = (Comparator<Comparable<?>>) freqTable.comparator();
401            if (c == null) {
402                c = new NaturalComparator();
403            }
404            long result = 0;
405    
406            try {
407                Long value = freqTable.get(v);
408                if (value != null) {
409                    result = value.longValue();
410                }
411            } catch (ClassCastException ex) {
412                return result;   // v is not comparable
413            }
414    
415            if (c.compare(v, freqTable.firstKey()) < 0) {
416                return 0;  // v is comparable, but less than first value
417            }
418    
419            if (c.compare(v, freqTable.lastKey()) >= 0) {
420                return getSumFreq();    // v is comparable, but greater than the last value
421            }
422    
423            Iterator<Comparable<?>> values = valuesIterator();
424            while (values.hasNext()) {
425                Comparable<?> nextValue = values.next();
426                if (c.compare(v, nextValue) > 0) {
427                    result += getCount(nextValue);
428                } else {
429                    return result;
430                }
431            }
432            return result;
433        }
434    
435         /**
436         * Returns the cumulative frequency of values less than or equal to v.
437         * <p>
438         * Returns 0 if v is not comparable to the values set.</p>
439         *
440         * @param v the value to lookup
441         * @return the proportion of values equal to v
442         */
443        public long getCumFreq(int v) {
444            return getCumFreq(Long.valueOf(v));
445        }
446    
447         /**
448         * Returns the cumulative frequency of values less than or equal to v.
449         * <p>
450         * Returns 0 if v is not comparable to the values set.</p>
451         *
452         * @param v the value to lookup
453         * @return the proportion of values equal to v
454         */
455        public long getCumFreq(long v) {
456            return getCumFreq(Long.valueOf(v));
457        }
458    
459        /**
460         * Returns the cumulative frequency of values less than or equal to v.
461         * <p>
462         * Returns 0 if v is not comparable to the values set.</p>
463         *
464         * @param v the value to lookup
465         * @return the proportion of values equal to v
466         */
467        public long getCumFreq(char v) {
468            return getCumFreq(Character.valueOf(v));
469        }
470    
471        //----------------------------------------------------------------------------------------------
472    
473        /**
474         * Returns the cumulative percentage of values less than or equal to v
475         * (as a proportion between 0 and 1).
476         * <p>
477         * Returns <code>Double.NaN</code> if no values have been added.
478         * Returns 0 if at least one value has been added, but v is not comparable
479         * to the values set.</p>
480         *
481         * @param v the value to lookup
482         * @return the proportion of values less than or equal to v
483         * @deprecated replaced by {@link #getCumPct(Comparable)} as of 2.0
484         */
485        @Deprecated
486        public double getCumPct(Object v) {
487            return getCumPct((Comparable<?>) v);
488    
489        }
490    
491        /**
492         * Returns the cumulative percentage of values less than or equal to v
493         * (as a proportion between 0 and 1).
494         * <p>
495         * Returns <code>Double.NaN</code> if no values have been added.
496         * Returns 0 if at least one value has been added, but v is not comparable
497         * to the values set.</p>
498         *
499         * @param v the value to lookup
500         * @return the proportion of values less than or equal to v
501         */
502        public double getCumPct(Comparable<?> v) {
503            final long sumFreq = getSumFreq();
504            if (sumFreq == 0) {
505                return Double.NaN;
506            }
507            return (double) getCumFreq(v) / (double) sumFreq;
508        }
509    
510        /**
511         * Returns the cumulative percentage of values less than or equal to v
512         * (as a proportion between 0 and 1).
513         * <p>
514         * Returns 0 if v is not comparable to the values set.</p>
515         *
516         * @param v the value to lookup
517         * @return the proportion of values less than or equal to v
518         */
519        public double getCumPct(int v) {
520            return getCumPct(Long.valueOf(v));
521        }
522    
523        /**
524         * Returns the cumulative percentage of values less than or equal to v
525         * (as a proportion between 0 and 1).
526         * <p>
527         * Returns 0 if v is not comparable to the values set.</p>
528         *
529         * @param v the value to lookup
530         * @return the proportion of values less than or equal to v
531         */
532        public double getCumPct(long v) {
533            return getCumPct(Long.valueOf(v));
534        }
535    
536        /**
537         * Returns the cumulative percentage of values less than or equal to v
538         * (as a proportion between 0 and 1).
539         * <p>
540         * Returns 0 if v is not comparable to the values set.</p>
541         *
542         * @param v the value to lookup
543         * @return the proportion of values less than or equal to v
544         */
545        public double getCumPct(char v) {
546            return getCumPct(Character.valueOf(v));
547        }
548    
549        /**
550         * A Comparator that compares comparable objects using the
551         * natural order.  Copied from Commons Collections ComparableComparator.
552         */
553        private static class NaturalComparator<T extends Comparable<T>> implements Comparator<Comparable<T>>, Serializable {
554    
555            /** Serializable version identifier */
556            private static final long serialVersionUID = -3852193713161395148L;
557    
558            /**
559             * Compare the two {@link Comparable Comparable} arguments.
560             * This method is equivalent to:
561             * <pre>(({@link Comparable Comparable})o1).{@link Comparable#compareTo compareTo}(o2)</pre>
562             *
563             * @param  o1 the first object
564             * @param  o2 the second object
565             * @return  result of comparison
566             * @throws NullPointerException when <i>o1</i> is <code>null</code>,
567             *         or when <code>((Comparable)o1).compareTo(o2)</code> does
568             * @throws ClassCastException when <i>o1</i> is not a {@link Comparable Comparable},
569             *         or when <code>((Comparable)o1).compareTo(o2)</code> does
570             */
571            @SuppressWarnings("unchecked") // cast to (T) may throw ClassCastException, see Javadoc
572            public int compare(Comparable<T> o1, Comparable<T> o2) {
573                return o1.compareTo((T) o2);
574            }
575        }
576    
577        /** {@inheritDoc} */
578        @Override
579        public int hashCode() {
580            final int prime = 31;
581            int result = 1;
582            result = prime * result +
583                     ((freqTable == null) ? 0 : freqTable.hashCode());
584            return result;
585        }
586    
587        /** {@inheritDoc} */
588        @Override
589        public boolean equals(Object obj) {
590            if (this == obj)
591                return true;
592            if (!(obj instanceof Frequency))
593                return false;
594            Frequency other = (Frequency) obj;
595            if (freqTable == null) {
596                if (other.freqTable != null)
597                    return false;
598            } else if (!freqTable.equals(other.freqTable))
599                return false;
600            return true;
601        }
602    
603    }