001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.math.random; 019 020 import java.io.IOException; 021 import java.io.File; 022 import java.net.URL; 023 import java.util.List; 024 025 import org.apache.commons.math.stat.descriptive.StatisticalSummary; 026 import org.apache.commons.math.stat.descriptive.SummaryStatistics; 027 028 /** 029 * Represents an <a href="http://random.mat.sbg.ac.at/~ste/dipl/node11.html"> 030 * empirical probability distribution</a> -- a probability distribution derived 031 * from observed data without making any assumptions about the functional form 032 * of the population distribution that the data come from.<p> 033 * Implementations of this interface maintain data structures, called 034 * <i>distribution digests</i>, that describe empirical distributions and 035 * support the following operations: <ul> 036 * <li>loading the distribution from a file of observed data values</li> 037 * <li>dividing the input data into "bin ranges" and reporting bin frequency 038 * counts (data for histogram)</li> 039 * <li>reporting univariate statistics describing the full set of data values 040 * as well as the observations within each bin</li> 041 * <li>generating random values from the distribution</li> 042 * </ul> 043 * Applications can use <code>EmpiricalDistribution</code> implementations to 044 * build grouped frequency histograms representing the input data or to 045 * generate random values "like" those in the input file -- i.e., the values 046 * generated will follow the distribution of the values in the file.</p> 047 * 048 * @version $Revision: 817128 $ $Date: 2009-09-21 03:30:53 +0200 (lun. 21 sept. 2009) $ 049 */ 050 public interface EmpiricalDistribution { 051 052 /** 053 * Computes the empirical distribution from the provided 054 * array of numbers. 055 * 056 * @param dataArray the data array 057 */ 058 void load(double[] dataArray); 059 060 /** 061 * Computes the empirical distribution from the input file. 062 * 063 * @param file the input file 064 * @throws IOException if an IO error occurs 065 */ 066 void load(File file) throws IOException; 067 068 /** 069 * Computes the empirical distribution using data read from a URL. 070 * 071 * @param url url of the input file 072 * @throws IOException if an IO error occurs 073 */ 074 void load(URL url) throws IOException; 075 076 /** 077 * Generates a random value from this distribution. 078 * <strong>Preconditions:</strong><ul> 079 * <li>the distribution must be loaded before invoking this method</li></ul> 080 * @return the random value. 081 * 082 * @throws IllegalStateException if the distribution has not been loaded 083 */ 084 double getNextValue() throws IllegalStateException; 085 086 087 /** 088 * Returns a 089 * {@link org.apache.commons.math.stat.descriptive.StatisticalSummary} 090 * describing this distribution. 091 * <strong>Preconditions:</strong><ul> 092 * <li>the distribution must be loaded before invoking this method</li> 093 * </ul> 094 * 095 * @return the sample statistics 096 * @throws IllegalStateException if the distribution has not been loaded 097 */ 098 StatisticalSummary getSampleStats() throws IllegalStateException; 099 100 /** 101 * Property indicating whether or not the distribution has been loaded. 102 * 103 * @return true if the distribution has been loaded 104 */ 105 boolean isLoaded(); 106 107 /** 108 * Returns the number of bins. 109 * 110 * @return the number of bins 111 */ 112 int getBinCount(); 113 114 /** 115 * Returns a list of 116 * {@link org.apache.commons.math.stat.descriptive.SummaryStatistics} 117 * containing statistics describing the values in each of the bins. The 118 * List is indexed on the bin number. 119 * 120 * @return List of bin statistics 121 */ 122 List<SummaryStatistics> getBinStats(); 123 124 /** 125 * Returns the array of upper bounds for the bins. Bins are: <br/> 126 * [min,upperBounds[0]],(upperBounds[0],upperBounds[1]],..., 127 * (upperBounds[binCount-2], upperBounds[binCount-1] = max]. 128 * 129 * @return array of bin upper bounds 130 */ 131 double[] getUpperBounds(); 132 133 }