001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 018 package org.apache.commons.math.distribution; 019 020 import java.io.Serializable; 021 022 import org.apache.commons.math.MathRuntimeException; 023 import org.apache.commons.math.exception.util.LocalizedFormats; 024 import org.apache.commons.math.util.MathUtils; 025 import org.apache.commons.math.util.FastMath; 026 027 /** 028 * The default implementation of {@link HypergeometricDistribution}. 029 * 030 * @version $Revision: 1054524 $ $Date: 2011-01-03 05:59:18 +0100 (lun. 03 janv. 2011) $ 031 */ 032 public class HypergeometricDistributionImpl extends AbstractIntegerDistribution 033 implements HypergeometricDistribution, Serializable { 034 035 /** Serializable version identifier */ 036 private static final long serialVersionUID = -436928820673516179L; 037 038 /** The number of successes in the population. */ 039 private int numberOfSuccesses; 040 041 /** The population size. */ 042 private int populationSize; 043 044 /** The sample size. */ 045 private int sampleSize; 046 047 /** 048 * Construct a new hypergeometric distribution with the given the population 049 * size, the number of successes in the population, and the sample size. 050 * 051 * @param populationSize the population size. 052 * @param numberOfSuccesses number of successes in the population. 053 * @param sampleSize the sample size. 054 */ 055 public HypergeometricDistributionImpl(int populationSize, 056 int numberOfSuccesses, int sampleSize) { 057 super(); 058 if (numberOfSuccesses > populationSize) { 059 throw MathRuntimeException 060 .createIllegalArgumentException( 061 LocalizedFormats.NUMBER_OF_SUCCESS_LARGER_THAN_POPULATION_SIZE, 062 numberOfSuccesses, populationSize); 063 } 064 if (sampleSize > populationSize) { 065 throw MathRuntimeException 066 .createIllegalArgumentException( 067 LocalizedFormats.SAMPLE_SIZE_LARGER_THAN_POPULATION_SIZE, 068 sampleSize, populationSize); 069 } 070 071 setPopulationSizeInternal(populationSize); 072 setSampleSizeInternal(sampleSize); 073 setNumberOfSuccessesInternal(numberOfSuccesses); 074 } 075 076 /** 077 * For this distribution, X, this method returns P(X ≤ x). 078 * 079 * @param x the value at which the PDF is evaluated. 080 * @return PDF for this distribution. 081 */ 082 @Override 083 public double cumulativeProbability(int x) { 084 double ret; 085 086 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); 087 if (x < domain[0]) { 088 ret = 0.0; 089 } else if (x >= domain[1]) { 090 ret = 1.0; 091 } else { 092 ret = innerCumulativeProbability(domain[0], x, 1, populationSize, 093 numberOfSuccesses, sampleSize); 094 } 095 096 return ret; 097 } 098 099 /** 100 * Return the domain for the given hypergeometric distribution parameters. 101 * 102 * @param n the population size. 103 * @param m number of successes in the population. 104 * @param k the sample size. 105 * @return a two element array containing the lower and upper bounds of the 106 * hypergeometric distribution. 107 */ 108 private int[] getDomain(int n, int m, int k) { 109 return new int[] { getLowerDomain(n, m, k), getUpperDomain(m, k) }; 110 } 111 112 /** 113 * Access the domain value lower bound, based on <code>p</code>, used to 114 * bracket a PDF root. 115 * 116 * @param p the desired probability for the critical value 117 * @return domain value lower bound, i.e. P(X < <i>lower bound</i>) < 118 * <code>p</code> 119 */ 120 @Override 121 protected int getDomainLowerBound(double p) { 122 return getLowerDomain(populationSize, numberOfSuccesses, sampleSize); 123 } 124 125 /** 126 * Access the domain value upper bound, based on <code>p</code>, used to 127 * bracket a PDF root. 128 * 129 * @param p the desired probability for the critical value 130 * @return domain value upper bound, i.e. P(X < <i>upper bound</i>) > 131 * <code>p</code> 132 */ 133 @Override 134 protected int getDomainUpperBound(double p) { 135 return getUpperDomain(sampleSize, numberOfSuccesses); 136 } 137 138 /** 139 * Return the lowest domain value for the given hypergeometric distribution 140 * parameters. 141 * 142 * @param n the population size. 143 * @param m number of successes in the population. 144 * @param k the sample size. 145 * @return the lowest domain value of the hypergeometric distribution. 146 */ 147 private int getLowerDomain(int n, int m, int k) { 148 return FastMath.max(0, m - (n - k)); 149 } 150 151 /** 152 * Access the number of successes. 153 * 154 * @return the number of successes. 155 */ 156 public int getNumberOfSuccesses() { 157 return numberOfSuccesses; 158 } 159 160 /** 161 * Access the population size. 162 * 163 * @return the population size. 164 */ 165 public int getPopulationSize() { 166 return populationSize; 167 } 168 169 /** 170 * Access the sample size. 171 * 172 * @return the sample size. 173 */ 174 public int getSampleSize() { 175 return sampleSize; 176 } 177 178 /** 179 * Return the highest domain value for the given hypergeometric distribution 180 * parameters. 181 * 182 * @param m number of successes in the population. 183 * @param k the sample size. 184 * @return the highest domain value of the hypergeometric distribution. 185 */ 186 private int getUpperDomain(int m, int k) { 187 return FastMath.min(k, m); 188 } 189 190 /** 191 * For this distribution, X, this method returns P(X = x). 192 * 193 * @param x the value at which the PMF is evaluated. 194 * @return PMF for this distribution. 195 */ 196 public double probability(int x) { 197 double ret; 198 199 int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); 200 if (x < domain[0] || x > domain[1]) { 201 ret = 0.0; 202 } else { 203 double p = (double) sampleSize / (double) populationSize; 204 double q = (double) (populationSize - sampleSize) / (double) populationSize; 205 double p1 = SaddlePointExpansion.logBinomialProbability(x, 206 numberOfSuccesses, p, q); 207 double p2 = 208 SaddlePointExpansion.logBinomialProbability(sampleSize - x, 209 populationSize - numberOfSuccesses, p, q); 210 double p3 = 211 SaddlePointExpansion.logBinomialProbability(sampleSize, populationSize, p, q); 212 ret = FastMath.exp(p1 + p2 - p3); 213 } 214 215 return ret; 216 } 217 218 /** 219 * For the distribution, X, defined by the given hypergeometric distribution 220 * parameters, this method returns P(X = x). 221 * 222 * @param n the population size. 223 * @param m number of successes in the population. 224 * @param k the sample size. 225 * @param x the value at which the PMF is evaluated. 226 * @return PMF for the distribution. 227 */ 228 private double probability(int n, int m, int k, int x) { 229 return FastMath.exp(MathUtils.binomialCoefficientLog(m, x) + 230 MathUtils.binomialCoefficientLog(n - m, k - x) - 231 MathUtils.binomialCoefficientLog(n, k)); 232 } 233 234 /** 235 * Modify the number of successes. 236 * 237 * @param num the new number of successes. 238 * @throws IllegalArgumentException if <code>num</code> is negative. 239 * @deprecated as of 2.1 (class will become immutable in 3.0) 240 */ 241 @Deprecated 242 public void setNumberOfSuccesses(int num) { 243 setNumberOfSuccessesInternal(num); 244 } 245 246 /** 247 * Modify the number of successes. 248 * 249 * @param num the new number of successes. 250 * @throws IllegalArgumentException if <code>num</code> is negative. 251 */ 252 private void setNumberOfSuccessesInternal(int num) { 253 if (num < 0) { 254 throw MathRuntimeException.createIllegalArgumentException( 255 LocalizedFormats.NEGATIVE_NUMBER_OF_SUCCESSES, num); 256 } 257 numberOfSuccesses = num; 258 } 259 260 /** 261 * Modify the population size. 262 * 263 * @param size the new population size. 264 * @throws IllegalArgumentException if <code>size</code> is not positive. 265 * @deprecated as of 2.1 (class will become immutable in 3.0) 266 */ 267 @Deprecated 268 public void setPopulationSize(int size) { 269 setPopulationSizeInternal(size); 270 } 271 272 /** 273 * Modify the population size. 274 * 275 * @param size the new population size. 276 * @throws IllegalArgumentException if <code>size</code> is not positive. 277 */ 278 private void setPopulationSizeInternal(int size) { 279 if (size <= 0) { 280 throw MathRuntimeException.createIllegalArgumentException( 281 LocalizedFormats.NOT_POSITIVE_POPULATION_SIZE, size); 282 } 283 populationSize = size; 284 } 285 286 /** 287 * Modify the sample size. 288 * 289 * @param size the new sample size. 290 * @throws IllegalArgumentException if <code>size</code> is negative. 291 * @deprecated as of 2.1 (class will become immutable in 3.0) 292 */ 293 @Deprecated 294 public void setSampleSize(int size) { 295 setSampleSizeInternal(size); 296 } 297 /** 298 * Modify the sample size. 299 * 300 * @param size the new sample size. 301 * @throws IllegalArgumentException if <code>size</code> is negative. 302 */ 303 private void setSampleSizeInternal(int size) { 304 if (size < 0) { 305 throw MathRuntimeException.createIllegalArgumentException( 306 LocalizedFormats.NOT_POSITIVE_SAMPLE_SIZE, size); 307 } 308 sampleSize = size; 309 } 310 311 /** 312 * For this distribution, X, this method returns P(X ≥ x). 313 * 314 * @param x the value at which the CDF is evaluated. 315 * @return upper tail CDF for this distribution. 316 * @since 1.1 317 */ 318 public double upperCumulativeProbability(int x) { 319 double ret; 320 321 final int[] domain = getDomain(populationSize, numberOfSuccesses, sampleSize); 322 if (x < domain[0]) { 323 ret = 1.0; 324 } else if (x > domain[1]) { 325 ret = 0.0; 326 } else { 327 ret = innerCumulativeProbability(domain[1], x, -1, populationSize, numberOfSuccesses, sampleSize); 328 } 329 330 return ret; 331 } 332 333 /** 334 * For this distribution, X, this method returns P(x0 ≤ X ≤ x1). This 335 * probability is computed by summing the point probabilities for the values 336 * x0, x0 + 1, x0 + 2, ..., x1, in the order directed by dx. 337 * 338 * @param x0 the inclusive, lower bound 339 * @param x1 the inclusive, upper bound 340 * @param dx the direction of summation. 1 indicates summing from x0 to x1. 341 * 0 indicates summing from x1 to x0. 342 * @param n the population size. 343 * @param m number of successes in the population. 344 * @param k the sample size. 345 * @return P(x0 ≤ X ≤ x1). 346 */ 347 private double innerCumulativeProbability(int x0, int x1, int dx, int n, 348 int m, int k) { 349 double ret = probability(n, m, k, x0); 350 while (x0 != x1) { 351 x0 += dx; 352 ret += probability(n, m, k, x0); 353 } 354 return ret; 355 } 356 357 /** 358 * Returns the lower bound for the support for the distribution. 359 * 360 * For population size <code>N</code>, 361 * number of successes <code>m</code>, and 362 * sample size <code>n</code>, 363 * the lower bound of the support is 364 * <code>max(0, n + m - N)</code> 365 * 366 * @return lower bound of the support 367 * @since 2.2 368 */ 369 public int getSupportLowerBound() { 370 return FastMath.max(0, 371 getSampleSize() + getNumberOfSuccesses() - getPopulationSize()); 372 } 373 374 /** 375 * Returns the upper bound for the support of the distribution. 376 * 377 * For number of successes <code>m</code> and 378 * sample size <code>n</code>, 379 * the upper bound of the support is 380 * <code>min(m, n)</code> 381 * 382 * @return upper bound of the support 383 * @since 2.2 384 */ 385 public int getSupportUpperBound() { 386 return FastMath.min(getNumberOfSuccesses(), getSampleSize()); 387 } 388 389 /** 390 * Returns the mean. 391 * 392 * For population size <code>N</code>, 393 * number of successes <code>m</code>, and 394 * sample size <code>n</code>, the mean is 395 * <code>n * m / N</code> 396 * 397 * @return the mean 398 * @since 2.2 399 */ 400 protected double getNumericalMean() { 401 return (double)(getSampleSize() * getNumberOfSuccesses()) / (double)getPopulationSize(); 402 } 403 404 /** 405 * Returns the variance. 406 * 407 * For population size <code>N</code>, 408 * number of successes <code>m</code>, and 409 * sample size <code>n</code>, the variance is 410 * <code>[ n * m * (N - n) * (N - m) ] / [ N^2 * (N - 1) ]</code> 411 * 412 * @return the variance 413 * @since 2.2 414 */ 415 public double getNumericalVariance() { 416 final double N = getPopulationSize(); 417 final double m = getNumberOfSuccesses(); 418 final double n = getSampleSize(); 419 return ( n * m * (N - n) * (N - m) ) / ( (N*N * (N - 1)) ); 420 } 421 }