001 /* 002 * Licensed to the Apache Software Foundation (ASF) under one or more 003 * contributor license agreements. See the NOTICE file distributed with 004 * this work for additional information regarding copyright ownership. 005 * The ASF licenses this file to You under the Apache License, Version 2.0 006 * (the "License"); you may not use this file except in compliance with 007 * the License. You may obtain a copy of the License at 008 * 009 * http://www.apache.org/licenses/LICENSE-2.0 010 * 011 * Unless required by applicable law or agreed to in writing, software 012 * distributed under the License is distributed on an "AS IS" BASIS, 013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 014 * See the License for the specific language governing permissions and 015 * limitations under the License. 016 */ 017 package org.apache.commons.math.stat.descriptive.moment; 018 019 import java.io.Serializable; 020 021 import org.apache.commons.math.exception.NullArgumentException; 022 import org.apache.commons.math.exception.util.LocalizedFormats; 023 import org.apache.commons.math.stat.descriptive.WeightedEvaluation; 024 import org.apache.commons.math.stat.descriptive.AbstractStorelessUnivariateStatistic; 025 026 /** 027 * Computes the variance of the available values. By default, the unbiased 028 * "sample variance" definitional formula is used: 029 * <p> 030 * variance = sum((x_i - mean)^2) / (n - 1) </p> 031 * <p> 032 * where mean is the {@link Mean} and <code>n</code> is the number 033 * of sample observations.</p> 034 * <p> 035 * The definitional formula does not have good numerical properties, so 036 * this implementation does not compute the statistic using the definitional 037 * formula. <ul> 038 * <li> The <code>getResult</code> method computes the variance using 039 * updating formulas based on West's algorithm, as described in 040 * <a href="http://doi.acm.org/10.1145/359146.359152"> Chan, T. F. and 041 * J. G. Lewis 1979, <i>Communications of the ACM</i>, 042 * vol. 22 no. 9, pp. 526-531.</a></li> 043 * <li> The <code>evaluate</code> methods leverage the fact that they have the 044 * full array of values in memory to execute a two-pass algorithm. 045 * Specifically, these methods use the "corrected two-pass algorithm" from 046 * Chan, Golub, Levesque, <i>Algorithms for Computing the Sample Variance</i>, 047 * American Statistician, vol. 37, no. 3 (1983) pp. 242-247.</li></ul> 048 * Note that adding values using <code>increment</code> or 049 * <code>incrementAll</code> and then executing <code>getResult</code> will 050 * sometimes give a different, less accurate, result than executing 051 * <code>evaluate</code> with the full array of values. The former approach 052 * should only be used when the full array of values is not available.</p> 053 * <p> 054 * The "population variance" ( sum((x_i - mean)^2) / n ) can also 055 * be computed using this statistic. The <code>isBiasCorrected</code> 056 * property determines whether the "population" or "sample" value is 057 * returned by the <code>evaluate</code> and <code>getResult</code> methods. 058 * To compute population variances, set this property to <code>false.</code> 059 * </p> 060 * <p> 061 * <strong>Note that this implementation is not synchronized.</strong> If 062 * multiple threads access an instance of this class concurrently, and at least 063 * one of the threads invokes the <code>increment()</code> or 064 * <code>clear()</code> method, it must be synchronized externally.</p> 065 * 066 * @version $Revision: 1006299 $ $Date: 2010-10-10 16:47:17 +0200 (dim. 10 oct. 2010) $ 067 */ 068 public class Variance extends AbstractStorelessUnivariateStatistic implements Serializable, WeightedEvaluation { 069 070 /** Serializable version identifier */ 071 private static final long serialVersionUID = -9111962718267217978L; 072 073 /** SecondMoment is used in incremental calculation of Variance*/ 074 protected SecondMoment moment = null; 075 076 /** 077 * Boolean test to determine if this Variance should also increment 078 * the second moment, this evaluates to false when this Variance is 079 * constructed with an external SecondMoment as a parameter. 080 */ 081 protected boolean incMoment = true; 082 083 /** 084 * Determines whether or not bias correction is applied when computing the 085 * value of the statisic. True means that bias is corrected. See 086 * {@link Variance} for details on the formula. 087 */ 088 private boolean isBiasCorrected = true; 089 090 /** 091 * Constructs a Variance with default (true) <code>isBiasCorrected</code> 092 * property. 093 */ 094 public Variance() { 095 moment = new SecondMoment(); 096 } 097 098 /** 099 * Constructs a Variance based on an external second moment. 100 * 101 * @param m2 the SecondMoment (Third or Fourth moments work 102 * here as well.) 103 */ 104 public Variance(final SecondMoment m2) { 105 incMoment = false; 106 this.moment = m2; 107 } 108 109 /** 110 * Constructs a Variance with the specified <code>isBiasCorrected</code> 111 * property 112 * 113 * @param isBiasCorrected setting for bias correction - true means 114 * bias will be corrected and is equivalent to using the argumentless 115 * constructor 116 */ 117 public Variance(boolean isBiasCorrected) { 118 moment = new SecondMoment(); 119 this.isBiasCorrected = isBiasCorrected; 120 } 121 122 /** 123 * Constructs a Variance with the specified <code>isBiasCorrected</code> 124 * property and the supplied external second moment. 125 * 126 * @param isBiasCorrected setting for bias correction - true means 127 * bias will be corrected 128 * @param m2 the SecondMoment (Third or Fourth moments work 129 * here as well.) 130 */ 131 public Variance(boolean isBiasCorrected, SecondMoment m2) { 132 incMoment = false; 133 this.moment = m2; 134 this.isBiasCorrected = isBiasCorrected; 135 } 136 137 /** 138 * Copy constructor, creates a new {@code Variance} identical 139 * to the {@code original} 140 * 141 * @param original the {@code Variance} instance to copy 142 */ 143 public Variance(Variance original) { 144 copy(original, this); 145 } 146 147 /** 148 * {@inheritDoc} 149 * <p>If all values are available, it is more accurate to use 150 * {@link #evaluate(double[])} rather than adding values one at a time 151 * using this method and then executing {@link #getResult}, since 152 * <code>evaluate</code> leverages the fact that is has the full 153 * list of values together to execute a two-pass algorithm. 154 * See {@link Variance}.</p> 155 */ 156 @Override 157 public void increment(final double d) { 158 if (incMoment) { 159 moment.increment(d); 160 } 161 } 162 163 /** 164 * {@inheritDoc} 165 */ 166 @Override 167 public double getResult() { 168 if (moment.n == 0) { 169 return Double.NaN; 170 } else if (moment.n == 1) { 171 return 0d; 172 } else { 173 if (isBiasCorrected) { 174 return moment.m2 / (moment.n - 1d); 175 } else { 176 return moment.m2 / (moment.n); 177 } 178 } 179 } 180 181 /** 182 * {@inheritDoc} 183 */ 184 public long getN() { 185 return moment.getN(); 186 } 187 188 /** 189 * {@inheritDoc} 190 */ 191 @Override 192 public void clear() { 193 if (incMoment) { 194 moment.clear(); 195 } 196 } 197 198 /** 199 * Returns the variance of the entries in the input array, or 200 * <code>Double.NaN</code> if the array is empty. 201 * <p> 202 * See {@link Variance} for details on the computing algorithm.</p> 203 * <p> 204 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 205 * <p> 206 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 207 * <p> 208 * Does not change the internal state of the statistic.</p> 209 * 210 * @param values the input array 211 * @return the variance of the values or Double.NaN if length = 0 212 * @throws IllegalArgumentException if the array is null 213 */ 214 @Override 215 public double evaluate(final double[] values) { 216 if (values == null) { 217 throw new NullArgumentException(LocalizedFormats.INPUT_ARRAY); 218 } 219 return evaluate(values, 0, values.length); 220 } 221 222 /** 223 * Returns the variance of the entries in the specified portion of 224 * the input array, or <code>Double.NaN</code> if the designated subarray 225 * is empty. 226 * <p> 227 * See {@link Variance} for details on the computing algorithm.</p> 228 * <p> 229 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 230 * <p> 231 * Does not change the internal state of the statistic.</p> 232 * <p> 233 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 234 * 235 * @param values the input array 236 * @param begin index of the first array element to include 237 * @param length the number of elements to include 238 * @return the variance of the values or Double.NaN if length = 0 239 * @throws IllegalArgumentException if the array is null or the array index 240 * parameters are not valid 241 */ 242 @Override 243 public double evaluate(final double[] values, final int begin, final int length) { 244 245 double var = Double.NaN; 246 247 if (test(values, begin, length)) { 248 clear(); 249 if (length == 1) { 250 var = 0.0; 251 } else if (length > 1) { 252 Mean mean = new Mean(); 253 double m = mean.evaluate(values, begin, length); 254 var = evaluate(values, m, begin, length); 255 } 256 } 257 return var; 258 } 259 260 /** 261 * <p>Returns the weighted variance of the entries in the specified portion of 262 * the input array, or <code>Double.NaN</code> if the designated subarray 263 * is empty.</p> 264 * <p> 265 * Uses the formula <pre> 266 * Σ(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(Σ(weights[i]) - 1) 267 * </pre> 268 * where weightedMean is the weighted mean</p> 269 * <p> 270 * This formula will not return the same result as the unweighted variance when all 271 * weights are equal, unless all weights are equal to 1. The formula assumes that 272 * weights are to be treated as "expansion values," as will be the case if for example 273 * the weights represent frequency counts. To normalize weights so that the denominator 274 * in the variance computation equals the length of the input vector minus one, use <pre> 275 * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length)); </code> 276 * </pre> 277 * <p> 278 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 279 * <p> 280 * Throws <code>IllegalArgumentException</code> if any of the following are true: 281 * <ul><li>the values array is null</li> 282 * <li>the weights array is null</li> 283 * <li>the weights array does not have the same length as the values array</li> 284 * <li>the weights array contains one or more infinite values</li> 285 * <li>the weights array contains one or more NaN values</li> 286 * <li>the weights array contains negative values</li> 287 * <li>the start and length arguments do not determine a valid array</li> 288 * </ul></p> 289 * <p> 290 * Does not change the internal state of the statistic.</p> 291 * <p> 292 * Throws <code>IllegalArgumentException</code> if either array is null.</p> 293 * 294 * @param values the input array 295 * @param weights the weights array 296 * @param begin index of the first array element to include 297 * @param length the number of elements to include 298 * @return the weighted variance of the values or Double.NaN if length = 0 299 * @throws IllegalArgumentException if the parameters are not valid 300 * @since 2.1 301 */ 302 public double evaluate(final double[] values, final double[] weights, 303 final int begin, final int length) { 304 305 double var = Double.NaN; 306 307 if (test(values, weights,begin, length)) { 308 clear(); 309 if (length == 1) { 310 var = 0.0; 311 } else if (length > 1) { 312 Mean mean = new Mean(); 313 double m = mean.evaluate(values, weights, begin, length); 314 var = evaluate(values, weights, m, begin, length); 315 } 316 } 317 return var; 318 } 319 320 /** 321 * <p> 322 * Returns the weighted variance of the entries in the the input array.</p> 323 * <p> 324 * Uses the formula <pre> 325 * Σ(weights[i]*(values[i] - weightedMean)<sup>2</sup>)/(Σ(weights[i]) - 1) 326 * </pre> 327 * where weightedMean is the weighted mean</p> 328 * <p> 329 * This formula will not return the same result as the unweighted variance when all 330 * weights are equal, unless all weights are equal to 1. The formula assumes that 331 * weights are to be treated as "expansion values," as will be the case if for example 332 * the weights represent frequency counts. To normalize weights so that the denominator 333 * in the variance computation equals the length of the input vector minus one, use <pre> 334 * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length)); </code> 335 * </pre> 336 * <p> 337 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 338 * <p> 339 * Throws <code>IllegalArgumentException</code> if any of the following are true: 340 * <ul><li>the values array is null</li> 341 * <li>the weights array is null</li> 342 * <li>the weights array does not have the same length as the values array</li> 343 * <li>the weights array contains one or more infinite values</li> 344 * <li>the weights array contains one or more NaN values</li> 345 * <li>the weights array contains negative values</li> 346 * </ul></p> 347 * <p> 348 * Does not change the internal state of the statistic.</p> 349 * <p> 350 * Throws <code>IllegalArgumentException</code> if either array is null.</p> 351 * 352 * @param values the input array 353 * @param weights the weights array 354 * @return the weighted variance of the values 355 * @throws IllegalArgumentException if the parameters are not valid 356 * @since 2.1 357 */ 358 public double evaluate(final double[] values, final double[] weights) { 359 return evaluate(values, weights, 0, values.length); 360 } 361 362 /** 363 * Returns the variance of the entries in the specified portion of 364 * the input array, using the precomputed mean value. Returns 365 * <code>Double.NaN</code> if the designated subarray is empty. 366 * <p> 367 * See {@link Variance} for details on the computing algorithm.</p> 368 * <p> 369 * The formula used assumes that the supplied mean value is the arithmetic 370 * mean of the sample data, not a known population parameter. This method 371 * is supplied only to save computation when the mean has already been 372 * computed.</p> 373 * <p> 374 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 375 * <p> 376 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 377 * <p> 378 * Does not change the internal state of the statistic.</p> 379 * 380 * @param values the input array 381 * @param mean the precomputed mean value 382 * @param begin index of the first array element to include 383 * @param length the number of elements to include 384 * @return the variance of the values or Double.NaN if length = 0 385 * @throws IllegalArgumentException if the array is null or the array index 386 * parameters are not valid 387 */ 388 public double evaluate(final double[] values, final double mean, 389 final int begin, final int length) { 390 391 double var = Double.NaN; 392 393 if (test(values, begin, length)) { 394 if (length == 1) { 395 var = 0.0; 396 } else if (length > 1) { 397 double accum = 0.0; 398 double dev = 0.0; 399 double accum2 = 0.0; 400 for (int i = begin; i < begin + length; i++) { 401 dev = values[i] - mean; 402 accum += dev * dev; 403 accum2 += dev; 404 } 405 double len = length; 406 if (isBiasCorrected) { 407 var = (accum - (accum2 * accum2 / len)) / (len - 1.0); 408 } else { 409 var = (accum - (accum2 * accum2 / len)) / len; 410 } 411 } 412 } 413 return var; 414 } 415 416 /** 417 * Returns the variance of the entries in the input array, using the 418 * precomputed mean value. Returns <code>Double.NaN</code> if the array 419 * is empty. 420 * <p> 421 * See {@link Variance} for details on the computing algorithm.</p> 422 * <p> 423 * If <code>isBiasCorrected</code> is <code>true</code> the formula used 424 * assumes that the supplied mean value is the arithmetic mean of the 425 * sample data, not a known population parameter. If the mean is a known 426 * population parameter, or if the "population" version of the variance is 427 * desired, set <code>isBiasCorrected</code> to <code>false</code> before 428 * invoking this method.</p> 429 * <p> 430 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 431 * <p> 432 * Throws <code>IllegalArgumentException</code> if the array is null.</p> 433 * <p> 434 * Does not change the internal state of the statistic.</p> 435 * 436 * @param values the input array 437 * @param mean the precomputed mean value 438 * @return the variance of the values or Double.NaN if the array is empty 439 * @throws IllegalArgumentException if the array is null 440 */ 441 public double evaluate(final double[] values, final double mean) { 442 return evaluate(values, mean, 0, values.length); 443 } 444 445 /** 446 * Returns the weighted variance of the entries in the specified portion of 447 * the input array, using the precomputed weighted mean value. Returns 448 * <code>Double.NaN</code> if the designated subarray is empty. 449 * <p> 450 * Uses the formula <pre> 451 * Σ(weights[i]*(values[i] - mean)<sup>2</sup>)/(Σ(weights[i]) - 1) 452 * </pre></p> 453 * <p> 454 * The formula used assumes that the supplied mean value is the weighted arithmetic 455 * mean of the sample data, not a known population parameter. This method 456 * is supplied only to save computation when the mean has already been 457 * computed.</p> 458 * <p> 459 * This formula will not return the same result as the unweighted variance when all 460 * weights are equal, unless all weights are equal to 1. The formula assumes that 461 * weights are to be treated as "expansion values," as will be the case if for example 462 * the weights represent frequency counts. To normalize weights so that the denominator 463 * in the variance computation equals the length of the input vector minus one, use <pre> 464 * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length), mean); </code> 465 * </pre> 466 * <p> 467 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 468 * <p> 469 * Throws <code>IllegalArgumentException</code> if any of the following are true: 470 * <ul><li>the values array is null</li> 471 * <li>the weights array is null</li> 472 * <li>the weights array does not have the same length as the values array</li> 473 * <li>the weights array contains one or more infinite values</li> 474 * <li>the weights array contains one or more NaN values</li> 475 * <li>the weights array contains negative values</li> 476 * <li>the start and length arguments do not determine a valid array</li> 477 * </ul></p> 478 * <p> 479 * Does not change the internal state of the statistic.</p> 480 * 481 * @param values the input array 482 * @param weights the weights array 483 * @param mean the precomputed weighted mean value 484 * @param begin index of the first array element to include 485 * @param length the number of elements to include 486 * @return the variance of the values or Double.NaN if length = 0 487 * @throws IllegalArgumentException if the parameters are not valid 488 * @since 2.1 489 */ 490 public double evaluate(final double[] values, final double[] weights, 491 final double mean, final int begin, final int length) { 492 493 double var = Double.NaN; 494 495 if (test(values, weights, begin, length)) { 496 if (length == 1) { 497 var = 0.0; 498 } else if (length > 1) { 499 double accum = 0.0; 500 double dev = 0.0; 501 double accum2 = 0.0; 502 for (int i = begin; i < begin + length; i++) { 503 dev = values[i] - mean; 504 accum += weights[i] * (dev * dev); 505 accum2 += weights[i] * dev; 506 } 507 508 double sumWts = 0; 509 for (int i = 0; i < weights.length; i++) { 510 sumWts += weights[i]; 511 } 512 513 if (isBiasCorrected) { 514 var = (accum - (accum2 * accum2 / sumWts)) / (sumWts - 1.0); 515 } else { 516 var = (accum - (accum2 * accum2 / sumWts)) / sumWts; 517 } 518 } 519 } 520 return var; 521 } 522 523 /** 524 * <p>Returns the weighted variance of the values in the input array, using 525 * the precomputed weighted mean value.</p> 526 * <p> 527 * Uses the formula <pre> 528 * Σ(weights[i]*(values[i] - mean)<sup>2</sup>)/(Σ(weights[i]) - 1) 529 * </pre></p> 530 * <p> 531 * The formula used assumes that the supplied mean value is the weighted arithmetic 532 * mean of the sample data, not a known population parameter. This method 533 * is supplied only to save computation when the mean has already been 534 * computed.</p> 535 * <p> 536 * This formula will not return the same result as the unweighted variance when all 537 * weights are equal, unless all weights are equal to 1. The formula assumes that 538 * weights are to be treated as "expansion values," as will be the case if for example 539 * the weights represent frequency counts. To normalize weights so that the denominator 540 * in the variance computation equals the length of the input vector minus one, use <pre> 541 * <code>evaluate(values, MathUtils.normalizeArray(weights, values.length), mean); </code> 542 * </pre> 543 * <p> 544 * Returns 0 for a single-value (i.e. length = 1) sample.</p> 545 * <p> 546 * Throws <code>IllegalArgumentException</code> if any of the following are true: 547 * <ul><li>the values array is null</li> 548 * <li>the weights array is null</li> 549 * <li>the weights array does not have the same length as the values array</li> 550 * <li>the weights array contains one or more infinite values</li> 551 * <li>the weights array contains one or more NaN values</li> 552 * <li>the weights array contains negative values</li> 553 * </ul></p> 554 * <p> 555 * Does not change the internal state of the statistic.</p> 556 * 557 * @param values the input array 558 * @param weights the weights array 559 * @param mean the precomputed weighted mean value 560 * @return the variance of the values or Double.NaN if length = 0 561 * @throws IllegalArgumentException if the parameters are not valid 562 * @since 2.1 563 */ 564 public double evaluate(final double[] values, final double[] weights, final double mean) { 565 return evaluate(values, weights, mean, 0, values.length); 566 } 567 568 /** 569 * @return Returns the isBiasCorrected. 570 */ 571 public boolean isBiasCorrected() { 572 return isBiasCorrected; 573 } 574 575 /** 576 * @param biasCorrected The isBiasCorrected to set. 577 */ 578 public void setBiasCorrected(boolean biasCorrected) { 579 this.isBiasCorrected = biasCorrected; 580 } 581 582 /** 583 * {@inheritDoc} 584 */ 585 @Override 586 public Variance copy() { 587 Variance result = new Variance(); 588 copy(this, result); 589 return result; 590 } 591 592 /** 593 * Copies source to dest. 594 * <p>Neither source nor dest can be null.</p> 595 * 596 * @param source Variance to copy 597 * @param dest Variance to copy to 598 * @throws NullPointerException if either source or dest is null 599 */ 600 public static void copy(Variance source, Variance dest) { 601 if (source == null || 602 dest == null) { 603 throw new NullArgumentException(); 604 } 605 dest.setData(source.getDataRef()); 606 dest.moment = source.moment.copy(); 607 dest.isBiasCorrected = source.isBiasCorrected; 608 dest.incMoment = source.incMoment; 609 } 610 }