Welcome to Statsmodels’s Documentation¶
statsmodels is a Python module that provides classes and functions for the estimation
of many different statistical models, as well as for conducting statistical tests and statistical
data exploration. An extensive list of result statistics is available for each estimator.
The results are tested against existing statistical packages to ensure that they are correct. The
package is released under the open source Modified BSD (3-clause) license.
The online documentation is hosted at statsmodels.org.
Minimal Examples¶
Since version 0.5.0 of statsmodels, you can use R-style formulas
together with pandas data frames to fit your models. Here is a simple
example using ordinary least squares:
In [1]: import numpy as np
In [2]: import statsmodels.api as sm
ImportError                               Traceback (most recent call last)
<ipython-input-2-085740203b77> in <module>()
----> 1 import statsmodels.api as sm
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/api.py in <module>()
5 from . import regression
6 from .regression.linear_model import OLS, GLS, WLS, GLSAR
----> 7 from .regression.recursive_ls import RecursiveLS
8 from .regression.quantile_regression import QuantReg
9 from .regression.mixed_linear_model import MixedLM
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/regression/recursive_ls.py in <module>()
14 from statsmodels.regression.linear_model import OLS
15 from statsmodels.tools.data import _is_using_pandas
---> 16 from statsmodels.tsa.statespace.mlemodel import (
17 MLEModel, MLEResults, MLEResultsWrapper)
18 from statsmodels.tools.tools import Bunch
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/tsa/statespace/mlemodel.py in <module>()
16 from scipy.stats import norm
17
---> 18 from .simulation_smoother import SimulationSmoother
19 from .kalman_smoother import SmootherResults
20 from .kalman_filter import (INVERT_UNIVARIATE, SOLVE_LU)
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/tsa/statespace/simulation_smoother.py in <module>()
8
9 import numpy as np
---> 10 from .kalman_smoother import KalmanSmoother
11 from . import tools
12
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/tsa/statespace/kalman_smoother.py in <module>()
9 import numpy as np
10
---> 11 from statsmodels.tsa.statespace.representation import OptionWrapper
12 from statsmodels.tsa.statespace.kalman_filter import (KalmanFilter,
13 FilterResults)
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/tsa/statespace/representation.py in <module>()
8
9 import numpy as np
---> 10 from .tools import (
11 find_best_blas_type, validate_matrix_shape, validate_vector_shape
12 )
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/tsa/statespace/tools.py in <module>()
205 'z': _statespace.zcopy_index_vector
206 })
--> 207 set_mode(compatibility=None)
208
209
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/tsa/statespace/tools.py in set_mode(compatibility)
57 if not compatibility:
58 from scipy.linalg import cython_blas
---> 59 from . import (_representation, _kalman_filter, _kalman_smoother,
60 _simulation_smoother, _tools)
61 compatibility_mode = False
ImportError: cannot import name _representation
In [3]: import statsmodels.formula.api as smf
# Load data
In [4]: dat = sm.datasets.get_rdataset("Guerry", "HistData").data
NameError                                 Traceback (most recent call last)
<ipython-input-4-7c82dba641f7> in <module>()
----> 1 dat = sm.datasets.get_rdataset("Guerry", "HistData").data
NameError: name 'sm' is not defined
# Fit regression model (using the natural log of one of the regressors)
In [5]: results = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=dat).fit()
NameError                                 Traceback (most recent call last)
<ipython-input-5-2abecb481881> in <module>()
----> 1 results = smf.ols('Lottery ~ Literacy + np.log(Pop1831)', data=dat).fit()
NameError: name 'dat' is not defined
# Inspect the results
In [6]: print(results.summary())
NameError                                 Traceback (most recent call last)
<ipython-input-6-5481b4ed0635> in <module>()
----> 1 print(results.summary())
NameError: name 'results' is not defined
You can also use numpy arrays instead of formulas:
In [7]: import numpy as np
In [8]: import statsmodels.api as sm
ImportError                               Traceback (most recent call last)
<ipython-input-8-085740203b77> in <module>()
----> 1 import statsmodels.api as sm
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/api.py in <module>()
5 from . import regression
6 from .regression.linear_model import OLS, GLS, WLS, GLSAR
----> 7 from .regression.recursive_ls import RecursiveLS
8 from .regression.quantile_regression import QuantReg
9 from .regression.mixed_linear_model import MixedLM
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/regression/recursive_ls.py in <module>()
14 from statsmodels.regression.linear_model import OLS
15 from statsmodels.tools.data import _is_using_pandas
---> 16 from statsmodels.tsa.statespace.mlemodel import (
17 MLEModel, MLEResults, MLEResultsWrapper)
18 from statsmodels.tools.tools import Bunch
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/tsa/statespace/mlemodel.py in <module>()
16 from scipy.stats import norm
17
---> 18 from .simulation_smoother import SimulationSmoother
19 from .kalman_smoother import SmootherResults
20 from .kalman_filter import (INVERT_UNIVARIATE, SOLVE_LU)
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/tsa/statespace/simulation_smoother.py in <module>()
8
9 import numpy as np
---> 10 from .kalman_smoother import KalmanSmoother
11 from . import tools
12
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/tsa/statespace/kalman_smoother.py in <module>()
9 import numpy as np
10
---> 11 from statsmodels.tsa.statespace.representation import OptionWrapper
12 from statsmodels.tsa.statespace.kalman_filter import (KalmanFilter,
13 FilterResults)
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/tsa/statespace/representation.py in <module>()
8
9 import numpy as np
---> 10 from .tools import (
11 find_best_blas_type, validate_matrix_shape, validate_vector_shape
12 )
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/tsa/statespace/tools.py in <module>()
205 'z': _statespace.zcopy_index_vector
206 })
--> 207 set_mode(compatibility=None)
208
209
/builddir/build/BUILD/statsmodels-0.9.0/statsmodels/tsa/statespace/tools.py in set_mode(compatibility)
57 if not compatibility:
58 from scipy.linalg import cython_blas
---> 59 from . import (_representation, _kalman_filter, _kalman_smoother,
60 _simulation_smoother, _tools)
61 compatibility_mode = False
ImportError: cannot import name _representation
# Generate artificial data (2 regressors + constant)
In [9]: nobs = 100
In [10]: X = np.random.random((nobs, 2))
In [11]: X = sm.add_constant(X)
NameError                                 Traceback (most recent call last)
<ipython-input-11-deab01e1d1cc> in <module>()
----> 1 X = sm.add_constant(X)
NameError: name 'sm' is not defined
In [12]: beta = [1, .1, .5]
In [13]: e = np.random.random(nobs)
In [14]: y = np.dot(X, beta) + e
ValueError                                Traceback (most recent call last)
<ipython-input-14-7891702e438d> in <module>()
----> 1 y = np.dot(X, beta) + e
ValueError: shapes (100,2) and (3,) not aligned: 2 (dim 1) != 3 (dim 0)
# Fit regression model
In [15]: results = sm.OLS(y, X).fit()
NameError                                 Traceback (most recent call last)
<ipython-input-15-451001e9af2e> in <module>()
----> 1 results = sm.OLS(y, X).fit()
NameError: name 'sm' is not defined
# Inspect the results
In [16]: print(results.summary())
NameError                                 Traceback (most recent call last)
<ipython-input-16-5481b4ed0635> in <module>()
----> 1 print(results.summary())
NameError: name 'results' is not defined
Have a look at dir(results) to see available results. Attributes are described in results.__doc__ and results methods have their own docstrings.
Citation¶
When using statsmodels in scientific publications, please consider using the following citation:
Seabold, Skipper, and Josef Perktold. “Statsmodels: Econometric and statistical modeling with python.” Proceedings of the 9th Python in Science Conference. 2010.
Bibtex entry:
@inproceedings{seabold2010statsmodels,
title={Statsmodels: Econometric and statistical modeling with python},
author={Seabold, Skipper and Perktold, Josef},
booktitle={9th Python in Science Conference},
year={2010},
}
Basic Documentation¶
Information about the structure and development of statsmodels:
Table of Contents¶
- Linear Regression
- Generalized Linear Models
- Generalized Estimating Equations
- Robust Linear Models
- Linear Mixed Effects Models
- Regression with Discrete Dependent Variable
- Examples
- Technical Documentation
- Module Reference
- statsmodels.discrete.discrete_model.Logit
- statsmodels.discrete.discrete_model.Probit
- statsmodels.discrete.discrete_model.MNLogit
- statsmodels.discrete.discrete_model.Poisson
- statsmodels.discrete.discrete_model.NegativeBinomial
- statsmodels.discrete.discrete_model.NegativeBinomialP
- statsmodels.discrete.discrete_model.GeneralizedPoisson
- statsmodels.discrete.count_model.ZeroInflatedPoisson
- statsmodels.discrete.count_model.ZeroInflatedNegativeBinomialP
- statsmodels.discrete.count_model.ZeroInflatedGeneralizedPoisson
- statsmodels.discrete.discrete_model.LogitResults
- statsmodels.discrete.discrete_model.ProbitResults
- statsmodels.discrete.discrete_model.CountResults
- statsmodels.discrete.discrete_model.MultinomialResults
- statsmodels.discrete.discrete_model.NegativeBinomialResults
- statsmodels.discrete.discrete_model.GeneralizedPoissonResults
- statsmodels.discrete.count_model.ZeroInflatedPoissonResults
- statsmodels.discrete.count_model.ZeroInflatedNegativeBinomialResults
- statsmodels.discrete.count_model.ZeroInflatedGeneralizedPoissonResults
- statsmodels.discrete.discrete_model.DiscreteModel
- statsmodels.discrete.discrete_model.DiscreteResults
- statsmodels.discrete.discrete_model.BinaryModel
- statsmodels.discrete.discrete_model.BinaryResults
- statsmodels.discrete.discrete_model.CountModel
- statsmodels.discrete.discrete_model.MultinomialModel
- statsmodels.discrete.count_model.GenericZeroInflated
- Generalized Linear Mixed Effects Models
- ANOVA
- Time Series analysis
tsa
- Descriptive Statistics and Tests
- Estimation
- Vector Autoregressive Processes (VAR)
- statsmodels.tsa.vector_ar.var_model.LagOrderResults
- statsmodels.tsa.vector_ar.var_model.VAR
- statsmodels.tsa.vector_ar.var_model.VARProcess
- statsmodels.tsa.vector_ar.var_model.VARResults
- statsmodels.tsa.vector_ar.irf.IRAnalysis
- statsmodels.tsa.vector_ar.var_model.FEVD
- statsmodels.tsa.vector_ar.hypothesis_test_results.HypothesisTestResults
- statsmodels.tsa.vector_ar.hypothesis_test_results.CausalityTestResults
- statsmodels.tsa.vector_ar.hypothesis_test_results.NormalityTestResults
- statsmodels.tsa.vector_ar.hypothesis_test_results.WhitenessTestResults
- statsmodels.tsa.vector_ar.dynamic.DynamicVAR
- Vector Error Correction Models (VECM)
- Regime switching models
- ARMA Process
- statsmodels.tsa.arima_process.ArmaProcess
- statsmodels.tsa.arima_process.ar2arma
- statsmodels.tsa.arima_process.arma2ar
- statsmodels.tsa.arima_process.arma2ma
- statsmodels.tsa.arima_process.arma_acf
- statsmodels.tsa.arima_process.arma_acovf
- statsmodels.tsa.arima_process.arma_generate_sample
- statsmodels.tsa.arima_process.arma_impulse_response
- statsmodels.tsa.arima_process.arma_pacf
- statsmodels.tsa.arima_process.arma_periodogram
- statsmodels.tsa.arima_process.deconvolve
- statsmodels.tsa.arima_process.index2lpol
- statsmodels.tsa.arima_process.lpol2index
- statsmodels.tsa.arima_process.lpol_fiar
- statsmodels.tsa.arima_process.lpol_fima
- statsmodels.tsa.arima_process.lpol_sdiff
- Time Series Filters
- statsmodels.tsa.filters.bk_filter.bkfilter
- statsmodels.tsa.filters.hp_filter.hpfilter
- statsmodels.tsa.filters.cf_filter.cffilter
- statsmodels.tsa.filters.filtertools.convolution_filter
- statsmodels.tsa.filters.filtertools.recursive_filter
- statsmodels.tsa.filters.filtertools.miso_lfilter
- statsmodels.tsa.filters.filtertools.fftconvolve3
- statsmodels.tsa.filters.filtertools.fftconvolveinv
- statsmodels.tsa.seasonal.seasonal_decompose
- TSA Tools
- VARMA Process
- Interpolation
- Time Series Analysis by State Space Methods
statespace
- Vector Autoregressions
tsa.vector_ar
- Methods for Survival and Duration Analysis
- Statistics
stats
- Residual Diagnostics and Specification Tests
- statsmodels.stats.stattools.durbin_watson
- statsmodels.stats.stattools.jarque_bera
- statsmodels.stats.stattools.omni_normtest
- statsmodels.stats.stattools.medcouple
- statsmodels.stats.stattools.robust_skewness
- statsmodels.stats.stattools.robust_kurtosis
- statsmodels.stats.stattools.expected_robust_kurtosis
- Outliers and influence measures
- Sandwich Robust Covariances
- statsmodels.stats.sandwich_covariance.cov_hac
- statsmodels.stats.sandwich_covariance.cov_nw_panel
- statsmodels.stats.sandwich_covariance.cov_nw_groupsum
- statsmodels.stats.sandwich_covariance.cov_cluster
- statsmodels.stats.sandwich_covariance.cov_cluster_2groups
- statsmodels.stats.sandwich_covariance.cov_white_simple
- statsmodels.stats.sandwich_covariance.cov_hc0
- statsmodels.stats.sandwich_covariance.cov_hc1
- statsmodels.stats.sandwich_covariance.cov_hc2
- statsmodels.stats.sandwich_covariance.cov_hc3
- statsmodels.stats.sandwich_covariance.se_cov
- Goodness of Fit Tests and Measures
- Non-Parametric Tests
- statsmodels.sandbox.stats.runs.mcnemar
- statsmodels.sandbox.stats.runs.symmetry_bowker
- statsmodels.sandbox.stats.runs.median_test_ksample
- statsmodels.sandbox.stats.runs.runstest_1samp
- statsmodels.sandbox.stats.runs.runstest_2samp
- statsmodels.sandbox.stats.runs.cochrans_q
- statsmodels.sandbox.stats.runs.Runs
- statsmodels.stats.descriptivestats.sign_test
- Interrater Reliability and Agreement
- Multiple Tests and Multiple Comparison Procedures
- statsmodels.stats.multitest.multipletests
- statsmodels.stats.multitest.fdrcorrection
- statsmodels.sandbox.stats.multicomp.GroupsStats
- statsmodels.sandbox.stats.multicomp.MultiComparison
- statsmodels.sandbox.stats.multicomp.TukeyHSDResults
- statsmodels.stats.multicomp.pairwise_tukeyhsd
- statsmodels.stats.multitest.local_fdr
- statsmodels.stats.multitest.fdrcorrection_twostage
- statsmodels.stats.multitest.NullDistribution
- statsmodels.stats.multitest.RegressionFDR
- statsmodels.sandbox.stats.multicomp.varcorrection_pairs_unbalanced
- statsmodels.sandbox.stats.multicomp.varcorrection_pairs_unequal
- statsmodels.sandbox.stats.multicomp.varcorrection_unbalanced
- statsmodels.sandbox.stats.multicomp.varcorrection_unequal
- statsmodels.sandbox.stats.multicomp.StepDown
- statsmodels.sandbox.stats.multicomp.catstack
- statsmodels.sandbox.stats.multicomp.ccols
- statsmodels.sandbox.stats.multicomp.compare_ordered
- statsmodels.sandbox.stats.multicomp.distance_st_range
- statsmodels.sandbox.stats.multicomp.ecdf
- statsmodels.sandbox.stats.multicomp.get_tukeyQcrit
- statsmodels.sandbox.stats.multicomp.homogeneous_subsets
- statsmodels.sandbox.stats.multicomp.maxzero
- statsmodels.sandbox.stats.multicomp.maxzerodown
- statsmodels.sandbox.stats.multicomp.mcfdr
- statsmodels.sandbox.stats.multicomp.qcrit
- statsmodels.sandbox.stats.multicomp.randmvn
- statsmodels.sandbox.stats.multicomp.rankdata
- statsmodels.sandbox.stats.multicomp.rejectionline
- statsmodels.sandbox.stats.multicomp.set_partition
- statsmodels.sandbox.stats.multicomp.set_remove_subs
- statsmodels.sandbox.stats.multicomp.tiecorrect
- Basic Statistics and t-Tests with frequency weights
- statsmodels.stats.weightstats.DescrStatsW
- statsmodels.stats.weightstats.CompareMeans
- statsmodels.stats.weightstats.ttest_ind
- statsmodels.stats.weightstats.ttost_ind
- statsmodels.stats.weightstats.ttost_paired
- statsmodels.stats.weightstats.ztest
- statsmodels.stats.weightstats.ztost
- statsmodels.stats.weightstats.zconfint
- statsmodels.stats.weightstats._tconfint_generic
- statsmodels.stats.weightstats._tstat_generic
- statsmodels.stats.weightstats._zconfint_generic
- statsmodels.stats.weightstats._zstat_generic
- statsmodels.stats.weightstats._zstat_generic2
- Power and Sample Size Calculations
- statsmodels.stats.power.TTestIndPower
- statsmodels.stats.power.TTestPower
- statsmodels.stats.power.GofChisquarePower
- statsmodels.stats.power.NormalIndPower
- statsmodels.stats.power.FTestAnovaPower
- statsmodels.stats.power.FTestPower
- statsmodels.stats.power.tt_solve_power
- statsmodels.stats.power.tt_ind_solve_power
- statsmodels.stats.power.zt_ind_solve_power
- Proportion
- statsmodels.stats.proportion.proportion_confint
- statsmodels.stats.proportion.proportion_effectsize
- statsmodels.stats.proportion.binom_test
- statsmodels.stats.proportion.binom_test_reject_interval
- statsmodels.stats.proportion.binom_tost
- statsmodels.stats.proportion.binom_tost_reject_interval
- statsmodels.stats.proportion.multinomial_proportions_confint
- statsmodels.stats.proportion.proportions_ztest
- statsmodels.stats.proportion.proportions_ztost
- statsmodels.stats.proportion.proportions_chisquare
- statsmodels.stats.proportion.proportions_chisquare_allpairs
- statsmodels.stats.proportion.proportions_chisquare_pairscontrol
- statsmodels.stats.proportion.proportion_effectsize
- statsmodels.stats.proportion.power_binom_tost
- statsmodels.stats.proportion.power_ztost_prop
- statsmodels.stats.proportion.samplesize_confint_proportion
- Moment Helpers
- statsmodels.stats.correlation_tools.corr_clipped
- statsmodels.stats.correlation_tools.corr_nearest
- statsmodels.stats.correlation_tools.corr_nearest_factor
- statsmodels.stats.correlation_tools.corr_thresholded
- statsmodels.stats.correlation_tools.cov_nearest
- statsmodels.stats.correlation_tools.cov_nearest_factor_homog
- statsmodels.stats.correlation_tools.FactoredPSDMatrix
- statsmodels.stats.moment_helpers.cum2mc
- statsmodels.stats.moment_helpers.mc2mnc
- statsmodels.stats.moment_helpers.mc2mvsk
- statsmodels.stats.moment_helpers.mnc2cum
- statsmodels.stats.moment_helpers.mnc2mc
- statsmodels.stats.moment_helpers.mnc2mvsk
- statsmodels.stats.moment_helpers.mvsk2mc
- statsmodels.stats.moment_helpers.mvsk2mnc
- statsmodels.stats.moment_helpers.cov2corr
- statsmodels.stats.moment_helpers.corr2cov
- statsmodels.stats.moment_helpers.se_cov
- Mediation Analysis
- Residual Diagnostics and Specification Tests
- Nonparametric Methods
nonparametric
- Kernel density estimation
- Kernel regression
- References
- Module Reference
- statsmodels.nonparametric.kernel_density.KDEMultivariate
- statsmodels.nonparametric.kernel_density.KDEMultivariateConditional
- statsmodels.nonparametric.kernel_density.EstimatorSettings
- statsmodels.nonparametric.kernel_regression.KernelReg
- statsmodels.nonparametric.kernel_regression.KernelCensoredReg
- statsmodels.nonparametric.bandwidths.bw_scott
- statsmodels.nonparametric.bandwidths.bw_silverman
- statsmodels.nonparametric.bandwidths.select_bandwidth
- Generalized Method of Moments
gmm
- Module Reference
- statsmodels.sandbox.regression.gmm.GMM
- statsmodels.sandbox.regression.gmm.GMMResults
- statsmodels.sandbox.regression.gmm.IV2SLS
- statsmodels.sandbox.regression.gmm.IVGMM
- statsmodels.sandbox.regression.gmm.IVGMMResults
- statsmodels.sandbox.regression.gmm.IVRegressionResults
- statsmodels.sandbox.regression.gmm.LinearIVGMM
- statsmodels.sandbox.regression.gmm.NonlinearIVGMM
- Module Reference
- Contingency tables
- Multiple Imputation with Chained Equations
- Multivariate Statistics
multivariate
- Empirical Likelihood
emplike
- Other Models
miscmodels
- Distributions
- Empirical Distributions
- Distribution Extras
- statsmodels.sandbox.distributions.extras.SkewNorm_gen
- statsmodels.sandbox.distributions.extras.SkewNorm2_gen
- statsmodels.sandbox.distributions.extras.ACSkewT_gen
- statsmodels.sandbox.distributions.extras.skewnorm2
- statsmodels.sandbox.distributions.extras.pdf_moments_st
- statsmodels.sandbox.distributions.extras.pdf_mvsk
- statsmodels.sandbox.distributions.extras.pdf_moments
- statsmodels.sandbox.distributions.extras.NormExpan_gen
- statsmodels.sandbox.distributions.extras.mvstdnormcdf
- statsmodels.sandbox.distributions.extras.mvnormcdf
- Univariate Distributions by non-linear Transformations
- statsmodels.sandbox.distributions.transformed.TransfTwo_gen
- statsmodels.sandbox.distributions.transformed.Transf_gen
- statsmodels.sandbox.distributions.transformed.ExpTransf_gen
- statsmodels.sandbox.distributions.transformed.LogTransf_gen
- statsmodels.sandbox.distributions.transformed.SquareFunc
- statsmodels.sandbox.distributions.transformed.absnormalg
- statsmodels.sandbox.distributions.transformed.invdnormalg
- statsmodels.sandbox.distributions.transformed.loggammaexpg
- statsmodels.sandbox.distributions.transformed.lognormalg
- statsmodels.sandbox.distributions.transformed.negsquarenormalg
- statsmodels.sandbox.distributions.transformed.squarenormalg
- statsmodels.sandbox.distributions.transformed.squaretg
- Graphics
- Input-Output
iolib
- Examples
- Module Reference
- statsmodels.iolib.foreign.StataReader
- statsmodels.iolib.foreign.StataWriter
- statsmodels.iolib.foreign.genfromdta
- statsmodels.iolib.foreign.savetxt
- statsmodels.iolib.table.SimpleTable
- statsmodels.iolib.table.csv2st
- statsmodels.iolib.smpickle.save_pickle
- statsmodels.iolib.smpickle.load_pickle
- statsmodels.iolib.summary.Summary
- statsmodels.iolib.summary2.Summary
- Tools
- The Datasets Package
- Sandbox