{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
},
"source": [
"# Statsmodels OLS fit"
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"import pandas as pd"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"# Toy sample: ten (x, y) observations used for the single-regressor fit.\n",
"data = pd.DataFrame(\n",
"    dict(\n",
"        x=[1, 2, 4, 5, 6, 7.7, 8.2, 9, 10, 11],\n",
"        y=[3, 3.6, 4.2, 4.1, 5, 4.9, 5.5, 6, 6.1, 6.5],\n",
"    )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"import statsmodels.formula.api as smf"
]
},
{
"cell_type": "code",
"execution_count": 4,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"# Specify the regression of y on x; the fitted summary reports an Intercept term as well.\n",
"model = smf.ols(formula='y ~ x', data=data)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"# Estimate the model; `res` is the fitted-results object queried by the later cells\n",
"# (predict, summary, and the plot overlay).\n",
"res = model.fit()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"0 2.741911\n",
"1 4.422732\n",
"2 6.103553\n",
"3 36.358325\n",
"dtype: float64"
]
},
"execution_count": 6,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"# Evaluate the fitted line at a few x values; x=100 lies far outside the sampled range (1 to 11).\n",
"x_new = pd.DataFrame({'x': [0, 5, 10, 100]})\n",
"res.predict(x_new)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/scipy/stats/stats.py:1535: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=10\n",
" \"anyway, n=%i\" % int(n))\n"
]
},
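{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>y</td> <th>R-squared:</th> <td>0.962</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th>Adj. R-squared:</th> <td>0.957</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th>F-statistic:</th> <td>201.4</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Sun, 28 Jun 2020</td> <th>Prob (F-statistic):</th> <td>5.91e-07</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>09:20:50</td> <th>Log-Likelihood:</th> <td>1.2199</td>\n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td>10</td> <th>AIC:</th> <td>1.560</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td>8</td> <th>BIC:</th> <td>2.165</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td>1</td> <th></th> <td></td>\n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th></th> <td></td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P&gt;|t|</th> <th>[0.025</th> <th>0.975]</th>\n",
"</tr>\n",
"<tr>\n",
" <th>Intercept</th> <td>2.7419</td> <td>0.169</td> <td>16.201</td> <td>0.000</td> <td>2.352</td> <td>3.132</td>\n",
"</tr>\n",
"<tr>\n",
" <th>x</th> <td>0.3362</td> <td>0.024</td> <td>14.193</td> <td>0.000</td> <td>0.282</td> <td>0.391</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>2.060</td> <th>Durbin-Watson:</th> <td>2.907</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td>0.357</td> <th>Jarque-Bera (JB):</th> <td>1.180</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td>-0.805</td> <th>Prob(JB):</th> <td>0.554</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td>2.512</td> <th>Cond. No.</th> <td>16.2</td>\n",
"</tr>\n",
"</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."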
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: y R-squared: 0.962\n",
"Model: OLS Adj. R-squared: 0.957\n",
"Method: Least Squares F-statistic: 201.4\n",
"Date: Sun, 28 Jun 2020 Prob (F-statistic): 5.91e-07\n",
"Time: 09:20:50 Log-Likelihood: 1.2199\n",
"No. Observations: 10 AIC: 1.560\n",
"Df Residuals: 8 BIC: 2.165\n",
"Df Model: 1 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 2.7419 0.169 16.201 0.000 2.352 3.132\n",
"x 0.3362 0.024 14.193 0.000 0.282 0.391\n",
"==============================================================================\n",
"Omnibus: 2.060 Durbin-Watson: 2.907\n",
"Prob(Omnibus): 0.357 Jarque-Bera (JB): 1.180\n",
"Skew: -0.805 Prob(JB): 0.554\n",
"Kurtosis: 2.512 Cond. No. 16.2\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 7,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"# Full regression report for the single-regressor fit.\n",
"# NOTE(review): with only 10 observations scipy warns that kurtosistest is only valid\n",
"# for n>=20, so the residual-normality rows of the report are presumably unreliable.\n",
"res.summary()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt"
]
},
{
"cell_type": "code",
"execution_count": 9,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"[]"
]
},
"execution_count": 9,
"metadata": {
},
"output_type": "execute_result"
},
{
"data": {
"image/png": "",
"text/plain": [
""
]
},
"execution_count": 9,
"metadata": {
"image/png": {
"height": 424,
"width": 710
},
"needs_background": "light"
},
"output_type": "execute_result"
}
],
"source": [
"# Scatter the observations and overlay the fitted regression line on the same axes.\n",
"# DataFrame.plot.scatter returns a matplotlib Axes (not a Figure), hence the name `ax`.\n",
"ax = data.plot.scatter('x', 'y', s=40, grid=True)\n",
"xx = np.linspace(-2, 15, 100)  # extends a little beyond the observed x range (1 to 11)\n",
"yy = res.predict(pd.DataFrame({'x': xx}))\n",
"ax.plot(xx, yy, color='green', label='OLS fit')\n",
"ax.set_title('OLS fit of y on x')\n",
"ax.legend();  # trailing semicolon keeps the cell from echoing the last expression's repr"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"# Ten observations with two regressors, x1 and x2, and the response y.\n",
"data2 = pd.DataFrame(\n",
"    dict(\n",
"        x1=[1, 2, 4, 5, 6, 7.7, 8.2, 9, 10, 11],\n",
"        x2=[4, 5, 4, 5, 6, 7.7, 7, 7.9, 8, 8.1],\n",
"        y=[3, 3.6, 4.2, 4.1, 5, 4.9, 5.5, 6, 6.1, 6.5],\n",
"    )\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"# Two-regressor model: specify y as a function of x1 and x2, then estimate it.\n",
"model2 = smf.ols(formula='y ~ x1 + x2', data=data2)\n",
"res2 = model2.fit()"
]
},
{
"cell_type": "code",
"execution_count": 12,
"metadata": {
"collapsed": false
},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/usr/local/lib/python3.6/dist-packages/scipy/stats/stats.py:1535: UserWarning: kurtosistest only valid for n>=20 ... continuing anyway, n=10\n",
" \"anyway, n=%i\" % int(n))\n"
]
},
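{
"data": {
"text/html": [
"<table class=\"simpletable\">\n",
"<caption>OLS Regression Results</caption>\n",
"<tr>\n",
" <th>Dep. Variable:</th> <td>y</td> <th>R-squared:</th> <td>0.962</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Model:</th> <td>OLS</td> <th>Adj. R-squared:</th> <td>0.951</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Method:</th> <td>Least Squares</td> <th>F-statistic:</th> <td>88.75</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Date:</th> <td>Sun, 28 Jun 2020</td> <th>Prob (F-statistic):</th> <td>1.06e-05</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Time:</th> <td>09:20:54</td> <th>Log-Likelihood:</th> <td>1.2542</td>\n",
"</tr>\n",
"<tr>\n",
" <th>No. Observations:</th> <td>10</td> <th>AIC:</th> <td>3.492</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Residuals:</th> <td>7</td> <th>BIC:</th> <td>4.399</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Df Model:</th> <td>2</td> <th></th> <td></td>\n",
"</tr>\n",
"<tr>\n",
" <th>Covariance Type:</th> <td>nonrobust</td> <th></th> <td></td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <td></td> <th>coef</th> <th>std err</th> <th>t</th> <th>P&gt;|t|</th> <th>[0.025</th> <th>0.975]</th>\n",
"</tr>\n",
"<tr>\n",
" <th>Intercept</th> <td>2.8433</td> <td>0.496</td> <td>5.730</td> <td>0.001</td> <td>1.670</td> <td>4.017</td>\n",
"</tr>\n",
"<tr>\n",
" <th>x1</th> <td>0.3503</td> <td>0.069</td> <td>5.063</td> <td>0.001</td> <td>0.187</td> <td>0.514</td>\n",
"</tr>\n",
"<tr>\n",
" <th>x2</th> <td>-0.0306</td> <td>0.139</td> <td>-0.219</td> <td>0.833</td> <td>-0.360</td> <td>0.299</td>\n",
"</tr>\n",
"</table>\n",
"<table class=\"simpletable\">\n",
"<tr>\n",
" <th>Omnibus:</th> <td>1.553</td> <th>Durbin-Watson:</th> <td>2.877</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Prob(Omnibus):</th> <td>0.460</td> <th>Jarque-Bera (JB):</th> <td>0.992</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Skew:</th> <td>-0.707</td> <th>Prob(JB):</th> <td>0.609</td>\n",
"</tr>\n",
"<tr>\n",
" <th>Kurtosis:</th> <td>2.383</td> <th>Cond. No.</th> <td>61.4</td>\n",
"</tr>\n",
"</table><br/><br/>Warnings:<br/>[1] Standard Errors assume that the covariance matrix of the errors is correctly specified."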
],
"text/plain": [
"\n",
"\"\"\"\n",
" OLS Regression Results \n",
"==============================================================================\n",
"Dep. Variable: y R-squared: 0.962\n",
"Model: OLS Adj. R-squared: 0.951\n",
"Method: Least Squares F-statistic: 88.75\n",
"Date: Sun, 28 Jun 2020 Prob (F-statistic): 1.06e-05\n",
"Time: 09:20:54 Log-Likelihood: 1.2542\n",
"No. Observations: 10 AIC: 3.492\n",
"Df Residuals: 7 BIC: 4.399\n",
"Df Model: 2 \n",
"Covariance Type: nonrobust \n",
"==============================================================================\n",
" coef std err t P>|t| [0.025 0.975]\n",
"------------------------------------------------------------------------------\n",
"Intercept 2.8433 0.496 5.730 0.001 1.670 4.017\n",
"x1 0.3503 0.069 5.063 0.001 0.187 0.514\n",
"x2 -0.0306 0.139 -0.219 0.833 -0.360 0.299\n",
"==============================================================================\n",
"Omnibus: 1.553 Durbin-Watson: 2.877\n",
"Prob(Omnibus): 0.460 Jarque-Bera (JB): 0.992\n",
"Skew: -0.707 Prob(JB): 0.609\n",
"Kurtosis: 2.383 Cond. No. 61.4\n",
"==============================================================================\n",
"\n",
"Warnings:\n",
"[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.\n",
"\"\"\""
]
},
"execution_count": 12,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"# Full regression report for the two-regressor fit; the x2 row and the adjusted\n",
"# R-squared are the entries to read when judging whether x2 adds anything.\n",
"res2.summary()"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (system-wide)",
"language": "python",
"metadata": {
"cocalc": {
"description": "Python 3 programming language",
"priority": 100,
"url": "https://www.python.org/"
}
},
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.9"
}
},
"nbformat": 4,
"nbformat_minor": 4
}