{ "cells": [ { "cell_type": "code", "execution_count": 186, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "# import libraries\n", "import numpy as np\n", "import pandas as pd\n", "import seaborn as sn\n", "import matplotlib.pyplot as plt\n", "from matplotlib import rcParams\n", "from matplotlib.cm import rainbow\n", "%matplotlib inline\n", "import statsmodels.api as sm\n", "import scipy.stats as st\n", "import warnings\n", "warnings.filterwarnings('ignore')" ] }, { "cell_type": "code", "execution_count": 187, "metadata": { "collapsed": false }, "outputs": [ ], "source": [ "# Sklearn library for implementing Machine Learning models and processing of data\n", "from sklearn.model_selection import train_test_split\n", "from sklearn import preprocessing\n", "from sklearn.preprocessing import StandardScaler\n", "from sklearn.metrics import confusion_matrix\n", "from sklearn.utils import shuffle\n", "from sklearn.preprocessing import LabelEncoder\n", "from sklearn.neighbors import KNeighborsClassifier\n", "from sklearn.svm import SVC\n", "from sklearn.tree import DecisionTreeClassifier\n", "from sklearn.ensemble import RandomForestClassifier" ] }, { "cell_type": "code", "execution_count": 188, "metadata": { "collapsed": false }, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Cancer data set dimensions : (569, 31)\n" ] }, { "data": { "text/html": [ "
\n", " | radius.mean | \n", "texture.mean | \n", "perimeter.mean | \n", "area.mean | \n", "smoothness.mean | \n", "compactness.mean | \n", "concavity.mean | \n", "concave points.mean | \n", "symmetry.mean | \n", "fractal dimension | \n", "... | \n", "texture.w | \n", "perimeter.w | \n", "area.w | \n", "smoothness.w | \n", "compactness.w | \n", "concavity.w | \n", "concave points.w | \n", "symmetry.w | \n", "fractal dimension.w | \n", "Diagnosis | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
0 | \n", "17.99 | \n", "10.38 | \n", "122.80 | \n", "1001.0 | \n", "0.11840 | \n", "0.27760 | \n", "0.3001 | \n", "0.14710 | \n", "0.2419 | \n", "0.07871 | \n", "... | \n", "17.33 | \n", "184.60 | \n", "2019.0 | \n", "0.1622 | \n", "0.6656 | \n", "0.7119 | \n", "0.2654 | \n", "0.4601 | \n", "0.11890 | \n", "M | \n", "
1 | \n", "20.57 | \n", "17.77 | \n", "132.90 | \n", "1326.0 | \n", "0.08474 | \n", "0.07864 | \n", "0.0869 | \n", "0.07017 | \n", "0.1812 | \n", "0.05667 | \n", "... | \n", "23.41 | \n", "158.80 | \n", "1956.0 | \n", "0.1238 | \n", "0.1866 | \n", "0.2416 | \n", "0.1860 | \n", "0.2750 | \n", "0.08902 | \n", "M | \n", "
2 | \n", "19.69 | \n", "21.25 | \n", "130.00 | \n", "1203.0 | \n", "0.10960 | \n", "0.15990 | \n", "0.1974 | \n", "0.12790 | \n", "0.2069 | \n", "0.05999 | \n", "... | \n", "25.53 | \n", "152.50 | \n", "1709.0 | \n", "0.1444 | \n", "0.4245 | \n", "0.4504 | \n", "0.2430 | \n", "0.3613 | \n", "0.08758 | \n", "M | \n", "
3 | \n", "11.42 | \n", "20.38 | \n", "77.58 | \n", "386.1 | \n", "0.14250 | \n", "0.28390 | \n", "0.2414 | \n", "0.10520 | \n", "0.2597 | \n", "0.09744 | \n", "... | \n", "26.50 | \n", "98.87 | \n", "567.7 | \n", "0.2098 | \n", "0.8663 | \n", "0.6869 | \n", "0.2575 | \n", "0.6638 | \n", "0.17300 | \n", "M | \n", "
4 | \n", "20.29 | \n", "14.34 | \n", "135.10 | \n", "1297.0 | \n", "0.10030 | \n", "0.13280 | \n", "0.1980 | \n", "0.10430 | \n", "0.1809 | \n", "0.05883 | \n", "... | \n", "16.67 | \n", "152.20 | \n", "1575.0 | \n", "0.1374 | \n", "0.2050 | \n", "0.4000 | \n", "0.1625 | \n", "0.2364 | \n", "0.07678 | \n", "M | \n", "
5 rows × 31 columns
\n", "