{
"cells": [
{
"cell_type": "markdown",
"metadata": {
"collapsed": false
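},
"source": [
"# PyCaret in CoCalc\n",
"\n",
"This notebook demonstrates the anomaly detection module of PyCaret (https://pycaret.org/) on the `anomaly` sample dataset.\n",
"\n",
"Kernel: Python 3 (system-wide) in Ubuntu 20.04\n",
"\n",
"Based on the official example: https://github.com/pycaret/pycaret/blob/master/examples/PyCaret%202%20Anomaly%20Detection.ipynb"
]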
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/plain": [
"'2.1.2'"
]
},
"execution_count": 1,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"# Show the installed PyCaret version so the results below can be tied to a release.\n",
"from pycaret.utils import version\n",
"\n",
"version()"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"
\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Col1 | \n",
" Col2 | \n",
" Col3 | \n",
" Col4 | \n",
" Col5 | \n",
" Col6 | \n",
" Col7 | \n",
" Col8 | \n",
" Col9 | \n",
" Col10 | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.263995 | \n",
" 0.764929 | \n",
" 0.138424 | \n",
" 0.935242 | \n",
" 0.605867 | \n",
" 0.518790 | \n",
" 0.912225 | \n",
" 0.608234 | \n",
" 0.723782 | \n",
" 0.733591 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.546092 | \n",
" 0.653975 | \n",
" 0.065575 | \n",
" 0.227772 | \n",
" 0.845269 | \n",
" 0.837066 | \n",
" 0.272379 | \n",
" 0.331679 | \n",
" 0.429297 | \n",
" 0.367422 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.336714 | \n",
" 0.538842 | \n",
" 0.192801 | \n",
" 0.553563 | \n",
" 0.074515 | \n",
" 0.332993 | \n",
" 0.365792 | \n",
" 0.861309 | \n",
" 0.899017 | \n",
" 0.088600 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.092108 | \n",
" 0.995017 | \n",
" 0.014465 | \n",
" 0.176371 | \n",
" 0.241530 | \n",
" 0.514724 | \n",
" 0.562208 | \n",
" 0.158963 | \n",
" 0.073715 | \n",
" 0.208463 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.325261 | \n",
" 0.805968 | \n",
" 0.957033 | \n",
" 0.331665 | \n",
" 0.307923 | \n",
" 0.355315 | \n",
" 0.501899 | \n",
" 0.558449 | \n",
" 0.885169 | \n",
" 0.182754 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Col1 Col2 Col3 Col4 Col5 Col6 Col7 \\\n",
"0 0.263995 0.764929 0.138424 0.935242 0.605867 0.518790 0.912225 \n",
"1 0.546092 0.653975 0.065575 0.227772 0.845269 0.837066 0.272379 \n",
"2 0.336714 0.538842 0.192801 0.553563 0.074515 0.332993 0.365792 \n",
"3 0.092108 0.995017 0.014465 0.176371 0.241530 0.514724 0.562208 \n",
"4 0.325261 0.805968 0.957033 0.331665 0.307923 0.355315 0.501899 \n",
"\n",
" Col8 Col9 Col10 \n",
"0 0.608234 0.723782 0.733591 \n",
"1 0.331679 0.429297 0.367422 \n",
"2 0.861309 0.899017 0.088600 \n",
"3 0.158963 0.073715 0.208463 \n",
"4 0.558449 0.885169 0.182754 "
]
},
"execution_count": 2,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"# Load the `anomaly` sample dataset; get_data also displays its first rows.\n",
"from pycaret.datasets import get_data\n",
"\n",
"data = get_data('anomaly')"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {
"cocalc": {
"outputs": {
"5": {
"name": "input",
"opts": {
"password": false,
"prompt": ""
},
"output_type": "stream"
}
}
},
"collapsed": false
},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "af7d6d64a96b455ba125468d6c61d894",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"IntProgress(value=0, description='Processing: ', max=4)"
]
},
"execution_count": 3,
"metadata": {
},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Initiated | \n",
" . . . . . . . . . . . . . . . . . . | \n",
" 10:22:36 | \n",
"
\n",
" \n",
" Status | \n",
" . . . . . . . . . . . . . . . . . . | \n",
" Loading Dependencies | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" \n",
" \n",
"Initiated . . . . . . . . . . . . . . . . . . 10:22:36\n",
"Status . . . . . . . . . . . . . . . . . . Loading Dependencies"
]
},
"execution_count": 3,
"metadata": {
},
"output_type": "execute_result",
"transient": {
"display_id": "monitor"
}
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" | \n",
" | \n",
" | \n",
"
\n",
" \n",
" \n",
" \n",
" Initiated | \n",
" . . . . . . . . . . . . . . . . . . | \n",
" 10:22:36 | \n",
"
\n",
" \n",
" Status | \n",
" . . . . . . . . . . . . . . . . . . | \n",
" Preparing Data for Modeling | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" \n",
" \n",
"Initiated . . . . . . . . . . . . . . . . . . 10:22:36\n",
"Status . . . . . . . . . . . . . . . . . . Preparing Data for Modeling"
]
},
"execution_count": 3,
"metadata": {
},
"output_type": "execute_result",
"transient": {
"display_id": "monitor"
}
},
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "067b8230d9754542b66a1853f3870452",
"version_major": 2,
"version_minor": 0
},
"text/plain": [
"Text(value=\"Following data types have been inferred automatically, if they are correct press enter to continue…"
]
},
"execution_count": 3,
"metadata": {
},
"output_type": "execute_result"
},
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Data Type | \n",
"
\n",
" \n",
" \n",
" \n",
" Col1 | \n",
" Numeric | \n",
"
\n",
" \n",
" Col2 | \n",
" Numeric | \n",
"
\n",
" \n",
" Col3 | \n",
" Numeric | \n",
"
\n",
" \n",
" Col4 | \n",
" Numeric | \n",
"
\n",
" \n",
" Col5 | \n",
" Numeric | \n",
"
\n",
" \n",
" Col6 | \n",
" Numeric | \n",
"
\n",
" \n",
" Col7 | \n",
" Numeric | \n",
"
\n",
" \n",
" Col8 | \n",
" Numeric | \n",
"
\n",
" \n",
" Col9 | \n",
" Numeric | \n",
"
\n",
" \n",
" Col10 | \n",
" Numeric | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Data Type\n",
"Col1 Numeric\n",
"Col2 Numeric\n",
"Col3 Numeric\n",
"Col4 Numeric\n",
"Col5 Numeric\n",
"Col6 Numeric\n",
"Col7 Numeric\n",
"Col8 Numeric\n",
"Col9 Numeric\n",
"Col10 Numeric"
]
},
"execution_count": 3,
"metadata": {
},
"output_type": "execute_result"
},
{
"name": "stdout",
"output_type": "stream",
"text": " "
}
],
"source": [
"# Import only the names this notebook uses instead of `import *`, so it is\n",
"# clear where setup/models/create_model/assign_model/predict_model come from.\n",
"from pycaret.anomaly import assign_model, create_model, models, predict_model, setup\n",
"\n",
"# silent=True skips the interactive confirmation of the inferred data types,\n",
"# so Restart & Run All does not block waiting for keyboard input.\n",
"# session_id fixes the random seed; log_experiment records the run under\n",
"# the experiment name 'anomaly1'.\n",
"ano1 = setup(\n",
"    data,\n",
"    session_id=123,\n",
"    silent=True,\n",
"    log_experiment=True,\n",
"    experiment_name='anomaly1',\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"# List the anomaly detection models PyCaret can create (IDs are passed to create_model).\n",
"models()"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"# Train an Isolation Forest ('iforest') detector with the default settings.\n",
"iforest = create_model('iforest')"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"# Train a k-nearest-neighbours detector; `fraction` is the expected share of\n",
"# outliers in the data (here 10%).\n",
"knn = create_model('knn', fraction=0.1)"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
"# Attach the Isolation Forest results to the training data and preview them.\n",
"iforest_results = assign_model(iforest)\n",
"\n",
"iforest_results.head()"
]
},
{
"cell_type": "code",
"execution_count": 11,
"metadata": {
"collapsed": false
},
"outputs": [
{
"data": {
"text/html": [
"\n",
"\n",
"
\n",
" \n",
" \n",
" | \n",
" Col1 | \n",
" Col2 | \n",
" Col3 | \n",
" Col4 | \n",
" Col5 | \n",
" Col6 | \n",
" Col7 | \n",
" Col8 | \n",
" Col9 | \n",
" Col10 | \n",
" Label | \n",
" Score | \n",
"
\n",
" \n",
" \n",
" \n",
" 0 | \n",
" 0.263995 | \n",
" 0.764929 | \n",
" 0.138424 | \n",
" 0.935242 | \n",
" 0.605867 | \n",
" 0.518790 | \n",
" 0.912225 | \n",
" 0.608234 | \n",
" 0.723782 | \n",
" 0.733591 | \n",
" 0 | \n",
" -0.035865 | \n",
"
\n",
" \n",
" 1 | \n",
" 0.546092 | \n",
" 0.653975 | \n",
" 0.065575 | \n",
" 0.227772 | \n",
" 0.845269 | \n",
" 0.837066 | \n",
" 0.272379 | \n",
" 0.331679 | \n",
" 0.429297 | \n",
" 0.367422 | \n",
" 0 | \n",
" -0.084927 | \n",
"
\n",
" \n",
" 2 | \n",
" 0.336714 | \n",
" 0.538842 | \n",
" 0.192801 | \n",
" 0.553563 | \n",
" 0.074515 | \n",
" 0.332993 | \n",
" 0.365792 | \n",
" 0.861309 | \n",
" 0.899017 | \n",
" 0.088600 | \n",
" 1 | \n",
" 0.025356 | \n",
"
\n",
" \n",
" 3 | \n",
" 0.092108 | \n",
" 0.995017 | \n",
" 0.014465 | \n",
" 0.176371 | \n",
" 0.241530 | \n",
" 0.514724 | \n",
" 0.562208 | \n",
" 0.158963 | \n",
" 0.073715 | \n",
" 0.208463 | \n",
" 1 | \n",
" 0.042415 | \n",
"
\n",
" \n",
" 4 | \n",
" 0.325261 | \n",
" 0.805968 | \n",
" 0.957033 | \n",
" 0.331665 | \n",
" 0.307923 | \n",
" 0.355315 | \n",
" 0.501899 | \n",
" 0.558449 | \n",
" 0.885169 | \n",
" 0.182754 | \n",
" 0 | \n",
" -0.023408 | \n",
"
\n",
" \n",
"
\n",
"
"
],
"text/plain": [
" Col1 Col2 Col3 Col4 Col5 Col6 Col7 \\\n",
"0 0.263995 0.764929 0.138424 0.935242 0.605867 0.518790 0.912225 \n",
"1 0.546092 0.653975 0.065575 0.227772 0.845269 0.837066 0.272379 \n",
"2 0.336714 0.538842 0.192801 0.553563 0.074515 0.332993 0.365792 \n",
"3 0.092108 0.995017 0.014465 0.176371 0.241530 0.514724 0.562208 \n",
"4 0.325261 0.805968 0.957033 0.331665 0.307923 0.355315 0.501899 \n",
"\n",
" Col8 Col9 Col10 Label Score \n",
"0 0.608234 0.723782 0.733591 0 -0.035865 \n",
"1 0.331679 0.429297 0.367422 0 -0.084927 \n",
"2 0.861309 0.899017 0.088600 1 0.025356 \n",
"3 0.158963 0.073715 0.208463 1 0.042415 \n",
"4 0.558449 0.885169 0.182754 0 -0.023408 "
]
},
"execution_count": 11,
"metadata": {
},
"output_type": "execute_result"
}
],
"source": [
"# Score a dataset with the trained Isolation Forest; the result adds the\n",
"# `Label` and `Score` columns. The training data is reused here as a stand-in\n",
"# for new, unseen data.\n",
"pred_new = predict_model(iforest, data=data)\n",
"\n",
"pred_new.head()"
]
},
{
"cell_type": "code",
"execution_count": 0,
"metadata": {
"collapsed": false
},
"outputs": [
],
"source": [
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (system-wide)",
"language": "python",
"metadata": {
"cocalc": {
"description": "Python 3 programming language",
"priority": 100,
"url": "https://www.python.org/"
}
},
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.8.5"
}
},
"nbformat": 4,
"nbformat_minor": 4
}