{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "from IPython.core.interactiveshell import InteractiveShell\n", "import numpy as np\n", "import pandas as pd\n", "import matplotlib.pyplot as plt # for plotting \n", "import seaborn as sns # for plotting\n", "from sklearn.ensemble import RandomForestClassifier\n", "from sklearn.metrics import accuracy_score" ] }, { "cell_type": "code", "execution_count": 2, "metadata": {}, "outputs": [], "source": [ "# set up some notebook display defaults\n", "InteractiveShell.ast_node_interactivity = \"all\"\n", "%matplotlib inline\n", "plt.style.use('default')\n", "sns.set()\n", "pd.options.display.float_format = '{:,.2f}'.format" ] }, { "cell_type": "code", "execution_count": 3, "metadata": {}, "outputs": [], "source": [ "# paths to datasets\n", "kaggle_trn = \"./data/titanic/train.csv\"\n", "kaggle_tst = \"./data/titanic/test.csv\"\n", "kaggle_tst_2 = \"./data/titanic/test_2.csv\"\n", "rek_k_tst = \"./data/titanic/rek_test.csv\"\n", "rek_k_tst2 = \"./data/titanic/rek_test_2.csv\"\n", "kaggle_trg = \"./data/titanic/target.csv\"\n", "kaggle_trg_2 = \"./data/titanic/target_2.csv\"" ] }, { "cell_type": "code", "execution_count": 4, "metadata": {}, "outputs": [], "source": [ "# load the datasets currently of interest\n", "k_trn = pd.read_csv(kaggle_trn)\n", "k_tst = pd.read_csv(rek_k_tst2)" ] }, { "cell_type": "code", "execution_count": 5, "metadata": {}, "outputs": [], "source": [ "Y = k_trn['Survived']\n", "\n", "features = ['Pclass', 'Sex', 'SibSp', 'Parch']\n", "X = pd.get_dummies(k_trn[features])\n", "X_test = pd.get_dummies(k_tst[features])" ] }, { "cell_type": "code", "execution_count": 6, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "RandomForestClassifier(max_depth=5, random_state=1)" ] }, "execution_count": 6, "metadata": {}, "output_type": "execute_result" } ], "source": [ "model = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=1)\n", "model.fit(X, Y)\n", "predictions = model.predict(X_test)" ] }, { "cell_type": "code", "execution_count": 7, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "array([0, 1, 0, 0, 1], dtype=int64)" ] }, "execution_count": 7, "metadata": {}, "output_type": "execute_result" } ], "source": [ "predictions[0:5]" ] }, { "cell_type": "code", "execution_count": 8, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0.7751196172248804" ] }, "execution_count": 8, "metadata": {}, "output_type": "execute_result" } ], "source": [ "accuracy_score(k_tst[\"Survived\"], predictions)" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "interpreter": { "hash": "a27d3f2bf68df5402465348834a2195030d3fc5bfc8e594e2a17c8c7e2447c85" }, "kernelspec": { "display_name": "Python 3.9.2 64-bit ('ds-3.9': conda)", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.9.2" }, "orig_nbformat": 4 }, "nbformat": 4, "nbformat_minor": 2 }