Pregunta B
Pregunta B
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"toc_visible": true
},
"kernelspec": {
"name": "python3",
},
"language_info": {
"name": "python"
},
"cells": [
"cell_type": "code",
"source": [
"\n",
"import sklearn\n",
"import joblib\n",
"\n",
"datos = pd.read_csv(\"titanic.csv\")\n",
"print(datos)"
],
"metadata": {
"id": "pJVtfeiiO97v",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "62015fa4-6f87-4069-dfc1-ae5ab2d1681c"
},
"execution_count": 5,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
"0 1 1 0 3 \n",
"1 2 2 1 1 \n",
"2 3 3 1 3 \n",
"3 4 4 1 1 \n",
"4 5 5 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"\n",
"\n",
}
]
},
"cell_type": "code",
"source": [
"datos.head()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 449
},
"id": "UAszVV7AG_m0",
"outputId": "1dfa826f-e54e-4ac3-d993-08e10be32abe"
},
"execution_count": 6,
"outputs": [
"output_type": "execute_result",
"data": {
"text/plain": [
"0 1 1 0 3 \n",
"1 2 2 1 1 \n",
"2 3 3 1 3 \n",
"3 4 4 1 1 \n",
"4 5 5 0 3 \n",
"\n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"\n",
],
"text/html": [
"\n",
" <div>\n",
"<style scoped>\n",
" }\n",
"\n",
" }\n",
"\n",
" }\n",
"</style>\n",
" <thead>\n",
" <th></th>\n",
" <th>PassengerId</th>\n",
" <th>Survived</th>\n",
" <th>Pclass</th>\n",
" <th>Name</th>\n",
" <th>Sex</th>\n",
" <th>Age</th>\n",
" <th>SibSp</th>\n",
" <th>Parch</th>\n",
" <th>Ticket</th>\n",
" <th>Fare</th>\n",
" <th>Cabin</th>\n",
" <th>Embarked</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>male</td>\n",
" <td>22.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>7.2500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>female</td>\n",
" <td>38.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>71.2833</td>\n",
" <td>C85</td>\n",
" <td>C</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>3</td>\n",
" <td>female</td>\n",
" <td>26.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>7.9250</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>female</td>\n",
" <td>35.0</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>113803</td>\n",
" <td>53.1000</td>\n",
" <td>C123</td>\n",
" <td>S</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>5</td>\n",
" <td>5</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>male</td>\n",
" <td>35.0</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>373450</td>\n",
" <td>8.0500</td>\n",
" <td>NaN</td>\n",
" <td>S</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>\n",
"\n",
" style=\"display:none;\">\n",
"\n",
" </button>\n",
"\n",
" <style>\n",
" display:flex;\n",
" }\n",
"\n",
" }\n",
"\n",
" box-shadow: 0px 1px 2px rgba(60, 64, 67, 0.3), 0px 1px 3px 1px rgba(60, 64, 67, 0.15);\
n",
" }\n",
"\n",
" }\n",
"\n",
" }\n",
"\n",
" }\n",
" </style>\n",
"\n",
" <script>\n",
"\n",
" const docLinkHtml = 'Like what you see? Visit the ' +\n",
" element.appendChild(docLink);\n",
" }\n",
" </script>\n",
" </div>\n",
"\n",
"\n",
"<div id=\"df-8d6b80eb-e09e-4525-95bd-da535c827163\">\n",
" style=\"display:none;\">\n",
"\n",
" width=\"24px\">\n",
" <g>\n",
" <path d=\"M19 3H5c-1.1 0-2 .9-2 2v14c0 1.1.9 2 2 2h14c1.1 0 2-.9 2-2V5c0-1.1-.9-2-
2-2zM9 17H7v-7h2v7zm4 0h-2V7h2v10zm4 0h-2v-4h2v4z\"/>\n",
" </g>\n",
"</svg>\n",
" </button>\n",
"\n",
"<style>\n",
" }\n",
"\n",
" }\n",
"\n",
" }\n",
"\n",
" box-shadow: 0 1px 2px rgba(60, 64, 67, 0.3), 0 1px 3px 1px rgba(60, 64, 67, 0.15);\n",
" }\n",
"\n",
" .colab-df-quickchart-complete:disabled,\n",
" }\n",
"\n",
" animation:\n",
" }\n",
"\n",
" 0% {\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
" }\n",
"</style>\n",
"\n",
" <script>\n",
" quickchartButtonEl.classList.add('colab-df-spinner');\n",
" }\n",
" quickchartButtonEl.classList.remove('colab-df-spinner');\n",
" quickchartButtonEl.classList.add('colab-df-quickchart-complete');\n",
" }\n",
" })();\n",
" </script>\n",
"</div>\n",
"\n",
" </div>\n",
" </div>\n"
],
"application/vnd.google.colaboratory.intrinsic+json": {
"type": "dataframe",
"variable_name": "datos",
},
"metadata": {},
"execution_count": 6
},
"cell_type": "code",
"source": [
"print(datos)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "kEsJVE_YBf2Y",
"outputId": "d8529887-e83a-4abe-83ac-fa5c06152a7a"
},
"execution_count": 7,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
"0 1 1 0 3 \n",
"1 2 2 1 1 \n",
"2 3 3 1 3 \n",
"3 4 4 1 1 \n",
"4 5 5 0 3 \n",
"\n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"\n",
"\n",
},
"cell_type": "markdown",
"source": [
"###Validamos los Datos nulos. Cuantos nulos hay por cada campo?"
],
"metadata": {
"id": "f2U75-rn-QUv"
},
"cell_type": "code",
"source": [
"datos.isnull().sum()"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "1QsQJJycHHGe",
"outputId": "8d80b334-eed6-4052-98b9-a160f554dd61"
},
"execution_count": 8,
"outputs": [
"output_type": "execute_result",
"data": {
"text/plain": [
"Unnamed: 0 0\n",
"PassengerId 0\n",
"Survived 0\n",
"Pclass 0\n",
"Name 0\n",
"Sex 0\n",
"Age 263\n",
"SibSp 0\n",
"Parch 0\n",
"Ticket 0\n",
"Fare 1\n",
"Cabin 1014\n",
"Embarked 2\n",
"dtype: int64"
]
},
"metadata": {},
"execution_count": 8
},
"cell_type": "markdown",
"source": [
],
"metadata": {
"id": "bqXp9z8U-d3X"
},
"cell_type": "code",
"source": [
],
"metadata": {
"id": "U3065ksQHaF4"
},
"execution_count": 9,
"outputs": []
},
"cell_type": "code",
"source": [
"print(datos)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "64dNhD-PBTWx",
"outputId": "35fb451d-94c7-4125-fc83-0fb72a21e5fd"
},
"execution_count": 10,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"\n",
" Name Sex Age SibSp \\\n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... female 38.0 1 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) female 35.0 1 \n",
"\n",
"\n",
}
]
},
"cell_type": "markdown",
"source": [
"##LLenar los datos nulos encontrados en la columna edad por la media de estos."
],
"metadata": {
"id": "uDNTo4ChJVt6"
},
"cell_type": "code",
"source": [
"datos['Age'].fillna(datos['Age'].mean(), inplace=True)\n",
"datos['Fare'].fillna(datos['Fare'].mean(), inplace=True)\n",
"#datos['Embarked'].fillna(datos['Embarked'].mean(), inplace=True)"
],
"metadata": {
"id": "X9oRwHOtHnVz"
},
"execution_count": 11,
"outputs": []
},
"cell_type": "markdown",
"source": [
],
"metadata": {
"id": "uWQUBCMMJzWP"
},
"cell_type": "code",
"source": [
"print(datos['Embarked'].mode())"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "W4mkzbWbIMDa",
"outputId": "cfc13ae0-89d7-4199-fde3-65c7a4e524df"
},
"execution_count": 12,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
"0 S\n",
},
{
"cell_type": "markdown",
"source": [
"## Reemplazar en la columna Embarked los datos nulos con el dato de la moda."
],
"metadata": {
"id": "mY4wCJUiKIea"
},
"cell_type": "code",
"source": [
"datos['Embarked'].fillna(datos['Embarked'].mode()[0], inplace=True)"
],
"metadata": {
"id": "E9KbpD6dIyMc"
},
"execution_count": 13,
"outputs": []
},
"cell_type": "markdown",
"source": [
],
"metadata": {
"id": "yhSIJ7k7OIMq"
},
{
"cell_type": "code",
"source": [
"print(datos)"
],
"metadata": {
"id": "lyDrkKghJGUb",
"colab": {
"base_uri": "https://localhost:8080/"
},
"outputId": "cca2e001-6100-451d-e063-8114cec9951e"
},
"execution_count": 14,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
"0 1 0 3 \n",
"1 2 1 1 \n",
"2 3 1 3 \n",
"3 4 1 1 \n",
"4 5 0 3 \n",
"\n",
"1 Cumings, Mrs. John Bradley (Florence Briggs Th... 1 38.000000 \n",
"3 Futrelle, Mrs. Jacques Heath (Lily May Peel) 1 35.000000 \n",
"\n",
"\n",
},
"cell_type": "markdown",
"source": [
],
"metadata": {
"id": "GgsMgQySTtSL"
},
"cell_type": "code",
"source": [
"#Y = datos['Survived']"
],
"metadata": {
"id": "8mXzWuYgKtep"
},
"execution_count": 15,
"outputs": []
},
"cell_type": "markdown",
"source": [
"#Despues del preprocesamiento pasamos a hacer la predicción si una persona sobrevive o no
sobrevive"
],
"metadata": {
"id": "2DsUGjDUUNhF"
},
"cell_type": "markdown",
"source": [],
"metadata": {
"id": "icO21PZ1Tshz"
},
"source": [
"\n",
"\n",
"y = datos['Survived']\n",
"print(datos.columns)"
],
"cell_type": "code",
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "TkL5rdgQPa9q",
"outputId": "8eb275f8-0a71-4d24-f4a3-88b93a09e0a6"
},
"execution_count": 16,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
" dtype='object')\n"
},
"cell_type": "code",
"source": [
"\n",
"\n"
],
"metadata": {
"id": "QckOtN2RRInN"
},
"execution_count": 17,
"outputs": []
},
"cell_type": "markdown",
"source": [
],
"metadata": {
"id": "37saVOWldUkF"
},
"cell_type": "code",
"source": [
"model = LogisticRegression()\n",
"model.fit(X_train, y_train)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/",
"height": 234
},
"id": "aPEPc-w8dYGE",
"outputId": "8f4420b3-0298-4687-ccfa-5d6d75862a4e"
},
"execution_count": 18,
"outputs": [
"output_type": "stream",
"name": "stderr",
"text": [
"/usr/local/lib/python3.10/dist-packages/sklearn/linear_model/_logistic.py:458:
ConvergenceWarning: lbfgs failed to converge (status=1):\n",
"STOP: TOTAL NO. of ITERATIONS REACHED LIMIT.\n",
"\n",
"Increase the number of iterations (max_iter) or scale the data as shown in:\n",
" https://scikit-learn.org/stable/modules/preprocessing.html\n",
" https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression\n",
},
"output_type": "execute_result",
"data": {
"text/plain": [
"LogisticRegression()"
],
"text/html": [
},
"metadata": {},
"execution_count": 18
},
{
"cell_type": "markdown",
"source": [
],
"metadata": {
"id": "tzt1W5ECgTkm"
},
"cell_type": "code",
"source": [
"X_train_prediction = model.predict(X_train)\n",
"print(accuracy)\n"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "xQSCpnyEgWCf",
"outputId": "66a6f76c-90a4-4a4f-bba6-8987b5b135fd"
},
"execution_count": 19,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
"0.8586437440305635\n"
},
"cell_type": "markdown",
"source": [
],
"metadata": {
"id": "CRZyG0GCW874"
},
"cell_type": "code",
"source": [
"X_test_prediction = model.predict(X_test)\n",
"print(test_data_accuracy)"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LSxlMccoXDoA",
"outputId": "597d0638-4cb5-4284-c733-3eaeed6026b3"
},
"execution_count": 20,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
"0.8625954198473282\n"
},
"cell_type": "markdown",
"source": [
],
"metadata": {
"id": "9-_vTDSSSV8V"
},
"cell_type": "code",
"source": [
"joblib.dump(model, 'modelo_entrenado')"
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "5LKJYKY5Sdcw",
"outputId": "370ab248-bb90-4090-f488-251924e6c526"
},
"execution_count": 21,
"outputs": [
"output_type": "execute_result",
"data": {
"text/plain": [
"['modelo_entrenado']"
},
"metadata": {},
"execution_count": 21
},
"cell_type": "markdown",
"source": [],
"metadata": {
"id": "ZjgxjiVkWyeZ"
},
"cell_type": "markdown",
"source": [
],
"metadata": {
"id": "qUMHc756k2kV"
},
"cell_type": "markdown",
"source": [
],
"metadata": {
"id": "QxreEhODmwC3"
},
"cell_type": "code",
"source": [],
"metadata": {
"id": "YvWfNadvI5gi"
},
"execution_count": null,
"outputs": []
},
"cell_type": "code",
"source": [],
"metadata": {
"id": "Zj8rcQgd-l2Y"
},
"execution_count": 22,
"outputs": []
},
"cell_type": "markdown",
"source": [
"### b.\tQue hubiera ocurrido si más del 50% de los pasajeros del barco hubiesen sido
varones entre 30 y 60 años"
],
"metadata": {
"id": "da-jMoOV-oOu"
},
"cell_type": "markdown",
"source": [
"##### **Modificación** del Dataset con las condiciones pedidas para el pronostico"
],
"metadata": {
"id": "DwogFk1m_Fuh"
},
"cell_type": "code",
"source": [
"#Cantidad de hombres\n",
"\n",
"datos = pd.read_csv('titanic.csv')\n",
"\n",
"\n",
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "DAVPee9J_MqZ",
"outputId": "ed4c9193-0b0d-459a-fbc4-389b163e2c3b"
},
"execution_count": 25,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
},
"cell_type": "markdown",
"source": [
"####Cantidad de hombres entre 30 y 60 años"
],
"metadata": {
"id": "I0QkHxz_F92-"
},
"cell_type": "code",
"source": [
"\n",
"hombres_30_60 = datos[(datos['Sex'] == 'male') & (datos['Age'] >= 30) & (datos['Age'] <=
60)]\n",
"\n",
"num_hombres_30_60 = hombres_30_60.shape[0]\n",
"\n",
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "Sg_0m-awF54c",
"outputId": "e4e96a9a-7d5e-4191-8ad0-72a336007d48"
},
"execution_count": 28,
"outputs": [
{
"output_type": "stream",
"name": "stdout",
"text": [
},
"cell_type": "markdown",
"source": [
"#### Reemplazo los 422 hombres con edades entre los 30 y 60"
],
"metadata": {
"id": "-7SWrDCUIMjj"
},
"cell_type": "code",
"source": [
"\n",
"\n",
"\n",
" raise ValueError(\"No hay suficientes hombres en el dataset para reemplazar las
edades.\")\n",
"\n",
"\n",
"\n",
"# Reemplazar las edades de los hombres seleccionados con las nuevas edades\n",
"\n",
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "f39Ze1q8IY_s",
"outputId": "d509af3b-8543-4c15-c191-26debd0caade"
},
"execution_count": 29,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
"\n",
},
"cell_type": "markdown",
"source": [
"#### Verificación"
],
"metadata": {
"id": "yjAMBAxDInNw"
},
"cell_type": "code",
"source": [
"hombres_30_60 = datos[(datos['Sex'] == 'male') & (datos['Age'] >= 30) & (datos['Age'] <=
60)]\n",
"\n",
"num_hombres_30_60 = hombres_30_60.shape[0]\n",
"\n",
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "LVEVTCAkIsKG",
"outputId": "0ce7a198-2201-48ac-d7c9-43b99e7b3722"
},
"execution_count": 30,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
},
"cell_type": "markdown",
"source": [
],
"metadata": {
"id": "cp4ukgo5I7X6"
},
"cell_type": "code",
"source": [
"\n",
"Pasajeros = [\n",
"]\n",
"\n",
"\n",
],
"metadata": {
"colab": {
"base_uri": "https://localhost:8080/"
},
"id": "nMRG7aQd7M-D",
"outputId": "9eabd562-8db1-41d6-ebf3-b1a79bd65cd0"
},
"execution_count": 33,
"outputs": [
"output_type": "stream",
"name": "stdout",
"text": [
"Pasajero 1: Muerto\n",
"Pasajero 2: Muerto\n",
"Pasajero 3: Muerto\n"
},
"output_type": "stream",
"name": "stderr",
"text": [
" warnings.warn(\n",
" warnings.warn(\n",
" warnings.warn(\n"
]