precesamiento-listo

This commit is contained in:
Miguel Salgado 2024-02-25 02:27:33 -08:00
parent 69d071c756
commit b1d8b0e9a6
6 changed files with 3214 additions and 2964 deletions

View File

@ -0,0 +1,789 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "043492dd-e09f-440f-ad35-e2e741860bba",
"metadata": {},
"outputs": [],
"source": [
"from functools import cache\n",
"import pandas as pd\n",
"\n",
"pd.set_option(\"display.max_columns\", None)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "558043e1-1724-4bf6-8acf-e85c18b0150e",
"metadata": {},
"outputs": [],
"source": [
"estados_mexicanos = {\n",
" \"AGUASCALIENTES\",\n",
" \"BAJA CALIFORNIA\",\n",
" \"BAJA CALIFORNIA SUR\",\n",
" \"CAMPECHE\",\n",
" \"CHIAPAS\",\n",
" \"CHIHUAHUA\",\n",
" \"COAHUILA DE ZARAGOZA\",\n",
" \"COLIMA\",\n",
" \"DISTRITO FEDERAL\",\n",
" \"DURANGO\",\n",
" \"GUANAJUATO\",\n",
" \"GUERRERO\",\n",
" \"HIDALGO\",\n",
" \"JALISCO\",\n",
" \"MEXICO\",\n",
" \"MICHOACAN DE OCAMPO\",\n",
" \"MORELOS\",\n",
" \"NAYARIT\",\n",
" \"NUEVO LEON\",\n",
" \"OAXACA\",\n",
" \"PUEBLA\",\n",
" \"QUERETARO DE ARTEAGA\",\n",
" \"QUINTANA ROO\",\n",
" \"SAN LUIS POTOSI\",\n",
" \"SINALOA\",\n",
" \"SONORA\",\n",
" \"TABASCO\",\n",
" \"TAMAULIPAS\",\n",
" \"TLAXCALA\",\n",
" \"VERACRUZ DE IGNACIO DE LA LLAVE\",\n",
" \"YUCATAN\",\n",
" \"ZACATECAS\",\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6b047178-2902-4eb2-9a34-0b7d7beb277e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/05/y38rqjl55hjb_hbnypxzgrsw0000gn/T/ipykernel_93495/3168623387.py:1: DtypeWarning: Columns (21) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(\"2010-2019.csv\")\n"
]
}
],
"source": [
"# low_memory=False makes pandas infer each column's dtype from the whole file\n",
"# instead of chunk by chunk, which is what triggered the mixed-type\n",
"# DtypeWarning on this load.\n",
"df = pd.read_csv(\"2010-2019.csv\", low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "61675b16-391b-4821-8376-f92ec4b5b916",
"metadata": {},
"outputs": [],
"source": [
"def _ano_nacimiento_vivo_func(str_date):\n",
"    \"\"\"Return the text after the last '/' of a date string.\n",
"\n",
"    Values that have no ``split`` method (presumably NaN for a missing\n",
"    date) yield an empty string instead of raising.\n",
"    \"\"\"\n",
"    try:\n",
"        return str_date.split(\"/\")[-1]\n",
"    except AttributeError:\n",
"        # Only the 'not a string' case is handled; anything else (including\n",
"        # KeyboardInterrupt during the long apply) must propagate.\n",
"        return \"\"\n",
"\n",
"\n",
"# Derive the year column from the live-birth date column.\n",
"df[\"año_de_nacimiento_vivo\"] = df[\"fecha_nacimiento_nac_vivo\"].apply(\n",
"    _ano_nacimiento_vivo_func\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "87a086d4-bab8-43a8-a121-8aaf3554e672",
"metadata": {},
"outputs": [],
"source": [
"# Keep only rows whose mother's age is strictly between 5 and 90.\n",
"df = df[df[\"edad_madre\"].gt(5) & df[\"edad_madre\"].lt(90)]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "f8eff617-7273-435f-a09a-8db4ec005ee0",
"metadata": {},
"outputs": [],
"source": [
"# Rows whose anomaly code, rendered as text, contains the substring \"Q9\".\n",
"# NOTE(review): if these are ICD-10 codes, Q9x spans every chromosomal\n",
"# abnormality (Q90-Q99), not only trisomies - confirm the intended scope.\n",
"df_trisomias = df[df[\"codigo_anomalia\"].map(lambda codigo: \"Q9\" in str(codigo))]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "1ff41e12-b6cd-41db-bd1b-47c2aa21c45e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"5\" halign=\"left\">edad_madre</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>año_de_nacimiento_vivo</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2010</th>\n",
" <td>930</td>\n",
" <td>30.546237</td>\n",
" <td>8.244939</td>\n",
" <td>10</td>\n",
" <td>48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>1051</td>\n",
" <td>31.010466</td>\n",
" <td>8.193777</td>\n",
" <td>12</td>\n",
" <td>49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>961</td>\n",
" <td>30.462019</td>\n",
" <td>8.310565</td>\n",
" <td>13</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>1055</td>\n",
" <td>31.182938</td>\n",
" <td>8.247919</td>\n",
" <td>11</td>\n",
" <td>51</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>1031</td>\n",
" <td>31.018429</td>\n",
" <td>8.356304</td>\n",
" <td>13</td>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015</th>\n",
" <td>1016</td>\n",
" <td>31.500984</td>\n",
" <td>8.295052</td>\n",
" <td>14</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016</th>\n",
" <td>1044</td>\n",
" <td>31.453065</td>\n",
" <td>8.147413</td>\n",
" <td>14</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017</th>\n",
" <td>1043</td>\n",
" <td>31.410355</td>\n",
" <td>8.174581</td>\n",
" <td>13</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018</th>\n",
" <td>1059</td>\n",
" <td>31.064212</td>\n",
" <td>8.173198</td>\n",
" <td>13</td>\n",
" <td>48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2019</th>\n",
" <td>941</td>\n",
" <td>32.018066</td>\n",
" <td>8.195918</td>\n",
" <td>13</td>\n",
" <td>47</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" edad_madre \n",
" count mean std min max\n",
"año_de_nacimiento_vivo \n",
"2010 930 30.546237 8.244939 10 48\n",
"2011 1051 31.010466 8.193777 12 49\n",
"2012 961 30.462019 8.310565 13 47\n",
"2013 1055 31.182938 8.247919 11 51\n",
"2014 1031 31.018429 8.356304 13 50\n",
"2015 1016 31.500984 8.295052 14 52\n",
"2016 1044 31.453065 8.147413 14 47\n",
"2017 1043 31.410355 8.174581 13 47\n",
"2018 1059 31.064212 8.173198 13 48\n",
"2019 941 32.018066 8.195918 13 47"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Per-year summary statistics of the mother's age within the \"Q9\" subset.\n",
"consulta_trisomias = df_trisomias.groupby([\"año_de_nacimiento_vivo\"])[\n",
"    [\"edad_madre\"]\n",
"].agg([\"count\", \"mean\", \"std\", \"min\", \"max\"])\n",
"consulta_trisomias"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "942da486-5c14-4d37-a775-009151c68f29",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"5\" halign=\"left\">edad_madre</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>año_de_nacimiento_vivo</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2010</th>\n",
" <td>2063533</td>\n",
" <td>25.253220</td>\n",
" <td>6.319567</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>2156751</td>\n",
" <td>25.234223</td>\n",
" <td>6.331894</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>2197327</td>\n",
" <td>25.195768</td>\n",
" <td>6.321840</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>2189257</td>\n",
" <td>25.198235</td>\n",
" <td>6.322081</td>\n",
" <td>9</td>\n",
" <td>59</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>2173773</td>\n",
" <td>25.276009</td>\n",
" <td>6.322130</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015</th>\n",
" <td>2143345</td>\n",
" <td>25.367835</td>\n",
" <td>6.296604</td>\n",
" <td>9</td>\n",
" <td>59</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016</th>\n",
" <td>2079251</td>\n",
" <td>25.468008</td>\n",
" <td>6.292815</td>\n",
" <td>9</td>\n",
" <td>59</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017</th>\n",
" <td>2037647</td>\n",
" <td>25.510821</td>\n",
" <td>6.305873</td>\n",
" <td>9</td>\n",
" <td>62</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018</th>\n",
" <td>1940338</td>\n",
" <td>25.678051</td>\n",
" <td>6.328369</td>\n",
" <td>9</td>\n",
" <td>60</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2019</th>\n",
" <td>1867693</td>\n",
" <td>25.840630</td>\n",
" <td>6.342544</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" edad_madre \n",
" count mean std min max\n",
"año_de_nacimiento_vivo \n",
"2010 2063533 25.253220 6.319567 9 58\n",
"2011 2156751 25.234223 6.331894 9 58\n",
"2012 2197327 25.195768 6.321840 9 58\n",
"2013 2189257 25.198235 6.322081 9 59\n",
"2014 2173773 25.276009 6.322130 9 58\n",
"2015 2143345 25.367835 6.296604 9 59\n",
"2016 2079251 25.468008 6.292815 9 59\n",
"2017 2037647 25.510821 6.305873 9 62\n",
"2018 1940338 25.678051 6.328369 9 60\n",
"2019 1867693 25.840630 6.342544 9 58"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mothers' ages: per-year summary statistics over all rows kept in df.\n",
"consulta_total = df.groupby([\"año_de_nacimiento_vivo\"])[\n",
"    [\"edad_madre\"]\n",
"].agg([\"count\", \"mean\", \"std\", \"min\", \"max\"])\n",
"consulta_total"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "5290532e-d470-49b6-bd68-07eab1b86e4c",
"metadata": {},
"outputs": [],
"source": [
"# Join the two summaries on their index; the suffixes keep the overlapping\n",
"# \"edad_madre\" column groups apart (general vs. trisomias).\n",
"consulta = consulta_total.join(\n",
"    other=consulta_trisomias,\n",
"    lsuffix=\"_general\",\n",
"    rsuffix=\"_trisomias\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "7a171ccc-139d-4fd3-b438-0475dd43e27b",
"metadata": {},
"outputs": [],
"source": [
"# Share of rows in the \"Q9\" subset relative to all rows of the same year.\n",
"# Multiplied by 100 so the value matches the column name (a percentage,\n",
"# not a raw fraction).\n",
"consulta[\"porcentaje\"] = (\n",
"    100\n",
"    * consulta[(\"edad_madre_trisomias\", \"count\")]\n",
"    / consulta[(\"edad_madre_general\", \"count\")]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "2d932831-b2ce-46e4-a531-edd08d4d5ecb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"5\" halign=\"left\">edad_madre_general</th>\n",
" <th colspan=\"5\" halign=\"left\">edad_madre_trisomias</th>\n",
" <th>porcentaje</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th></th>\n",
" </tr>\n",
" <tr>\n",
" <th>año_de_nacimiento_vivo</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2010</th>\n",
" <td>2063533</td>\n",
" <td>25.253220</td>\n",
" <td>6.319567</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" <td>930</td>\n",
" <td>30.546237</td>\n",
" <td>8.244939</td>\n",
" <td>10</td>\n",
" <td>48</td>\n",
" <td>0.000451</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>2156751</td>\n",
" <td>25.234223</td>\n",
" <td>6.331894</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" <td>1051</td>\n",
" <td>31.010466</td>\n",
" <td>8.193777</td>\n",
" <td>12</td>\n",
" <td>49</td>\n",
" <td>0.000487</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>2197327</td>\n",
" <td>25.195768</td>\n",
" <td>6.321840</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" <td>961</td>\n",
" <td>30.462019</td>\n",
" <td>8.310565</td>\n",
" <td>13</td>\n",
" <td>47</td>\n",
" <td>0.000437</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>2189257</td>\n",
" <td>25.198235</td>\n",
" <td>6.322081</td>\n",
" <td>9</td>\n",
" <td>59</td>\n",
" <td>1055</td>\n",
" <td>31.182938</td>\n",
" <td>8.247919</td>\n",
" <td>11</td>\n",
" <td>51</td>\n",
" <td>0.000482</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>2173773</td>\n",
" <td>25.276009</td>\n",
" <td>6.322130</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" <td>1031</td>\n",
" <td>31.018429</td>\n",
" <td>8.356304</td>\n",
" <td>13</td>\n",
" <td>50</td>\n",
" <td>0.000474</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015</th>\n",
" <td>2143345</td>\n",
" <td>25.367835</td>\n",
" <td>6.296604</td>\n",
" <td>9</td>\n",
" <td>59</td>\n",
" <td>1016</td>\n",
" <td>31.500984</td>\n",
" <td>8.295052</td>\n",
" <td>14</td>\n",
" <td>52</td>\n",
" <td>0.000474</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016</th>\n",
" <td>2079251</td>\n",
" <td>25.468008</td>\n",
" <td>6.292815</td>\n",
" <td>9</td>\n",
" <td>59</td>\n",
" <td>1044</td>\n",
" <td>31.453065</td>\n",
" <td>8.147413</td>\n",
" <td>14</td>\n",
" <td>47</td>\n",
" <td>0.000502</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017</th>\n",
" <td>2037647</td>\n",
" <td>25.510821</td>\n",
" <td>6.305873</td>\n",
" <td>9</td>\n",
" <td>62</td>\n",
" <td>1043</td>\n",
" <td>31.410355</td>\n",
" <td>8.174581</td>\n",
" <td>13</td>\n",
" <td>47</td>\n",
" <td>0.000512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018</th>\n",
" <td>1940338</td>\n",
" <td>25.678051</td>\n",
" <td>6.328369</td>\n",
" <td>9</td>\n",
" <td>60</td>\n",
" <td>1059</td>\n",
" <td>31.064212</td>\n",
" <td>8.173198</td>\n",
" <td>13</td>\n",
" <td>48</td>\n",
" <td>0.000546</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2019</th>\n",
" <td>1867693</td>\n",
" <td>25.840630</td>\n",
" <td>6.342544</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" <td>941</td>\n",
" <td>32.018066</td>\n",
" <td>8.195918</td>\n",
" <td>13</td>\n",
" <td>47</td>\n",
" <td>0.000504</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" edad_madre_general \\\n",
" count mean std min max \n",
"año_de_nacimiento_vivo \n",
"2010 2063533 25.253220 6.319567 9 58 \n",
"2011 2156751 25.234223 6.331894 9 58 \n",
"2012 2197327 25.195768 6.321840 9 58 \n",
"2013 2189257 25.198235 6.322081 9 59 \n",
"2014 2173773 25.276009 6.322130 9 58 \n",
"2015 2143345 25.367835 6.296604 9 59 \n",
"2016 2079251 25.468008 6.292815 9 59 \n",
"2017 2037647 25.510821 6.305873 9 62 \n",
"2018 1940338 25.678051 6.328369 9 60 \n",
"2019 1867693 25.840630 6.342544 9 58 \n",
"\n",
" edad_madre_trisomias \\\n",
" count mean std min max \n",
"año_de_nacimiento_vivo \n",
"2010 930 30.546237 8.244939 10 48 \n",
"2011 1051 31.010466 8.193777 12 49 \n",
"2012 961 30.462019 8.310565 13 47 \n",
"2013 1055 31.182938 8.247919 11 51 \n",
"2014 1031 31.018429 8.356304 13 50 \n",
"2015 1016 31.500984 8.295052 14 52 \n",
"2016 1044 31.453065 8.147413 14 47 \n",
"2017 1043 31.410355 8.174581 13 47 \n",
"2018 1059 31.064212 8.173198 13 48 \n",
"2019 941 32.018066 8.195918 13 47 \n",
"\n",
" porcentaje \n",
" \n",
"año_de_nacimiento_vivo \n",
"2010 0.000451 \n",
"2011 0.000487 \n",
"2012 0.000437 \n",
"2013 0.000482 \n",
"2014 0.000474 \n",
"2015 0.000474 \n",
"2016 0.000502 \n",
"2017 0.000512 \n",
"2018 0.000546 \n",
"2019 0.000504 "
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"consulta"
]
},
{
"cell_type": "markdown",
"id": "a3a83bc5-b01f-4bee-a17a-6ab5ba8458ad",
"metadata": {},
"source": [
"# Pendiente\n",
"\n",
"Generar una gráfica de cajas con las edades de las madres de hijos con trisomías.\n",
"\n",
"https://stackoverflow.com/a/66565512"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "46753c90-fca5-4b92-9f64-165460b03bd5",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,849 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "043492dd-e09f-440f-ad35-e2e741860bba",
"metadata": {},
"outputs": [],
"source": [
"from functools import cache\n",
"import pandas as pd\n",
"\n",
"pd.set_option(\"display.max_columns\", None)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "558043e1-1724-4bf6-8acf-e85c18b0150e",
"metadata": {},
"outputs": [],
"source": [
"estados_mexicanos = {\n",
" \"AGUASCALIENTES\",\n",
" \"BAJA CALIFORNIA\",\n",
" \"BAJA CALIFORNIA SUR\",\n",
" \"CAMPECHE\",\n",
" \"CHIAPAS\",\n",
" \"CHIHUAHUA\",\n",
" \"COAHUILA DE ZARAGOZA\",\n",
" \"COLIMA\",\n",
" \"DISTRITO FEDERAL\",\n",
" \"DURANGO\",\n",
" \"GUANAJUATO\",\n",
" \"GUERRERO\",\n",
" \"HIDALGO\",\n",
" \"JALISCO\",\n",
" \"MEXICO\",\n",
" \"MICHOACAN DE OCAMPO\",\n",
" \"MORELOS\",\n",
" \"NAYARIT\",\n",
" \"NUEVO LEON\",\n",
" \"OAXACA\",\n",
" \"PUEBLA\",\n",
" \"QUERETARO DE ARTEAGA\",\n",
" \"QUINTANA ROO\",\n",
" \"SAN LUIS POTOSI\",\n",
" \"SINALOA\",\n",
" \"SONORA\",\n",
" \"TABASCO\",\n",
" \"TAMAULIPAS\",\n",
" \"TLAXCALA\",\n",
" \"VERACRUZ DE IGNACIO DE LA LLAVE\",\n",
" \"YUCATAN\",\n",
" \"ZACATECAS\",\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6b047178-2902-4eb2-9a34-0b7d7beb277e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/05/y38rqjl55hjb_hbnypxzgrsw0000gn/T/ipykernel_93405/3168623387.py:1: DtypeWarning: Columns (21) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(\"2010-2019.csv\")\n"
]
}
],
"source": [
"# low_memory=False makes pandas infer each column's dtype from the whole file\n",
"# instead of chunk by chunk, which is what triggered the mixed-type\n",
"# DtypeWarning on this load.\n",
"df = pd.read_csv(\"2010-2019.csv\", low_memory=False)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "61675b16-391b-4821-8376-f92ec4b5b916",
"metadata": {},
"outputs": [],
"source": [
"def _ano_nacimiento_vivo_func(str_date):\n",
"    \"\"\"Return the text after the last '/' of a date string.\n",
"\n",
"    Values that have no ``split`` method (presumably NaN for a missing\n",
"    date) yield an empty string instead of raising.\n",
"    \"\"\"\n",
"    try:\n",
"        return str_date.split(\"/\")[-1]\n",
"    except AttributeError:\n",
"        # Only the 'not a string' case is handled; anything else (including\n",
"        # KeyboardInterrupt during the long apply) must propagate.\n",
"        return \"\"\n",
"\n",
"\n",
"# Derive the year column from the live-birth date column.\n",
"df[\"año_de_nacimiento_vivo\"] = df[\"fecha_nacimiento_nac_vivo\"].apply(\n",
"    _ano_nacimiento_vivo_func\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 34,
"id": "87a086d4-bab8-43a8-a121-8aaf3554e672",
"metadata": {},
"outputs": [],
"source": [
"# Keep only rows whose mother's age is strictly between 5 and 90.\n",
"df = df[df[\"edad_madre\"].gt(5) & df[\"edad_madre\"].lt(90)]"
]
},
{
"cell_type": "code",
"execution_count": 35,
"id": "f8eff617-7273-435f-a09a-8db4ec005ee0",
"metadata": {},
"outputs": [],
"source": [
"# Rows whose anomaly code, rendered as text, contains the substring \"Q9\".\n",
"# NOTE(review): if these are ICD-10 codes, Q9x spans every chromosomal\n",
"# abnormality (Q90-Q99), not only trisomies - confirm the intended scope.\n",
"df_trisomias = df[df[\"codigo_anomalia\"].map(lambda codigo: \"Q9\" in str(codigo))]"
]
},
{
"cell_type": "code",
"execution_count": 36,
"id": "1ff41e12-b6cd-41db-bd1b-47c2aa21c45e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th colspan=\"5\" halign=\"left\">edad_madre</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>edo_captura</th>\n",
" <th>año_de_nacimiento_vivo</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">AGUASCALIENTES</th>\n",
" <th>2010</th>\n",
" <td>29</td>\n",
" <td>30.586207</td>\n",
" <td>9.037688</td>\n",
" <td>16</td>\n",
" <td>45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>34</td>\n",
" <td>33.823529</td>\n",
" <td>6.815626</td>\n",
" <td>17</td>\n",
" <td>45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>30</td>\n",
" <td>29.600000</td>\n",
" <td>8.880820</td>\n",
" <td>17</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>30</td>\n",
" <td>30.400000</td>\n",
" <td>9.761289</td>\n",
" <td>15</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>23</td>\n",
" <td>28.956522</td>\n",
" <td>7.968565</td>\n",
" <td>19</td>\n",
" <td>41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">ZACATECAS</th>\n",
" <th>2015</th>\n",
" <td>13</td>\n",
" <td>31.307692</td>\n",
" <td>7.951778</td>\n",
" <td>19</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016</th>\n",
" <td>8</td>\n",
" <td>29.000000</td>\n",
" <td>9.971388</td>\n",
" <td>16</td>\n",
" <td>45</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017</th>\n",
" <td>9</td>\n",
" <td>32.555556</td>\n",
" <td>10.013879</td>\n",
" <td>18</td>\n",
" <td>43</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018</th>\n",
" <td>15</td>\n",
" <td>33.000000</td>\n",
" <td>8.026741</td>\n",
" <td>16</td>\n",
" <td>41</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2019</th>\n",
" <td>12</td>\n",
" <td>26.583333</td>\n",
" <td>5.468228</td>\n",
" <td>18</td>\n",
" <td>35</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>320 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" edad_madre \n",
" count mean std min max\n",
"edo_captura año_de_nacimiento_vivo \n",
"AGUASCALIENTES 2010 29 30.586207 9.037688 16 45\n",
" 2011 34 33.823529 6.815626 17 45\n",
" 2012 30 29.600000 8.880820 17 43\n",
" 2013 30 30.400000 9.761289 15 43\n",
" 2014 23 28.956522 7.968565 19 41\n",
"... ... ... ... .. ..\n",
"ZACATECAS 2015 13 31.307692 7.951778 19 43\n",
" 2016 8 29.000000 9.971388 16 45\n",
" 2017 9 32.555556 10.013879 18 43\n",
" 2018 15 33.000000 8.026741 16 41\n",
" 2019 12 26.583333 5.468228 18 35\n",
"\n",
"[320 rows x 5 columns]"
]
},
"execution_count": 36,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Summary statistics of the mother's age within the \"Q9\" subset, one row\n",
"# per (capture state, birth year) pair.\n",
"consulta_trisomias = df_trisomias.groupby(\n",
"    [\"edo_captura\", \"año_de_nacimiento_vivo\"]\n",
")[[\"edad_madre\"]].agg([\"count\", \"mean\", \"std\", \"min\", \"max\"])\n",
"consulta_trisomias"
]
},
{
"cell_type": "code",
"execution_count": 37,
"id": "942da486-5c14-4d37-a775-009151c68f29",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th colspan=\"5\" halign=\"left\">edad_madre</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>edo_captura</th>\n",
" <th>año_de_nacimiento_vivo</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">AGUASCALIENTES</th>\n",
" <th>2010</th>\n",
" <td>26973</td>\n",
" <td>25.569792</td>\n",
" <td>6.364083</td>\n",
" <td>10</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>28361</td>\n",
" <td>25.576073</td>\n",
" <td>6.362188</td>\n",
" <td>11</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>28840</td>\n",
" <td>25.510576</td>\n",
" <td>6.370119</td>\n",
" <td>12</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>28695</td>\n",
" <td>25.435302</td>\n",
" <td>6.343520</td>\n",
" <td>10</td>\n",
" <td>53</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>28718</td>\n",
" <td>25.434048</td>\n",
" <td>6.321484</td>\n",
" <td>12</td>\n",
" <td>56</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">ZACATECAS</th>\n",
" <th>2015</th>\n",
" <td>30099</td>\n",
" <td>25.619157</td>\n",
" <td>6.412798</td>\n",
" <td>12</td>\n",
" <td>57</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016</th>\n",
" <td>30118</td>\n",
" <td>25.641975</td>\n",
" <td>6.402262</td>\n",
" <td>10</td>\n",
" <td>51</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017</th>\n",
" <td>29955</td>\n",
" <td>25.663896</td>\n",
" <td>6.365496</td>\n",
" <td>13</td>\n",
" <td>53</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018</th>\n",
" <td>28235</td>\n",
" <td>25.756933</td>\n",
" <td>6.407207</td>\n",
" <td>10</td>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2019</th>\n",
" <td>26800</td>\n",
" <td>25.978619</td>\n",
" <td>6.402249</td>\n",
" <td>9</td>\n",
" <td>50</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>320 rows × 5 columns</p>\n",
"</div>"
],
"text/plain": [
" edad_madre \n",
" count mean std min max\n",
"edo_captura año_de_nacimiento_vivo \n",
"AGUASCALIENTES 2010 26973 25.569792 6.364083 10 52\n",
" 2011 28361 25.576073 6.362188 11 47\n",
" 2012 28840 25.510576 6.370119 12 47\n",
" 2013 28695 25.435302 6.343520 10 53\n",
" 2014 28718 25.434048 6.321484 12 56\n",
"... ... ... ... .. ..\n",
"ZACATECAS 2015 30099 25.619157 6.412798 12 57\n",
" 2016 30118 25.641975 6.402262 10 51\n",
" 2017 29955 25.663896 6.365496 13 53\n",
" 2018 28235 25.756933 6.407207 10 50\n",
" 2019 26800 25.978619 6.402249 9 50\n",
"\n",
"[320 rows x 5 columns]"
]
},
"execution_count": 37,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Mothers' ages: summary statistics over all rows kept in df, one row per\n",
"# (capture state, birth year) pair.\n",
"consulta_total = df.groupby([\"edo_captura\", \"año_de_nacimiento_vivo\"])[\n",
"    [\"edad_madre\"]\n",
"].agg([\"count\", \"mean\", \"std\", \"min\", \"max\"])\n",
"consulta_total"
]
},
{
"cell_type": "code",
"execution_count": 38,
"id": "5290532e-d470-49b6-bd68-07eab1b86e4c",
"metadata": {},
"outputs": [],
"source": [
"# Join the two summaries on their index; the suffixes keep the overlapping\n",
"# \"edad_madre\" column groups apart (general vs. trisomias).\n",
"consulta = consulta_total.join(\n",
"    other=consulta_trisomias,\n",
"    lsuffix=\"_general\",\n",
"    rsuffix=\"_trisomias\",\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 39,
"id": "7a171ccc-139d-4fd3-b438-0475dd43e27b",
"metadata": {},
"outputs": [],
"source": [
"# Share of rows in the \"Q9\" subset relative to all rows of the same\n",
"# (state, year) group. Multiplied by 100 so the value matches the column\n",
"# name (a percentage, not a raw fraction).\n",
"consulta[\"porcentaje\"] = (\n",
"    100\n",
"    * consulta[(\"edad_madre_trisomias\", \"count\")]\n",
"    / consulta[(\"edad_madre_general\", \"count\")]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 40,
"id": "2d932831-b2ce-46e4-a531-edd08d4d5ecb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th colspan=\"5\" halign=\"left\">edad_madre_general</th>\n",
" <th colspan=\"5\" halign=\"left\">edad_madre_trisomias</th>\n",
" <th>porcentaje</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th></th>\n",
" </tr>\n",
" <tr>\n",
" <th>edo_captura</th>\n",
" <th>año_de_nacimiento_vivo</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">AGUASCALIENTES</th>\n",
" <th>2010</th>\n",
" <td>26973</td>\n",
" <td>25.569792</td>\n",
" <td>6.364083</td>\n",
" <td>10</td>\n",
" <td>52</td>\n",
" <td>29</td>\n",
" <td>30.586207</td>\n",
" <td>9.037688</td>\n",
" <td>16</td>\n",
" <td>45</td>\n",
" <td>0.001075</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>28361</td>\n",
" <td>25.576073</td>\n",
" <td>6.362188</td>\n",
" <td>11</td>\n",
" <td>47</td>\n",
" <td>34</td>\n",
" <td>33.823529</td>\n",
" <td>6.815626</td>\n",
" <td>17</td>\n",
" <td>45</td>\n",
" <td>0.001199</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>28840</td>\n",
" <td>25.510576</td>\n",
" <td>6.370119</td>\n",
" <td>12</td>\n",
" <td>47</td>\n",
" <td>30</td>\n",
" <td>29.600000</td>\n",
" <td>8.880820</td>\n",
" <td>17</td>\n",
" <td>43</td>\n",
" <td>0.001040</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>28695</td>\n",
" <td>25.435302</td>\n",
" <td>6.343520</td>\n",
" <td>10</td>\n",
" <td>53</td>\n",
" <td>30</td>\n",
" <td>30.400000</td>\n",
" <td>9.761289</td>\n",
" <td>15</td>\n",
" <td>43</td>\n",
" <td>0.001045</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>28718</td>\n",
" <td>25.434048</td>\n",
" <td>6.321484</td>\n",
" <td>12</td>\n",
" <td>56</td>\n",
" <td>23</td>\n",
" <td>28.956522</td>\n",
" <td>7.968565</td>\n",
" <td>19</td>\n",
" <td>41</td>\n",
" <td>0.000801</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th rowspan=\"5\" valign=\"top\">ZACATECAS</th>\n",
" <th>2015</th>\n",
" <td>30099</td>\n",
" <td>25.619157</td>\n",
" <td>6.412798</td>\n",
" <td>12</td>\n",
" <td>57</td>\n",
" <td>13</td>\n",
" <td>31.307692</td>\n",
" <td>7.951778</td>\n",
" <td>19</td>\n",
" <td>43</td>\n",
" <td>0.000432</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016</th>\n",
" <td>30118</td>\n",
" <td>25.641975</td>\n",
" <td>6.402262</td>\n",
" <td>10</td>\n",
" <td>51</td>\n",
" <td>8</td>\n",
" <td>29.000000</td>\n",
" <td>9.971388</td>\n",
" <td>16</td>\n",
" <td>45</td>\n",
" <td>0.000266</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017</th>\n",
" <td>29955</td>\n",
" <td>25.663896</td>\n",
" <td>6.365496</td>\n",
" <td>13</td>\n",
" <td>53</td>\n",
" <td>9</td>\n",
" <td>32.555556</td>\n",
" <td>10.013879</td>\n",
" <td>18</td>\n",
" <td>43</td>\n",
" <td>0.000300</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018</th>\n",
" <td>28235</td>\n",
" <td>25.756933</td>\n",
" <td>6.407207</td>\n",
" <td>10</td>\n",
" <td>50</td>\n",
" <td>15</td>\n",
" <td>33.000000</td>\n",
" <td>8.026741</td>\n",
" <td>16</td>\n",
" <td>41</td>\n",
" <td>0.000531</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2019</th>\n",
" <td>26800</td>\n",
" <td>25.978619</td>\n",
" <td>6.402249</td>\n",
" <td>9</td>\n",
" <td>50</td>\n",
" <td>12</td>\n",
" <td>26.583333</td>\n",
" <td>5.468228</td>\n",
" <td>18</td>\n",
" <td>35</td>\n",
" <td>0.000448</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>320 rows × 11 columns</p>\n",
"</div>"
],
"text/plain": [
" edad_madre_general \\\n",
" count mean std \n",
"edo_captura año_de_nacimiento_vivo \n",
"AGUASCALIENTES 2010 26973 25.569792 6.364083 \n",
" 2011 28361 25.576073 6.362188 \n",
" 2012 28840 25.510576 6.370119 \n",
" 2013 28695 25.435302 6.343520 \n",
" 2014 28718 25.434048 6.321484 \n",
"... ... ... ... \n",
"ZACATECAS 2015 30099 25.619157 6.412798 \n",
" 2016 30118 25.641975 6.402262 \n",
" 2017 29955 25.663896 6.365496 \n",
" 2018 28235 25.756933 6.407207 \n",
" 2019 26800 25.978619 6.402249 \n",
"\n",
" edad_madre_trisomias \\\n",
" min max count mean \n",
"edo_captura año_de_nacimiento_vivo \n",
"AGUASCALIENTES 2010 10 52 29 30.586207 \n",
" 2011 11 47 34 33.823529 \n",
" 2012 12 47 30 29.600000 \n",
" 2013 10 53 30 30.400000 \n",
" 2014 12 56 23 28.956522 \n",
"... .. .. ... ... \n",
"ZACATECAS 2015 12 57 13 31.307692 \n",
" 2016 10 51 8 29.000000 \n",
" 2017 13 53 9 32.555556 \n",
" 2018 10 50 15 33.000000 \n",
" 2019 9 50 12 26.583333 \n",
"\n",
" porcentaje \n",
" std min max \n",
"edo_captura año_de_nacimiento_vivo \n",
"AGUASCALIENTES 2010 9.037688 16 45 0.001075 \n",
" 2011 6.815626 17 45 0.001199 \n",
" 2012 8.880820 17 43 0.001040 \n",
" 2013 9.761289 15 43 0.001045 \n",
" 2014 7.968565 19 41 0.000801 \n",
"... ... .. .. ... \n",
"ZACATECAS 2015 7.951778 19 43 0.000432 \n",
" 2016 9.971388 16 45 0.000266 \n",
" 2017 10.013879 18 43 0.000300 \n",
" 2018 8.026741 16 41 0.000531 \n",
" 2019 5.468228 18 35 0.000448 \n",
"\n",
"[320 rows x 11 columns]"
]
},
"execution_count": 40,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"consulta"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "5f5a6d5e-7bb9-4ee6-aaa5-e2318ceb927a",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"id": "03e58d58-0616-45ff-90dd-37c1182d1d4a",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

File diff suppressed because one or more lines are too long

View File

@ -0,0 +1,154 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "47114285-22af-469e-81f3-560261036208",
"metadata": {},
"outputs": [],
"source": [
"from functools import cache\n",
"import pandas as pd\n",
"\n",
"pd.set_option(\"display.max_columns\", None)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "73c5d186-84f4-4a8e-a572-566ab4936bd7",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/05/y38rqjl55hjb_hbnypxzgrsw0000gn/T/ipykernel_93262/3018518299.py:3: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" pd.read_csv(\"2010-2016.csv\"),\n"
]
}
],
"source": [
"# Stack the two period files row-wise. ignore_index=True renumbers the rows\n",
"# 0..n-1; without it each part keeps its own labels, so the combined index\n",
"# repeats values and label-based lookups match several rows.\n",
"df = pd.concat(\n",
"    [\n",
"        pd.read_csv(\"2010-2016.csv\"),\n",
"        pd.read_csv(\"2017-2019.csv\"),\n",
"    ],\n",
"    ignore_index=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "194a6cbd-0bd7-4c24-8eb4-f895457ecfed",
"metadata": {},
"outputs": [],
"source": [
"# index=False: the row labels carry no information, and writing them adds an\n",
"# extra unnamed first column that read_csv later surfaces as \"Unnamed: 0\".\n",
"df.to_csv(\"2010-2019.csv\", index=False)"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "3cfe3c4f-91ea-41e5-a0b9-557183080871",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 20918084 entries, 0 to 5873376\n",
"Data columns (total 32 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 Unnamed: 0 int64 \n",
" 1 edo_captura object\n",
" 2 edo_nac_madre object\n",
" 3 fecha_nac_madre object\n",
" 4 edad_madre int64 \n",
" 5 estado_conyugal object\n",
" 6 entidad_residencia_madre object\n",
" 7 numero_embarazos int64 \n",
" 8 hijos_nacidos_muertos int64 \n",
" 9 hijos_nacidos_vivos int64 \n",
" 10 hijos_sobrevivientes int64 \n",
" 11 el_hijo_anterior_nacio object\n",
" 12 vive_aun_hijo_anterior object\n",
" 13 orden_nacimiento int64 \n",
" 14 recibio_atencion_prenatal object\n",
" 15 trimestre_recibio_primera_consulta object\n",
" 16 total_consultas_recibidas int64 \n",
" 17 madre_sobrevivio_al_parto object\n",
" 18 escolaridad_madre object\n",
" 19 ocupacion_habitual_madre object\n",
" 20 trabaja_actualmente object\n",
" 21 fecha_nacimiento_nac_vivo object\n",
" 22 hora_nacimiento_nac_vivo object\n",
" 23 sexo_nac_vivo object\n",
" 24 semanas_gestacion_nac_vivo int64 \n",
" 25 talla_nac_vivo int64 \n",
" 26 peso_nac_vivo int64 \n",
" 27 valoracion_apgar_nac_vivo int64 \n",
" 28 valoracion_silverman_nac_vivo int64 \n",
" 29 producto_de_un_embarazo object\n",
" 30 codigo_anomalia object\n",
" 31 entidad_certifico object\n",
"dtypes: int64(13), object(19)\n",
"memory usage: 5.1+ GB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f302574e-65d3-4b4e-9fad-c4a93b1ebba7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"16777232 80891859 -rw-r--r-- 1 miguel.salgado staff 0 4854454496 \"Feb 25 01:57:13 2024\" \"Feb 25 01:59:10 2024\" \"Feb 25 01:59:10 2024\" \"Feb 25 01:57:10 2024\" 4096 9504688 0 2010-2019.csv\n"
]
}
],
"source": [
"! stat 2010-2019.csv"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f6673b8-7e8d-42fc-af10-4302cee2b37d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,214 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f9c836a8-aa45-4b3f-8392-660ffdf0daf9",
"metadata": {},
"outputs": [],
"source": [
"from functools import cache\n",
"import pandas as pd\n",
"\n",
"pd.set_option(\"display.max_columns\", None)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "faf0690b-1462-4d93-b66d-3b3e017fcc88",
"metadata": {},
"outputs": [],
"source": [
"@cache\n",
"def get_dataset_for(year):\n",
"    \"\"\"Read datasets/sinac<year>DatosAbiertos.csv, memoized per year.\n",
"\n",
"    Every column is loaded with dtype=object, so no type inference happens.\n",
"    Because of @cache, repeated calls with the same year return the very same\n",
"    DataFrame object; callers should not modify it in place.\n",
"    \"\"\"\n",
"    ruta = \"datasets/sinac\" + str(year) + \"DatosAbiertos.csv\"\n",
"    return pd.read_csv(ruta, dtype=object)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "4436b11a-5e07-4b5f-9aa8-9f60b9328e70",
"metadata": {},
"outputs": [],
"source": [
"columns_selected = [\n",
" \"edo_captura\",\n",
" \"edo_nac_madre\",\n",
" \"fecha_nac_madre\",\n",
" \"edad_madre\",\n",
" \"estado_conyugal\",\n",
" \"entidad_residencia_madre\",\n",
" \"numero_embarazos\",\n",
" \"hijos_nacidos_muertos\",\n",
" \"hijos_nacidos_vivos\",\n",
" \"hijos_sobrevivientes\",\n",
" \"el_hijo_anterior_nacio\",\n",
" \"vive_aun_hijo_anterior\",\n",
" \"orden_nacimiento\",\n",
" \"recibio_atencion_prenatal\",\n",
" \"trimestre_recibio_primera_consulta\",\n",
" \"total_consultas_recibidas\",\n",
" \"madre_sobrevivio_al_parto\",\n",
" \"escolaridad_madre\",\n",
" \"ocupacion_habitual_madre\",\n",
" \"trabaja_actualmente\",\n",
" \"fecha_nacimiento_nac_vivo\",\n",
" \"hora_nacimiento_nac_vivo\",\n",
" \"sexo_nac_vivo\",\n",
" \"semanas_gestacion_nac_vivo\",\n",
" \"talla_nac_vivo\",\n",
" \"peso_nac_vivo\",\n",
" \"valoracion_apgar_nac_vivo\",\n",
" \"valoracion_silverman_nac_vivo\",\n",
" \"producto_de_un_embarazo\",\n",
" \"codigo_anomalia\",\n",
" \"anomalia_congenita_nac_vivo\",\n",
" \"lugar_de_nacimiento\",\n",
" \"entidad_certifico\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "71959de6-9879-4471-a331-99a054b7f5af",
"metadata": {},
"outputs": [],
"source": [
"# One frame per year from 2010 through 2016 (the range end is exclusive), each\n",
"# restricted to columns_selected, stacked row-wise in year order.\n",
"df = pd.concat(\n",
"    [get_dataset_for(anio).loc[:, columns_selected] for anio in range(2010, 2017)]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "9147e139-6a91-4660-81e2-db49ec151ff0",
"metadata": {},
"outputs": [],
"source": [
"def _ano_nacimiento_vivo_func(str_date):\n",
"    \"\"\"Return the last '/'-separated field of a date string.\n",
"\n",
"    Presumably the input is a dd/mm/yyyy date, so the field is the year\n",
"    (TODO confirm against the source files). Any value that is not a str,\n",
"    such as the float NaN pandas uses for a missing cell, yields an empty\n",
"    string instead of raising.\n",
"    \"\"\"\n",
"    # Explicit type check instead of a bare `except:`, which would also have\n",
"    # swallowed KeyboardInterrupt and hidden unrelated errors.\n",
"    if not isinstance(str_date, str):\n",
"        return \"\"\n",
"    return str_date.rsplit(\"/\", 1)[-1]\n",
"\n",
"\n",
"df[\"año_de_nacimiento_vivo\"] = df[\"fecha_nacimiento_nac_vivo\"].apply(\n",
" _ano_nacimiento_vivo_func\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f71063d3-5c78-426f-a484-80fcf543051f",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Export list: columns_selected in its original order, minus the two columns\n",
"# named below, which are loaded but not written to the output file.\n",
"_no_exportadas = {\"anomalia_congenita_nac_vivo\", \"lugar_de_nacimiento\"}\n",
"columnas_finales = [col for col in columns_selected if col not in _no_exportadas]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "937d0447-e6e6-477d-a43b-471302229fca",
"metadata": {},
"outputs": [],
"source": [
"df[columnas_finales].to_csv(\"2010-2016.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dfe420d2-b0f3-48ce-b30d-06f8d901dbd8",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 12,
"id": "69940ce2-da7a-4752-9d0d-c6046eb03992",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"20918084"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"5873377 + 15044707"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c3db8468-11e0-4758-a9b1-f037e633b729",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}

View File

@ -0,0 +1,939 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "bcb2eb03-8273-46dd-ba20-f1817691fe42",
"metadata": {},
"outputs": [],
"source": [
"from functools import cache\n",
"import pandas as pd\n",
"\n",
"pd.set_option(\"display.max_columns\", None)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "5b51e9a7-1dbd-4b86-8aad-109a2f4095cd",
"metadata": {},
"outputs": [],
"source": [
"columns_selected = [\n",
" \"edo_captura\",\n",
" \"edo_nac_madre\",\n",
" \"fecha_nac_madre\",\n",
" \"edad_madre\",\n",
" \"estado_conyugal\",\n",
" \"entidad_residencia_madre\",\n",
" \"numero_embarazos\",\n",
" \"hijos_nacidos_muertos\",\n",
" \"hijos_nacidos_vivos\",\n",
" \"hijos_sobrevivientes\",\n",
" \"el_hijo_anterior_nacio\",\n",
" \"vive_aun_hijo_anterior\",\n",
" \"orden_nacimiento\",\n",
" \"recibio_atencion_prenatal\",\n",
" \"trimestre_recibio_primera_consulta\",\n",
" \"total_consultas_recibidas\",\n",
" \"madre_sobrevivio_al_parto\",\n",
" \"escolaridad_madre\",\n",
" \"ocupacion_habitual_madre\",\n",
" \"trabaja_actualmente\",\n",
" \"fecha_nacimiento_nac_vivo\",\n",
" \"hora_nacimiento_nac_vivo\",\n",
" \"sexo_nac_vivo\",\n",
" \"semanas_gestacion_nac_vivo\",\n",
" \"talla_nac_vivo\",\n",
" \"peso_nac_vivo\",\n",
" \"valoracion_apgar_nac_vivo\",\n",
" \"valoracion_silverman_nac_vivo\",\n",
" \"producto_de_un_embarazo\",\n",
" \"entidad_certifico\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "2848508d-264a-487b-951f-99ef8483e3b1",
"metadata": {},
"outputs": [],
"source": [
"df_original = pd.read_csv(\"~/Downloads/out2.csv\", dtype=object)"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "a6c8cc59-5461-4362-932f-0ddf8fbcb764",
"metadata": {},
"outputs": [],
"source": [
"# df_original.columns"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "f5641028-679f-473b-945d-5775d144dcf2",
"metadata": {},
"outputs": [],
"source": [
"df = df_original.copy()"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "b0362862-d25d-40a8-a4d7-95b1e337b955",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"{'entidad_residencia_madre.1', 'edo_captura.1', 'sexo_nacimiento_vivo.1', 'escolaridad_madre.1', 'edo_nac_madre.1', 'madre_sobrevivio_parto.1', 'recibio_atencion_prenatal.1', 'producto_de_un_embarazo.1', 'estado_conyugal'}\n"
]
}
],
"source": [
"# Keep every column whose name ends in \".1\" and drop the column that shares its\n",
"# base name. NOTE(review): presumably \".1\" is the suffix pandas gives a repeated\n",
"# header and that copy holds the text description - confirm against out2.csv.\n",
"# \"estado_conyugal\" is added to the set but has no effect on the filter below;\n",
"# it only matters to code that later iterates over columnas_description.\n",
"todas = df.columns.to_list()\n",
"columnas_description = {\"estado_conyugal\"}.union(c for c in todas if c.endswith(\".1\"))\n",
"df_cols = [c for c in todas if f\"{c}.1\" not in columnas_description]\n",
"print(columnas_description)\n",
"df = df[df_cols]"
]
},
{
"cell_type": "code",
"execution_count": 9,
"id": "62f5a056-048a-4d5c-a28b-704cfcc44692",
"metadata": {},
"outputs": [],
"source": [
"# Drop the last two characters of every name in columnas_description: each\n",
"# \".1\" column gets its base name back, and \"estado_conyugal\" becomes\n",
"# \"estado_conyug\". NOTE(review): presumably that frees the name for the\n",
"# \"Descrip\" column renamed to \"estado_conyugal\" in the mapper cell - confirm.\n",
"df = df.rename(columns={nombre: nombre[:-2] for nombre in columnas_description})"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "57fab793-97b6-4189-b6e0-578062763b68",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"Index(['Unnamed: 0', 'MPO_NACM', 'fecha_nac_madre', 'edad_madre', 'CON_INDM',\n",
" 'HABLA_INDM', 'CUAL_LENGM', 'estado_conyug', 'TIPOVIAL_RES',\n",
" 'TIPOASEN_RES', 'MPO_RES', 'LOC_RES', 'numeros_embarazos',\n",
" 'hijos_nacidos_muertos', 'hijos_nacidos_vivos', 'hijos_sobrevivientes',\n",
" 'hijo_anterior_nacio', 'VIVE_AUN', 'orden_nacimiento', 'TRIM_CONS',\n",
" 'total_consultas_recibidas', 'DERHAB', 'DERHAB2',\n",
" 'ocupacion_habitual_madre', 'CVEOCUPHAB', 'TRAB_ACT',\n",
" 'fecha_nacimiento_vivo', 'hora_nacimiento_vivo',\n",
" 'semanas_gestacion_vivo', 'talla_nacimiento_vivo', 'peso_nac_vivo',\n",
" 'valoracion_apgar_nac_vivo', 'valoracion_silverman_nac_vivo', 'BCG',\n",
" 'HEP_B', 'VIT_A', 'VIT_K', 'TAM_AUD', 'ACELRN', 'CVE_CIE', 'ACELRN2',\n",
" 'CVE_CIE2', 'PROCNAC', 'FORCEPS', 'ESPECIFIQUE', 'INST_NAC', 'UNIMED',\n",
" 'CLUES', 'ATENDIO', 'ATEN_OTRO', 'TIPOVIAL_NAC', 'TIPOASEN_NAC',\n",
" 'lugar_de_nacimiento', 'MPO_NAC', 'LOC_NAC', 'CERT_POR', 'OTROMEDICO',\n",
" 'UNIMED_33_1', 'CLUES_33_2', 'TIPOVIAL_CERT', 'TIPOASEN_CERT',\n",
" 'ENT_CERT', 'MPO_CERT', 'LOC_CERT', 'FECH_CERT', 'FECH_ALTA',\n",
" 'FECH_CAMB', 'IDCAPTURA', 'TIPO_FORMATO', 'edo_nac_madre',\n",
" 'edo_captura', 'el_hijo_anterior_nacio', 'entidad_certifico',\n",
" 'entidad_residencia_madre', 'escolaridad_madre', 'Descrip',\n",
" 'lugar_nacimiento', 'madre_sobrevivio_parto', 'producto_de_un_embarazo',\n",
" 'recibio_atencion_prenatal', 'sexo_nacimiento_vivo',\n",
" 'trabaja_actualmente', 'trimestre_recibio_primera_consulta',\n",
" 'vive_aun_hijo_anterior'],\n",
" dtype='object')"
]
},
"execution_count": 10,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df.columns"
]
},
{
"cell_type": "code",
"execution_count": 11,
"id": "3e3f76f5-2e5e-4ac6-a5c2-34cecdfa977b",
"metadata": {},
"outputs": [],
"source": [
"mapper = {\n",
" \"numeros_embarazos\": \"numero_embarazos\",\n",
" \"madre_sobrevivio_parto\": \"madre_sobrevivio_al_parto\",\n",
" \"fecha_nacimiento_vivo\": \"fecha_nacimiento_nac_vivo\",\n",
" \"hora_nacimiento_vivo\": \"hora_nacimiento_nac_vivo\",\n",
" \"sexo_nacimiento_vivo\": \"sexo_nac_vivo\",\n",
" \"semanas_gestacion_vivo\": \"semanas_gestacion_nac_vivo\",\n",
" \"talla_nacimiento_vivo\": \"talla_nac_vivo\",\n",
" \"Descrip\": \"estado_conyugal\",\n",
"}\n",
"df = df.rename(columns=mapper)"
]
},
{
"cell_type": "code",
"execution_count": 12,
"id": "14f1b915-aa58-42c0-99f9-901ad56509c0",
"metadata": {},
"outputs": [],
"source": [
"# df_original.sample(n=10)[[\"estado_conyugal\"]]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "0aa4b396-b901-441e-9f94-e6521e7146a9",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>edo_captura</th>\n",
" <th>edo_nac_madre</th>\n",
" <th>fecha_nac_madre</th>\n",
" <th>edad_madre</th>\n",
" <th>estado_conyugal</th>\n",
" <th>entidad_residencia_madre</th>\n",
" <th>numero_embarazos</th>\n",
" <th>hijos_nacidos_muertos</th>\n",
" <th>hijos_nacidos_vivos</th>\n",
" <th>hijos_sobrevivientes</th>\n",
" <th>el_hijo_anterior_nacio</th>\n",
" <th>vive_aun_hijo_anterior</th>\n",
" <th>orden_nacimiento</th>\n",
" <th>recibio_atencion_prenatal</th>\n",
" <th>trimestre_recibio_primera_consulta</th>\n",
" <th>total_consultas_recibidas</th>\n",
" <th>madre_sobrevivio_al_parto</th>\n",
" <th>escolaridad_madre</th>\n",
" <th>ocupacion_habitual_madre</th>\n",
" <th>trabaja_actualmente</th>\n",
" <th>fecha_nacimiento_nac_vivo</th>\n",
" <th>hora_nacimiento_nac_vivo</th>\n",
" <th>sexo_nac_vivo</th>\n",
" <th>semanas_gestacion_nac_vivo</th>\n",
" <th>talla_nac_vivo</th>\n",
" <th>peso_nac_vivo</th>\n",
" <th>valoracion_apgar_nac_vivo</th>\n",
" <th>valoracion_silverman_nac_vivo</th>\n",
" <th>producto_de_un_embarazo</th>\n",
" <th>entidad_certifico</th>\n",
" <th>CVE_CIE</th>\n",
" <th>CVE_CIE2</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>CHIAPAS</td>\n",
" <td>CHIAPAS</td>\n",
" <td>05/07/1980</td>\n",
" <td>36</td>\n",
" <td>UNIÓN LIBRE</td>\n",
" <td>CHIAPAS</td>\n",
" <td>9</td>\n",
" <td>1</td>\n",
" <td>8</td>\n",
" <td>8</td>\n",
" <td>VIVO</td>\n",
" <td>SI</td>\n",
" <td>9</td>\n",
" <td>SI</td>\n",
" <td>SEGUNDO</td>\n",
" <td>6</td>\n",
" <td>SI</td>\n",
" <td>NINGUNA</td>\n",
" <td>HOGAR</td>\n",
" <td>NaN</td>\n",
" <td>19/04/2017</td>\n",
" <td>02:00:00</td>\n",
" <td>MUJER</td>\n",
" <td>40</td>\n",
" <td>50</td>\n",
" <td>3100</td>\n",
" <td>8</td>\n",
" <td>0</td>\n",
" <td>ÚNICO</td>\n",
" <td>CHIAPAS</td>\n",
" <td>0000</td>\n",
" <td>0000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>CHIAPAS</td>\n",
" <td>CHIAPAS</td>\n",
" <td>17/05/1991</td>\n",
" <td>25</td>\n",
" <td>UNIÓN LIBRE</td>\n",
" <td>CHIAPAS</td>\n",
" <td>4</td>\n",
" <td>99</td>\n",
" <td>4</td>\n",
" <td>4</td>\n",
" <td>VIVO</td>\n",
" <td>SI</td>\n",
" <td>4</td>\n",
" <td>SI</td>\n",
" <td>SEGUNDO</td>\n",
" <td>4</td>\n",
" <td>SI</td>\n",
" <td>SECUNDARIA COMPLETA</td>\n",
" <td>HOGAR</td>\n",
" <td>NaN</td>\n",
" <td>11/01/2017</td>\n",
" <td>13:00:00</td>\n",
" <td>MUJER</td>\n",
" <td>39</td>\n",
" <td>50</td>\n",
" <td>3900</td>\n",
" <td>0</td>\n",
" <td>0</td>\n",
" <td>ÚNICO</td>\n",
" <td>CHIAPAS</td>\n",
" <td>0000</td>\n",
" <td>0000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>CHIAPAS</td>\n",
" <td>CHIAPAS</td>\n",
" <td>27/01/1982</td>\n",
" <td>35</td>\n",
" <td>CASADA</td>\n",
" <td>CHIAPAS</td>\n",
" <td>6</td>\n",
" <td>0</td>\n",
" <td>6</td>\n",
" <td>6</td>\n",
" <td>VIVO</td>\n",
" <td>SI</td>\n",
" <td>6</td>\n",
" <td>SI</td>\n",
" <td>PRIMERO</td>\n",
" <td>3</td>\n",
" <td>SI</td>\n",
" <td>PRIMARIA INCOMPLETA</td>\n",
" <td>AMA DE CASA</td>\n",
" <td>NaN</td>\n",
" <td>22/05/2017</td>\n",
" <td>12:23:00</td>\n",
" <td>MUJER</td>\n",
" <td>38</td>\n",
" <td>50</td>\n",
" <td>2960</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>ÚNICO</td>\n",
" <td>CHIAPAS</td>\n",
" <td>0000</td>\n",
" <td>0000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>CHIAPAS</td>\n",
" <td>CHIAPAS</td>\n",
" <td>16/11/1983</td>\n",
" <td>33</td>\n",
" <td>UNIÓN LIBRE</td>\n",
" <td>CHIAPAS</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>S.I.</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>SI</td>\n",
" <td>TERCERO</td>\n",
" <td>4</td>\n",
" <td>SI</td>\n",
" <td>PRIMARIA COMPLETA</td>\n",
" <td>AMA DE CASA</td>\n",
" <td>NaN</td>\n",
" <td>03/06/2017</td>\n",
" <td>10:35:00</td>\n",
" <td>MUJER</td>\n",
" <td>99</td>\n",
" <td>48</td>\n",
" <td>2900</td>\n",
" <td>99</td>\n",
" <td>99</td>\n",
" <td>ÚNICO</td>\n",
" <td>CHIAPAS</td>\n",
" <td>0000</td>\n",
" <td>0000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>CHIAPAS</td>\n",
" <td>CHIAPAS</td>\n",
" <td>22/11/1982</td>\n",
" <td>34</td>\n",
" <td>UNIÓN LIBRE</td>\n",
" <td>CHIAPAS</td>\n",
" <td>3</td>\n",
" <td>99</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>VIVO</td>\n",
" <td>SI</td>\n",
" <td>3</td>\n",
" <td>SI</td>\n",
" <td>PRIMERO</td>\n",
" <td>5</td>\n",
" <td>SI</td>\n",
" <td>NINGUNA</td>\n",
" <td>AMA DE CASA</td>\n",
" <td>NaN</td>\n",
" <td>29/04/2017</td>\n",
" <td>23:12:00</td>\n",
" <td>MUJER</td>\n",
" <td>39</td>\n",
" <td>51</td>\n",
" <td>3050</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>ÚNICO</td>\n",
" <td>CHIAPAS</td>\n",
" <td>0000</td>\n",
" <td>0000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5873372</th>\n",
" <td>ZACATECAS</td>\n",
" <td>OTROS PAISES DE LATINOAMERICA</td>\n",
" <td>01/07/1987</td>\n",
" <td>31</td>\n",
" <td>CASADA</td>\n",
" <td>ZACATECAS</td>\n",
" <td>3</td>\n",
" <td>1</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>VIVO</td>\n",
" <td>SI</td>\n",
" <td>3</td>\n",
" <td>SI</td>\n",
" <td>PRIMERO</td>\n",
" <td>12</td>\n",
" <td>SI</td>\n",
" <td>PROFESIONAL</td>\n",
" <td>LIC. EN INFORMATICA</td>\n",
" <td>SI</td>\n",
" <td>23/05/2019</td>\n",
" <td>16:06:00</td>\n",
" <td>MUJER</td>\n",
" <td>39</td>\n",
" <td>49</td>\n",
" <td>2990</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>ÚNICO</td>\n",
" <td>ZACATECAS</td>\n",
" <td>0000</td>\n",
" <td>0000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5873373</th>\n",
" <td>ZACATECAS</td>\n",
" <td>ZACATECAS</td>\n",
" <td>17/07/1986</td>\n",
" <td>32</td>\n",
" <td>CASADA</td>\n",
" <td>ZACATECAS</td>\n",
" <td>3</td>\n",
" <td>0</td>\n",
" <td>3</td>\n",
" <td>3</td>\n",
" <td>VIVO</td>\n",
" <td>SI</td>\n",
" <td>3</td>\n",
" <td>SI</td>\n",
" <td>PRIMERO</td>\n",
" <td>5</td>\n",
" <td>SI</td>\n",
" <td>SECUNDARIA COMPLETA</td>\n",
" <td>HOGAR</td>\n",
" <td>NaN</td>\n",
" <td>24/05/2019</td>\n",
" <td>16:55:00</td>\n",
" <td>HOMBRE</td>\n",
" <td>37</td>\n",
" <td>50</td>\n",
" <td>3830</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>ÚNICO</td>\n",
" <td>ZACATECAS</td>\n",
" <td>0000</td>\n",
" <td>0000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5873374</th>\n",
" <td>ZACATECAS</td>\n",
" <td>ZACATECAS</td>\n",
" <td>16/01/2000</td>\n",
" <td>19</td>\n",
" <td>CASADA</td>\n",
" <td>ZACATECAS</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NO HA TENIDO OTROS HIJOS(AS)</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>SI</td>\n",
" <td>PRIMERO</td>\n",
" <td>5</td>\n",
" <td>SI</td>\n",
" <td>SECUNDARIA COMPLETA</td>\n",
" <td>HOGAR</td>\n",
" <td>NaN</td>\n",
" <td>24/05/2019</td>\n",
" <td>23:13:00</td>\n",
" <td>HOMBRE</td>\n",
" <td>42</td>\n",
" <td>56</td>\n",
" <td>4460</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>ÚNICO</td>\n",
" <td>ZACATECAS</td>\n",
" <td>0000</td>\n",
" <td>0000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5873375</th>\n",
" <td>ZACATECAS</td>\n",
" <td>ZACATECAS</td>\n",
" <td>26/06/2000</td>\n",
" <td>18</td>\n",
" <td>UNIÓN LIBRE</td>\n",
" <td>ZACATECAS</td>\n",
" <td>2</td>\n",
" <td>0</td>\n",
" <td>2</td>\n",
" <td>2</td>\n",
" <td>VIVO</td>\n",
" <td>SI</td>\n",
" <td>2</td>\n",
" <td>NO</td>\n",
" <td>NO RECIBIO</td>\n",
" <td>0</td>\n",
" <td>SI</td>\n",
" <td>BACHILLERATO O PREPARATORIA INCOMPLETA</td>\n",
" <td>HOGAR</td>\n",
" <td>NaN</td>\n",
" <td>25/05/2019</td>\n",
" <td>01:47:00</td>\n",
" <td>HOMBRE</td>\n",
" <td>39</td>\n",
" <td>49</td>\n",
" <td>3090</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>ÚNICO</td>\n",
" <td>ZACATECAS</td>\n",
" <td>0000</td>\n",
" <td>0000</td>\n",
" </tr>\n",
" <tr>\n",
" <th>5873376</th>\n",
" <td>ZACATECAS</td>\n",
" <td>ZACATECAS</td>\n",
" <td>24/09/2002</td>\n",
" <td>16</td>\n",
" <td>UNIÓN LIBRE</td>\n",
" <td>SAN LUIS POTOSI</td>\n",
" <td>1</td>\n",
" <td>0</td>\n",
" <td>1</td>\n",
" <td>1</td>\n",
" <td>NO HA TENIDO OTROS HIJOS(AS)</td>\n",
" <td>NaN</td>\n",
" <td>1</td>\n",
" <td>SI</td>\n",
" <td>PRIMERO</td>\n",
" <td>9</td>\n",
" <td>SI</td>\n",
" <td>SECUNDARIA INCOMPLETA</td>\n",
" <td>HOGAR</td>\n",
" <td>NaN</td>\n",
" <td>25/05/2019</td>\n",
" <td>08:53:00</td>\n",
" <td>MUJER</td>\n",
" <td>40</td>\n",
" <td>54</td>\n",
" <td>3300</td>\n",
" <td>9</td>\n",
" <td>0</td>\n",
" <td>ÚNICO</td>\n",
" <td>ZACATECAS</td>\n",
" <td>0000</td>\n",
" <td>0000</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>5873377 rows × 32 columns</p>\n",
"</div>"
],
"text/plain": [
" edo_captura edo_nac_madre fecha_nac_madre edad_madre \\\n",
"0 CHIAPAS CHIAPAS 05/07/1980 36 \n",
"1 CHIAPAS CHIAPAS 17/05/1991 25 \n",
"2 CHIAPAS CHIAPAS 27/01/1982 35 \n",
"3 CHIAPAS CHIAPAS 16/11/1983 33 \n",
"4 CHIAPAS CHIAPAS 22/11/1982 34 \n",
"... ... ... ... ... \n",
"5873372 ZACATECAS OTROS PAISES DE LATINOAMERICA 01/07/1987 31 \n",
"5873373 ZACATECAS ZACATECAS 17/07/1986 32 \n",
"5873374 ZACATECAS ZACATECAS 16/01/2000 19 \n",
"5873375 ZACATECAS ZACATECAS 26/06/2000 18 \n",
"5873376 ZACATECAS ZACATECAS 24/09/2002 16 \n",
"\n",
" estado_conyugal entidad_residencia_madre numero_embarazos \\\n",
"0 UNIÓN LIBRE CHIAPAS 9 \n",
"1 UNIÓN LIBRE CHIAPAS 4 \n",
"2 CASADA CHIAPAS 6 \n",
"3 UNIÓN LIBRE CHIAPAS 1 \n",
"4 UNIÓN LIBRE CHIAPAS 3 \n",
"... ... ... ... \n",
"5873372 CASADA ZACATECAS 3 \n",
"5873373 CASADA ZACATECAS 3 \n",
"5873374 CASADA ZACATECAS 1 \n",
"5873375 UNIÓN LIBRE ZACATECAS 2 \n",
"5873376 UNIÓN LIBRE SAN LUIS POTOSI 1 \n",
"\n",
" hijos_nacidos_muertos hijos_nacidos_vivos hijos_sobrevivientes \\\n",
"0 1 8 8 \n",
"1 99 4 4 \n",
"2 0 6 6 \n",
"3 0 1 1 \n",
"4 99 3 3 \n",
"... ... ... ... \n",
"5873372 1 2 2 \n",
"5873373 0 3 3 \n",
"5873374 0 1 1 \n",
"5873375 0 2 2 \n",
"5873376 0 1 1 \n",
"\n",
" el_hijo_anterior_nacio vive_aun_hijo_anterior orden_nacimiento \\\n",
"0 VIVO SI 9 \n",
"1 VIVO SI 4 \n",
"2 VIVO SI 6 \n",
"3 S.I. NaN 1 \n",
"4 VIVO SI 3 \n",
"... ... ... ... \n",
"5873372 VIVO SI 3 \n",
"5873373 VIVO SI 3 \n",
"5873374 NO HA TENIDO OTROS HIJOS(AS) NaN 1 \n",
"5873375 VIVO SI 2 \n",
"5873376 NO HA TENIDO OTROS HIJOS(AS) NaN 1 \n",
"\n",
" recibio_atencion_prenatal trimestre_recibio_primera_consulta \\\n",
"0 SI SEGUNDO \n",
"1 SI SEGUNDO \n",
"2 SI PRIMERO \n",
"3 SI TERCERO \n",
"4 SI PRIMERO \n",
"... ... ... \n",
"5873372 SI PRIMERO \n",
"5873373 SI PRIMERO \n",
"5873374 SI PRIMERO \n",
"5873375 NO NO RECIBIO \n",
"5873376 SI PRIMERO \n",
"\n",
" total_consultas_recibidas madre_sobrevivio_al_parto \\\n",
"0 6 SI \n",
"1 4 SI \n",
"2 3 SI \n",
"3 4 SI \n",
"4 5 SI \n",
"... ... ... \n",
"5873372 12 SI \n",
"5873373 5 SI \n",
"5873374 5 SI \n",
"5873375 0 SI \n",
"5873376 9 SI \n",
"\n",
" escolaridad_madre ocupacion_habitual_madre \\\n",
"0 NINGUNA HOGAR \n",
"1 SECUNDARIA COMPLETA HOGAR \n",
"2 PRIMARIA INCOMPLETA AMA DE CASA \n",
"3 PRIMARIA COMPLETA AMA DE CASA \n",
"4 NINGUNA AMA DE CASA \n",
"... ... ... \n",
"5873372 PROFESIONAL LIC. EN INFORMATICA \n",
"5873373 SECUNDARIA COMPLETA HOGAR \n",
"5873374 SECUNDARIA COMPLETA HOGAR \n",
"5873375 BACHILLERATO O PREPARATORIA INCOMPLETA HOGAR \n",
"5873376 SECUNDARIA INCOMPLETA HOGAR \n",
"\n",
" trabaja_actualmente fecha_nacimiento_nac_vivo \\\n",
"0 NaN 19/04/2017 \n",
"1 NaN 11/01/2017 \n",
"2 NaN 22/05/2017 \n",
"3 NaN 03/06/2017 \n",
"4 NaN 29/04/2017 \n",
"... ... ... \n",
"5873372 SI 23/05/2019 \n",
"5873373 NaN 24/05/2019 \n",
"5873374 NaN 24/05/2019 \n",
"5873375 NaN 25/05/2019 \n",
"5873376 NaN 25/05/2019 \n",
"\n",
" hora_nacimiento_nac_vivo sexo_nac_vivo semanas_gestacion_nac_vivo \\\n",
"0 02:00:00 MUJER 40 \n",
"1 13:00:00 MUJER 39 \n",
"2 12:23:00 MUJER 38 \n",
"3 10:35:00 MUJER 99 \n",
"4 23:12:00 MUJER 39 \n",
"... ... ... ... \n",
"5873372 16:06:00 MUJER 39 \n",
"5873373 16:55:00 HOMBRE 37 \n",
"5873374 23:13:00 HOMBRE 42 \n",
"5873375 01:47:00 HOMBRE 39 \n",
"5873376 08:53:00 MUJER 40 \n",
"\n",
" talla_nac_vivo peso_nac_vivo valoracion_apgar_nac_vivo \\\n",
"0 50 3100 8 \n",
"1 50 3900 0 \n",
"2 50 2960 9 \n",
"3 48 2900 99 \n",
"4 51 3050 9 \n",
"... ... ... ... \n",
"5873372 49 2990 9 \n",
"5873373 50 3830 9 \n",
"5873374 56 4460 9 \n",
"5873375 49 3090 9 \n",
"5873376 54 3300 9 \n",
"\n",
" valoracion_silverman_nac_vivo producto_de_un_embarazo \\\n",
"0 0 ÚNICO \n",
"1 0 ÚNICO \n",
"2 0 ÚNICO \n",
"3 99 ÚNICO \n",
"4 0 ÚNICO \n",
"... ... ... \n",
"5873372 0 ÚNICO \n",
"5873373 0 ÚNICO \n",
"5873374 0 ÚNICO \n",
"5873375 0 ÚNICO \n",
"5873376 0 ÚNICO \n",
"\n",
" entidad_certifico CVE_CIE CVE_CIE2 \n",
"0 CHIAPAS 0000 0000 \n",
"1 CHIAPAS 0000 0000 \n",
"2 CHIAPAS 0000 0000 \n",
"3 CHIAPAS 0000 0000 \n",
"4 CHIAPAS 0000 0000 \n",
"... ... ... ... \n",
"5873372 ZACATECAS 0000 0000 \n",
"5873373 ZACATECAS 0000 0000 \n",
"5873374 ZACATECAS 0000 0000 \n",
"5873375 ZACATECAS 0000 0000 \n",
"5873376 ZACATECAS 0000 0000 \n",
"\n",
"[5873377 rows x 32 columns]"
]
},
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df[columns_selected + [\"CVE_CIE\", \"CVE_CIE2\"]]"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "7aaf8aac-3785-4704-b3e2-214a9234602b",
"metadata": {},
"outputs": [],
"source": [
"def _codigo_de_anomalia(x):\n",
"    \"\"\"Join the CVE_CIE and CVE_CIE2 entries of x with a comma.\n",
"\n",
"    x is anything indexable by those two keys (here, one DataFrame row). Both\n",
"    values are converted to text first, so a missing value appears as 'nan'.\n",
"    \"\"\"\n",
"    primero, segundo = x[\"CVE_CIE\"], x[\"CVE_CIE2\"]\n",
"    return f\"{primero},{segundo}\"\n",
"\n",
"\n",
"df[\"codigo_anomalia\"] = df[[\"CVE_CIE\", \"CVE_CIE2\"]].apply(_codigo_de_anomalia, axis=1)"
]
},
{
"cell_type": "code",
"execution_count": 16,
"id": "f69b89af-239a-4270-818c-89f25f4b2f88",
"metadata": {},
"outputs": [],
"source": [
"df = df[columns_selected + [\"codigo_anomalia\"]]"
]
},
{
"cell_type": "code",
"execution_count": 17,
"id": "904449a1-3e85-4837-ae7c-8e1ee6f3dee9",
"metadata": {},
"outputs": [],
"source": [
"def _ano_nacimiento_vivo_func(str_date):\n",
"    \"\"\"Return the last '/'-separated field of a date string.\n",
"\n",
"    The fecha_nacimiento_nac_vivo values displayed in this notebook look like\n",
"    19/04/2017, so for them the field is the year. Any value that is not a\n",
"    str, such as the float NaN pandas uses for a missing cell, yields an\n",
"    empty string instead of raising.\n",
"    \"\"\"\n",
"    # Explicit type check instead of a bare `except:`, which would also have\n",
"    # swallowed KeyboardInterrupt and hidden unrelated errors.\n",
"    if not isinstance(str_date, str):\n",
"        return \"\"\n",
"    return str_date.rsplit(\"/\", 1)[-1]\n",
"\n",
"\n",
"df[\"año_de_nacimiento_vivo\"] = df[\"fecha_nacimiento_nac_vivo\"].apply(\n",
" _ano_nacimiento_vivo_func\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "f08a323d-4570-404f-8d11-7288e3973ee7",
"metadata": {},
"outputs": [],
"source": [
"columnas_finales = [\n",
" \"edo_captura\",\n",
" \"edo_nac_madre\",\n",
" \"fecha_nac_madre\",\n",
" \"edad_madre\",\n",
" \"estado_conyugal\",\n",
" \"entidad_residencia_madre\",\n",
" \"numero_embarazos\",\n",
" \"hijos_nacidos_muertos\",\n",
" \"hijos_nacidos_vivos\",\n",
" \"hijos_sobrevivientes\",\n",
" \"el_hijo_anterior_nacio\",\n",
" \"vive_aun_hijo_anterior\",\n",
" \"orden_nacimiento\",\n",
" \"recibio_atencion_prenatal\",\n",
" \"trimestre_recibio_primera_consulta\",\n",
" \"total_consultas_recibidas\",\n",
" \"madre_sobrevivio_al_parto\",\n",
" \"escolaridad_madre\",\n",
" \"ocupacion_habitual_madre\",\n",
" \"trabaja_actualmente\",\n",
" \"fecha_nacimiento_nac_vivo\",\n",
" \"hora_nacimiento_nac_vivo\",\n",
" \"sexo_nac_vivo\",\n",
" \"semanas_gestacion_nac_vivo\",\n",
" \"talla_nac_vivo\",\n",
" \"peso_nac_vivo\",\n",
" \"valoracion_apgar_nac_vivo\",\n",
" \"valoracion_silverman_nac_vivo\",\n",
" \"producto_de_un_embarazo\",\n",
" \"codigo_anomalia\",\n",
" \"entidad_certifico\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "010aaf2a-4098-4609-b110-9761902d1118",
"metadata": {},
"outputs": [],
"source": [
"df[columnas_finales].to_csv(\"2017-2019.csv\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "d61bb185-91fa-4872-a2f8-1638c1abc3c4",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}