unir-metodologia/estudio_edad_madres_por_año_general.ipynb

790 lines
24 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "043492dd-e09f-440f-ad35-e2e741860bba",
"metadata": {},
"outputs": [],
"source": [
"from functools import cache\n",
"import pandas as pd\n",
"\n",
"pd.set_option(\"display.max_columns\", None)"
]
},
{
"cell_type": "code",
"execution_count": 2,
"id": "558043e1-1724-4bf6-8acf-e85c18b0150e",
"metadata": {},
"outputs": [],
"source": [
"estados_mexicanos = {\n",
" \"AGUASCALIENTES\",\n",
" \"BAJA CALIFORNIA\",\n",
" \"BAJA CALIFORNIA SUR\",\n",
" \"CAMPECHE\",\n",
" \"CHIAPAS\",\n",
" \"CHIHUAHUA\",\n",
" \"COAHUILA DE ZARAGOZA\",\n",
" \"COLIMA\",\n",
" \"DISTRITO FEDERAL\",\n",
" \"DURANGO\",\n",
" \"GUANAJUATO\",\n",
" \"GUERRERO\",\n",
" \"HIDALGO\",\n",
" \"JALISCO\",\n",
" \"MEXICO\",\n",
" \"MICHOACAN DE OCAMPO\",\n",
" \"MORELOS\",\n",
" \"NAYARIT\",\n",
" \"NUEVO LEON\",\n",
" \"OAXACA\",\n",
" \"PUEBLA\",\n",
" \"QUERETARO DE ARTEAGA\",\n",
" \"QUINTANA ROO\",\n",
" \"SAN LUIS POTOSI\",\n",
" \"SINALOA\",\n",
" \"SONORA\",\n",
" \"TABASCO\",\n",
" \"TAMAULIPAS\",\n",
" \"TLAXCALA\",\n",
" \"VERACRUZ DE IGNACIO DE LA LLAVE\",\n",
" \"YUCATAN\",\n",
" \"ZACATECAS\",\n",
"}"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "6b047178-2902-4eb2-9a34-0b7d7beb277e",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/05/y38rqjl55hjb_hbnypxzgrsw0000gn/T/ipykernel_93495/3168623387.py:1: DtypeWarning: Columns (21) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" df = pd.read_csv(\"2010-2019.csv\")\n"
]
}
],
"source": [
"df = pd.read_csv(\"2010-2019.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "61675b16-391b-4821-8376-f92ec4b5b916",
"metadata": {},
"outputs": [],
"source": [
"def _ano_nacimiento_vivo_func(str_date):\n",
" try:\n",
" return str_date.split(\"/\")[-1]\n",
" except:\n",
" return \"\"\n",
"\n",
"\n",
"df[\"año_de_nacimiento_vivo\"] = df[\"fecha_nacimiento_nac_vivo\"].apply(\n",
" _ano_nacimiento_vivo_func\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 19,
"id": "87a086d4-bab8-43a8-a121-8aaf3554e672",
"metadata": {},
"outputs": [],
"source": [
"df = df[(5 < df[\"edad_madre\"]) & (df[\"edad_madre\"] < 90)]"
]
},
{
"cell_type": "code",
"execution_count": 20,
"id": "f8eff617-7273-435f-a09a-8db4ec005ee0",
"metadata": {},
"outputs": [],
"source": [
"df_trisomias = df[df[\"codigo_anomalia\"].apply(lambda x: \"Q9\" in str(x))]"
]
},
{
"cell_type": "code",
"execution_count": 21,
"id": "1ff41e12-b6cd-41db-bd1b-47c2aa21c45e",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"5\" halign=\"left\">edad_madre</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>año_de_nacimiento_vivo</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2010</th>\n",
" <td>930</td>\n",
" <td>30.546237</td>\n",
" <td>8.244939</td>\n",
" <td>10</td>\n",
" <td>48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>1051</td>\n",
" <td>31.010466</td>\n",
" <td>8.193777</td>\n",
" <td>12</td>\n",
" <td>49</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>961</td>\n",
" <td>30.462019</td>\n",
" <td>8.310565</td>\n",
" <td>13</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>1055</td>\n",
" <td>31.182938</td>\n",
" <td>8.247919</td>\n",
" <td>11</td>\n",
" <td>51</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>1031</td>\n",
" <td>31.018429</td>\n",
" <td>8.356304</td>\n",
" <td>13</td>\n",
" <td>50</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015</th>\n",
" <td>1016</td>\n",
" <td>31.500984</td>\n",
" <td>8.295052</td>\n",
" <td>14</td>\n",
" <td>52</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016</th>\n",
" <td>1044</td>\n",
" <td>31.453065</td>\n",
" <td>8.147413</td>\n",
" <td>14</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017</th>\n",
" <td>1043</td>\n",
" <td>31.410355</td>\n",
" <td>8.174581</td>\n",
" <td>13</td>\n",
" <td>47</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018</th>\n",
" <td>1059</td>\n",
" <td>31.064212</td>\n",
" <td>8.173198</td>\n",
" <td>13</td>\n",
" <td>48</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2019</th>\n",
" <td>941</td>\n",
" <td>32.018066</td>\n",
" <td>8.195918</td>\n",
" <td>13</td>\n",
" <td>47</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" edad_madre \n",
" count mean std min max\n",
"año_de_nacimiento_vivo \n",
"2010 930 30.546237 8.244939 10 48\n",
"2011 1051 31.010466 8.193777 12 49\n",
"2012 961 30.462019 8.310565 13 47\n",
"2013 1055 31.182938 8.247919 11 51\n",
"2014 1031 31.018429 8.356304 13 50\n",
"2015 1016 31.500984 8.295052 14 52\n",
"2016 1044 31.453065 8.147413 14 47\n",
"2017 1043 31.410355 8.174581 13 47\n",
"2018 1059 31.064212 8.173198 13 48\n",
"2019 941 32.018066 8.195918 13 47"
]
},
"execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"consulta_trisomias = df_trisomias.groupby([\"año_de_nacimiento_vivo\"]).agg(\n",
" {\n",
" \"edad_madre\": [\n",
" \"count\",\n",
" \"mean\",\n",
" \"std\",\n",
" \"min\",\n",
" \"max\",\n",
" ],\n",
" }\n",
")\n",
"consulta_trisomias"
]
},
{
"cell_type": "code",
"execution_count": 22,
"id": "942da486-5c14-4d37-a775-009151c68f29",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"5\" halign=\"left\">edad_madre</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" </tr>\n",
" <tr>\n",
" <th>año_de_nacimiento_vivo</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2010</th>\n",
" <td>2063533</td>\n",
" <td>25.253220</td>\n",
" <td>6.319567</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>2156751</td>\n",
" <td>25.234223</td>\n",
" <td>6.331894</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>2197327</td>\n",
" <td>25.195768</td>\n",
" <td>6.321840</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>2189257</td>\n",
" <td>25.198235</td>\n",
" <td>6.322081</td>\n",
" <td>9</td>\n",
" <td>59</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>2173773</td>\n",
" <td>25.276009</td>\n",
" <td>6.322130</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015</th>\n",
" <td>2143345</td>\n",
" <td>25.367835</td>\n",
" <td>6.296604</td>\n",
" <td>9</td>\n",
" <td>59</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016</th>\n",
" <td>2079251</td>\n",
" <td>25.468008</td>\n",
" <td>6.292815</td>\n",
" <td>9</td>\n",
" <td>59</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017</th>\n",
" <td>2037647</td>\n",
" <td>25.510821</td>\n",
" <td>6.305873</td>\n",
" <td>9</td>\n",
" <td>62</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018</th>\n",
" <td>1940338</td>\n",
" <td>25.678051</td>\n",
" <td>6.328369</td>\n",
" <td>9</td>\n",
" <td>60</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2019</th>\n",
" <td>1867693</td>\n",
" <td>25.840630</td>\n",
" <td>6.342544</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" edad_madre \n",
" count mean std min max\n",
"año_de_nacimiento_vivo \n",
"2010 2063533 25.253220 6.319567 9 58\n",
"2011 2156751 25.234223 6.331894 9 58\n",
"2012 2197327 25.195768 6.321840 9 58\n",
"2013 2189257 25.198235 6.322081 9 59\n",
"2014 2173773 25.276009 6.322130 9 58\n",
"2015 2143345 25.367835 6.296604 9 59\n",
"2016 2079251 25.468008 6.292815 9 59\n",
"2017 2037647 25.510821 6.305873 9 62\n",
"2018 1940338 25.678051 6.328369 9 60\n",
"2019 1867693 25.840630 6.342544 9 58"
]
},
"execution_count": 22,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Edades de madres\n",
"consulta_total = df.groupby([\"año_de_nacimiento_vivo\"]).agg(\n",
" {\n",
" \"edad_madre\": [\n",
" \"count\",\n",
" \"mean\",\n",
" \"std\",\n",
" \"min\",\n",
" \"max\",\n",
" ],\n",
" }\n",
")\n",
"consulta_total"
]
},
{
"cell_type": "code",
"execution_count": 23,
"id": "5290532e-d470-49b6-bd68-07eab1b86e4c",
"metadata": {},
"outputs": [],
"source": [
"consulta = consulta_total.join(\n",
" consulta_trisomias, rsuffix=\"_trisomias\", lsuffix=\"_general\"\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 24,
"id": "7a171ccc-139d-4fd3-b438-0475dd43e27b",
"metadata": {},
"outputs": [],
"source": [
"consulta[\"porcentaje\"] = (\n",
" consulta[(\"edad_madre_trisomias\", \"count\")]\n",
" / consulta[(\"edad_madre_general\", \"count\")]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 25,
"id": "2d932831-b2ce-46e4-a531-edd08d4d5ecb",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead tr th {\n",
" text-align: left;\n",
" }\n",
"\n",
" .dataframe thead tr:last-of-type th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr>\n",
" <th></th>\n",
" <th colspan=\"5\" halign=\"left\">edad_madre_general</th>\n",
" <th colspan=\"5\" halign=\"left\">edad_madre_trisomias</th>\n",
" <th>porcentaje</th>\n",
" </tr>\n",
" <tr>\n",
" <th></th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th>count</th>\n",
" <th>mean</th>\n",
" <th>std</th>\n",
" <th>min</th>\n",
" <th>max</th>\n",
" <th></th>\n",
" </tr>\n",
" <tr>\n",
" <th>año_de_nacimiento_vivo</th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" <th></th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>2010</th>\n",
" <td>2063533</td>\n",
" <td>25.253220</td>\n",
" <td>6.319567</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" <td>930</td>\n",
" <td>30.546237</td>\n",
" <td>8.244939</td>\n",
" <td>10</td>\n",
" <td>48</td>\n",
" <td>0.000451</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2011</th>\n",
" <td>2156751</td>\n",
" <td>25.234223</td>\n",
" <td>6.331894</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" <td>1051</td>\n",
" <td>31.010466</td>\n",
" <td>8.193777</td>\n",
" <td>12</td>\n",
" <td>49</td>\n",
" <td>0.000487</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2012</th>\n",
" <td>2197327</td>\n",
" <td>25.195768</td>\n",
" <td>6.321840</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" <td>961</td>\n",
" <td>30.462019</td>\n",
" <td>8.310565</td>\n",
" <td>13</td>\n",
" <td>47</td>\n",
" <td>0.000437</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2013</th>\n",
" <td>2189257</td>\n",
" <td>25.198235</td>\n",
" <td>6.322081</td>\n",
" <td>9</td>\n",
" <td>59</td>\n",
" <td>1055</td>\n",
" <td>31.182938</td>\n",
" <td>8.247919</td>\n",
" <td>11</td>\n",
" <td>51</td>\n",
" <td>0.000482</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2014</th>\n",
" <td>2173773</td>\n",
" <td>25.276009</td>\n",
" <td>6.322130</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" <td>1031</td>\n",
" <td>31.018429</td>\n",
" <td>8.356304</td>\n",
" <td>13</td>\n",
" <td>50</td>\n",
" <td>0.000474</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2015</th>\n",
" <td>2143345</td>\n",
" <td>25.367835</td>\n",
" <td>6.296604</td>\n",
" <td>9</td>\n",
" <td>59</td>\n",
" <td>1016</td>\n",
" <td>31.500984</td>\n",
" <td>8.295052</td>\n",
" <td>14</td>\n",
" <td>52</td>\n",
" <td>0.000474</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2016</th>\n",
" <td>2079251</td>\n",
" <td>25.468008</td>\n",
" <td>6.292815</td>\n",
" <td>9</td>\n",
" <td>59</td>\n",
" <td>1044</td>\n",
" <td>31.453065</td>\n",
" <td>8.147413</td>\n",
" <td>14</td>\n",
" <td>47</td>\n",
" <td>0.000502</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2017</th>\n",
" <td>2037647</td>\n",
" <td>25.510821</td>\n",
" <td>6.305873</td>\n",
" <td>9</td>\n",
" <td>62</td>\n",
" <td>1043</td>\n",
" <td>31.410355</td>\n",
" <td>8.174581</td>\n",
" <td>13</td>\n",
" <td>47</td>\n",
" <td>0.000512</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2018</th>\n",
" <td>1940338</td>\n",
" <td>25.678051</td>\n",
" <td>6.328369</td>\n",
" <td>9</td>\n",
" <td>60</td>\n",
" <td>1059</td>\n",
" <td>31.064212</td>\n",
" <td>8.173198</td>\n",
" <td>13</td>\n",
" <td>48</td>\n",
" <td>0.000546</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2019</th>\n",
" <td>1867693</td>\n",
" <td>25.840630</td>\n",
" <td>6.342544</td>\n",
" <td>9</td>\n",
" <td>58</td>\n",
" <td>941</td>\n",
" <td>32.018066</td>\n",
" <td>8.195918</td>\n",
" <td>13</td>\n",
" <td>47</td>\n",
" <td>0.000504</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"</div>"
],
"text/plain": [
" edad_madre_general \\\n",
" count mean std min max \n",
"año_de_nacimiento_vivo \n",
"2010 2063533 25.253220 6.319567 9 58 \n",
"2011 2156751 25.234223 6.331894 9 58 \n",
"2012 2197327 25.195768 6.321840 9 58 \n",
"2013 2189257 25.198235 6.322081 9 59 \n",
"2014 2173773 25.276009 6.322130 9 58 \n",
"2015 2143345 25.367835 6.296604 9 59 \n",
"2016 2079251 25.468008 6.292815 9 59 \n",
"2017 2037647 25.510821 6.305873 9 62 \n",
"2018 1940338 25.678051 6.328369 9 60 \n",
"2019 1867693 25.840630 6.342544 9 58 \n",
"\n",
" edad_madre_trisomias \\\n",
" count mean std min max \n",
"año_de_nacimiento_vivo \n",
"2010 930 30.546237 8.244939 10 48 \n",
"2011 1051 31.010466 8.193777 12 49 \n",
"2012 961 30.462019 8.310565 13 47 \n",
"2013 1055 31.182938 8.247919 11 51 \n",
"2014 1031 31.018429 8.356304 13 50 \n",
"2015 1016 31.500984 8.295052 14 52 \n",
"2016 1044 31.453065 8.147413 14 47 \n",
"2017 1043 31.410355 8.174581 13 47 \n",
"2018 1059 31.064212 8.173198 13 48 \n",
"2019 941 32.018066 8.195918 13 47 \n",
"\n",
" porcentaje \n",
" \n",
"año_de_nacimiento_vivo \n",
"2010 0.000451 \n",
"2011 0.000487 \n",
"2012 0.000437 \n",
"2013 0.000482 \n",
"2014 0.000474 \n",
"2015 0.000474 \n",
"2016 0.000502 \n",
"2017 0.000512 \n",
"2018 0.000546 \n",
"2019 0.000504 "
]
},
"execution_count": 25,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"consulta"
]
},
{
"cell_type": "markdown",
"id": "a3a83bc5-b01f-4bee-a17a-6ab5ba8458ad",
"metadata": {},
"source": [
"# Pendiente\n",
"\n",
"Generar gráfica de cajas con edades de las madres con hijos de trisomias.\n",
"\n",
"https://stackoverflow.com/a/66565512"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "46753c90-fca5-4b92-9f64-165460b03bd5",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}