{ "cells": [
 { "cell_type": "code", "execution_count": 1, "id": "043492dd-e09f-440f-ad35-e2e741860bba", "metadata": {}, "outputs": [], "source": [
  "from functools import cache\n",
  "import pandas as pd\n",
  "\n",
  "pd.set_option(\"display.max_columns\", None)\n",
  "\n",
  "# Exclusive bounds applied to `edad_madre` when the data is filtered below;\n",
  "# rows at or beyond either bound are discarded.\n",
  "EDAD_MADRE_MIN = 5\n",
  "EDAD_MADRE_MAX = 90"
 ] },
 { "cell_type": "code", "execution_count": 2, "id": "558043e1-1724-4bf6-8acf-e85c18b0150e", "metadata": {}, "outputs": [], "source": [
  "# Upper-case, unaccented state names.\n",
  "# NOTE(review): no other cell in this notebook references this set.\n",
  "estados_mexicanos = {\n",
  "    \"AGUASCALIENTES\",\n",
  "    \"BAJA CALIFORNIA\",\n",
  "    \"BAJA CALIFORNIA SUR\",\n",
  "    \"CAMPECHE\",\n",
  "    \"CHIAPAS\",\n",
  "    \"CHIHUAHUA\",\n",
  "    \"COAHUILA DE ZARAGOZA\",\n",
  "    \"COLIMA\",\n",
  "    \"DISTRITO FEDERAL\",\n",
  "    \"DURANGO\",\n",
  "    \"GUANAJUATO\",\n",
  "    \"GUERRERO\",\n",
  "    \"HIDALGO\",\n",
  "    \"JALISCO\",\n",
  "    \"MEXICO\",\n",
  "    \"MICHOACAN DE OCAMPO\",\n",
  "    \"MORELOS\",\n",
  "    \"NAYARIT\",\n",
  "    \"NUEVO LEON\",\n",
  "    \"OAXACA\",\n",
  "    \"PUEBLA\",\n",
  "    \"QUERETARO DE ARTEAGA\",\n",
  "    \"QUINTANA ROO\",\n",
  "    \"SAN LUIS POTOSI\",\n",
  "    \"SINALOA\",\n",
  "    \"SONORA\",\n",
  "    \"TABASCO\",\n",
  "    \"TAMAULIPAS\",\n",
  "    \"TLAXCALA\",\n",
  "    \"VERACRUZ DE IGNACIO DE LA LLAVE\",\n",
  "    \"YUCATAN\",\n",
  "    \"ZACATECAS\",\n",
  "}"
 ] },
 { "cell_type": "code", "execution_count": 3, "id": "6b047178-2902-4eb2-9a34-0b7d7beb277e", "metadata": {}, "outputs": [], "source": [
  "# low_memory=False makes pandas infer each column's dtype from the whole file\n",
  "# rather than chunk by chunk, which avoids the mixed-type DtypeWarning.\n",
  "df = pd.read_csv(\"2010-2019.csv\", low_memory=False)"
 ] },
 { "cell_type": "code", "execution_count": 4, "id": "61675b16-391b-4821-8376-f92ec4b5b916", "metadata": {}, "outputs": [], "source": [
  "def _ano_nacimiento_vivo_func(str_date):\n",
  "    \"\"\"Return the text after the last '/' of a date string.\n",
  "\n",
  "    Parameters\n",
  "    ----------\n",
  "    str_date : object\n",
  "        One value of the `fecha_nacimiento_nac_vivo` column. Only `str`\n",
  "        values are parsed.\n",
  "\n",
  "    Returns\n",
  "    -------\n",
  "    str\n",
  "        The last '/'-separated field of `str_date`, or \"\" when `str_date`\n",
  "        is not a string (for example a missing value).\n",
  "\n",
  "    Notes\n",
  "    -----\n",
  "    Assumes the year is the last field of the date - TODO confirm against\n",
  "    the CSV.\n",
  "    \"\"\"\n",
  "    # Non-string values carry no date text; check the type explicitly\n",
  "    # instead of catching every exception, so unrelated errors stay visible.\n",
  "    if not isinstance(str_date, str):\n",
  "        return \"\"\n",
  "    return str_date.split(\"/\")[-1]\n",
  "\n",
  "\n",
  "df[\"año_de_nacimiento_vivo\"] = df[\"fecha_nacimiento_nac_vivo\"].apply(\n",
  "    _ano_nacimiento_vivo_func\n",
  ")"
 ] },
 { "cell_type": "code", "execution_count": 19, "id": "87a086d4-bab8-43a8-a121-8aaf3554e672", "metadata": {}, "outputs": [], "source": [
  "# Keep rows whose `edad_madre` lies strictly between the two bounds; rows with\n",
  "# a missing age are dropped too, because comparisons against NaN are False.\n",
  "df = df[(EDAD_MADRE_MIN < df[\"edad_madre\"]) & (df[\"edad_madre\"] < EDAD_MADRE_MAX)]"
 ] },
 { "cell_type": "code", "execution_count": 20, "id": "f8eff617-7273-435f-a09a-8db4ec005ee0", "metadata": {}, "outputs": [], "source": [
  "# Rows whose `codigo_anomalia` text contains \"Q9\" (missing codes never match).\n",
  "# NOTE(review): if these are ICD-10 codes, Q90-Q99 spans every chromosomal\n",
  "# abnormality, not only trisomies - confirm the intended code range.\n",
  "df_trisomias = df[\n",
  "    df[\"codigo_anomalia\"].astype(str).str.contains(\"Q9\", regex=False, na=False)\n",
  "]"
 ] },
 { "cell_type": "code", "execution_count": 21, "id": "1ff41e12-b6cd-41db-bd1b-47c2aa21c45e", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
edad_madre
countmeanstdminmax
año_de_nacimiento_vivo
201093030.5462378.2449391048
2011105131.0104668.1937771249
201296130.4620198.3105651347
2013105531.1829388.2479191151
2014103131.0184298.3563041350
2015101631.5009848.2950521452
2016104431.4530658.1474131447
2017104331.4103558.1745811347
2018105931.0642128.1731981348
201994132.0180668.1959181347
\n", "
" ], "text/plain": [ " edad_madre \n", " count mean std min max\n", "año_de_nacimiento_vivo \n", "2010 930 30.546237 8.244939 10 48\n", "2011 1051 31.010466 8.193777 12 49\n", "2012 961 30.462019 8.310565 13 47\n", "2013 1055 31.182938 8.247919 11 51\n", "2014 1031 31.018429 8.356304 13 50\n", "2015 1016 31.500984 8.295052 14 52\n", "2016 1044 31.453065 8.147413 14 47\n", "2017 1043 31.410355 8.174581 13 47\n", "2018 1059 31.064212 8.173198 13 48\n", "2019 941 32.018066 8.195918 13 47" ] }, "execution_count": 21, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Summary of `edad_madre` (count, mean, std, min, max) for the rows of\n",
  "# `df_trisomias`, one row per `año_de_nacimiento_vivo` value.\n",
  "consulta_trisomias = df_trisomias.groupby(by=\"año_de_nacimiento_vivo\").agg(\n",
  "    {\"edad_madre\": [\"count\", \"mean\", \"std\", \"min\", \"max\"]}\n",
  ")\n",
  "consulta_trisomias"
 ] },
 { "cell_type": "code", "execution_count": 22, "id": "942da486-5c14-4d37-a775-009151c68f29", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
edad_madre
countmeanstdminmax
año_de_nacimiento_vivo
2010206353325.2532206.319567958
2011215675125.2342236.331894958
2012219732725.1957686.321840958
2013218925725.1982356.322081959
2014217377325.2760096.322130958
2015214334525.3678356.296604959
2016207925125.4680086.292815959
2017203764725.5108216.305873962
2018194033825.6780516.328369960
2019186769325.8406306.342544958
\n", "
" ], "text/plain": [ " edad_madre \n", " count mean std min max\n", "año_de_nacimiento_vivo \n", "2010 2063533 25.253220 6.319567 9 58\n", "2011 2156751 25.234223 6.331894 9 58\n", "2012 2197327 25.195768 6.321840 9 58\n", "2013 2189257 25.198235 6.322081 9 59\n", "2014 2173773 25.276009 6.322130 9 58\n", "2015 2143345 25.367835 6.296604 9 59\n", "2016 2079251 25.468008 6.292815 9 59\n", "2017 2037647 25.510821 6.305873 9 62\n", "2018 1940338 25.678051 6.328369 9 60\n", "2019 1867693 25.840630 6.342544 9 58" ] }, "execution_count": 22, "metadata": {}, "output_type": "execute_result" } ], "source": [
  "# Mothers' ages: the same `edad_madre` summary over every row of `df`,\n",
  "# one row per `año_de_nacimiento_vivo` value.\n",
  "consulta_total = df.groupby(by=\"año_de_nacimiento_vivo\").agg(\n",
  "    {\"edad_madre\": [\"count\", \"mean\", \"std\", \"min\", \"max\"]}\n",
  ")\n",
  "consulta_total"
 ] },
 { "cell_type": "code", "execution_count": 23, "id": "5290532e-d470-49b6-bd68-07eab1b86e4c", "metadata": {}, "outputs": [], "source": [
  "# Put both summaries side by side on their shared year index; the suffixes\n",
  "# tell apart the two column groups that are both named `edad_madre`.\n",
  "consulta = consulta_total.join(\n",
  "    other=consulta_trisomias, lsuffix=\"_general\", rsuffix=\"_trisomias\"\n",
  ")"
 ] },
 { "cell_type": "code", "execution_count": 24, "id": "7a171ccc-139d-4fd3-b438-0475dd43e27b", "metadata": {}, "outputs": [], "source": [
  "# NOTE(review): despite its name, `porcentaje` holds a fraction (trisomy row\n",
  "# count divided by overall row count); it is not multiplied by 100.\n",
  "consulta[\"porcentaje\"] = consulta[(\"edad_madre_trisomias\", \"count\")].div(\n",
  "    consulta[(\"edad_madre_general\", \"count\")]\n",
  ")"
 ] },
 { "cell_type": "code", "execution_count": 25, "id": "2d932831-b2ce-46e4-a531-edd08d4d5ecb", "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
edad_madre_generaledad_madre_trisomiasporcentaje
countmeanstdminmaxcountmeanstdminmax
año_de_nacimiento_vivo
2010206353325.2532206.31956795893030.5462378.24493910480.000451
2011215675125.2342236.331894958105131.0104668.19377712490.000487
2012219732725.1957686.32184095896130.4620198.31056513470.000437
2013218925725.1982356.322081959105531.1829388.24791911510.000482
2014217377325.2760096.322130958103131.0184298.35630413500.000474
2015214334525.3678356.296604959101631.5009848.29505214520.000474
2016207925125.4680086.292815959104431.4530658.14741314470.000502
2017203764725.5108216.305873962104331.4103558.17458113470.000512
2018194033825.6780516.328369960105931.0642128.17319813480.000546
2019186769325.8406306.34254495894132.0180668.19591813470.000504
\n", "
" ], "text/plain": [ " edad_madre_general \\\n", " count mean std min max \n", "año_de_nacimiento_vivo \n", "2010 2063533 25.253220 6.319567 9 58 \n", "2011 2156751 25.234223 6.331894 9 58 \n", "2012 2197327 25.195768 6.321840 9 58 \n", "2013 2189257 25.198235 6.322081 9 59 \n", "2014 2173773 25.276009 6.322130 9 58 \n", "2015 2143345 25.367835 6.296604 9 59 \n", "2016 2079251 25.468008 6.292815 9 59 \n", "2017 2037647 25.510821 6.305873 9 62 \n", "2018 1940338 25.678051 6.328369 9 60 \n", "2019 1867693 25.840630 6.342544 9 58 \n", "\n", " edad_madre_trisomias \\\n", " count mean std min max \n", "año_de_nacimiento_vivo \n", "2010 930 30.546237 8.244939 10 48 \n", "2011 1051 31.010466 8.193777 12 49 \n", "2012 961 30.462019 8.310565 13 47 \n", "2013 1055 31.182938 8.247919 11 51 \n", "2014 1031 31.018429 8.356304 13 50 \n", "2015 1016 31.500984 8.295052 14 52 \n", "2016 1044 31.453065 8.147413 14 47 \n", "2017 1043 31.410355 8.174581 13 47 \n", "2018 1059 31.064212 8.173198 13 48 \n", "2019 941 32.018066 8.195918 13 47 \n", "\n", " porcentaje \n", " \n", "año_de_nacimiento_vivo \n", "2010 0.000451 \n", "2011 0.000487 \n", "2012 0.000437 \n", "2013 0.000482 \n", "2014 0.000474 \n", "2015 0.000474 \n", "2016 0.000502 \n", "2017 0.000512 \n", "2018 0.000546 \n", "2019 0.000504 " ] }, "execution_count": 25, "metadata": {}, "output_type": "execute_result" } ], "source": [ "consulta" ] }, { "cell_type": "markdown", "id": "a3a83bc5-b01f-4bee-a17a-6ab5ba8458ad", "metadata": {}, "source": [ "# Pendiente\n", "\n", "Generar una gráfica de cajas con las edades de las madres de hijos con trisomías.\n", "\n", "Referencia: https://stackoverflow.com/a/66565512" ] }, { "cell_type": "code", "execution_count": null, "id": "46753c90-fca5-4b92-9f64-165460b03bd5", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 },
"file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 5 }