{ "cells": [ { "cell_type": "code", "execution_count": 12, "id": "16c86730-6982-4338-a98c-49f0c5b603ba", "metadata": {}, "outputs": [], "source": [ "from functools import cache\n", "import pandas as pd\n", "\n", "pd.set_option(\"display.max_columns\", None)" ] }, { "cell_type": "code", "execution_count": 13, "id": "754c0d91-a10d-4e84-9dc2-02b01ec1ac57", "metadata": {}, "outputs": [], "source": [ "@cache\n", "def get_dataset_for(year):\n", " return pd.read_csv(f\"datasets/sinac{year}DatosAbiertos.csv\", dtype=object)" ] }, { "cell_type": "code", "execution_count": null, "id": "d03c787f-f963-4eff-8294-63bc7b073d04", "metadata": {}, "outputs": [], "source": [ "_df = get_dataset_for(2013)" ] }, { "cell_type": "code", "execution_count": null, "id": "9b688263-b6ea-4cec-a1e3-8b7a9935d909", "metadata": { "scrolled": true }, "outputs": [], "source": [ "estados_mexicanos = {\n", " \"AGUASCALIENTES\",\n", " \"BAJA CALIFORNIA\",\n", " \"BAJA CALIFORNIA SUR\",\n", " \"CAMPECHE\",\n", " \"CHIAPAS\",\n", " \"CHIHUAHUA\",\n", " \"COAHUILA DE ZARAGOZA\",\n", " \"COLIMA\",\n", " \"DISTRITO FEDERAL\",\n", " \"DURANGO\",\n", " \"GUANAJUATO\",\n", " \"GUERRERO\",\n", " \"HIDALGO\",\n", " \"JALISCO\",\n", " \"MEXICO\",\n", " \"MICHOACAN DE OCAMPO\",\n", " \"MORELOS\",\n", " \"NAYARIT\",\n", " \"NUEVO LEON\",\n", " \"OAXACA\",\n", " \"PUEBLA\",\n", " \"QUERETARO DE ARTEAGA\",\n", " \"QUINTANA ROO\",\n", " \"SAN LUIS POTOSI\",\n", " \"SINALOA\",\n", " \"SONORA\",\n", " \"TABASCO\",\n", " \"TAMAULIPAS\",\n", " \"TLAXCALA\",\n", " \"VERACRUZ DE IGNACIO DE LA LLAVE\",\n", " \"YUCATAN\",\n", " \"ZACATECAS\",\n", "}" ] }, { "cell_type": "code", "execution_count": null, "id": "bbfc26f0-7bae-444c-9f08-cbf77096713a", "metadata": { "scrolled": true }, "outputs": [], "source": [ "_df.sample(n=20)" ] }, { "cell_type": "code", "execution_count": null, "id": "f793f126-f1a6-417c-8801-b39fdd9b740d", "metadata": {}, "outputs": [], "source": [ "columns = [\n", " \"ENTIDAD_NACMAD\",\n", " \"FECH_NACM\",\n", " \"ESTADO_CIVIL\",\n", " \"ENTIDAD_RESMAD\",\n", " \"NUMERO_EMBARAZOS\",\n", " \"NACIDOS_MUERTOS\",\n", " \"NACIDOS_VIVOS\",\n", " \"SOBREVIVIENTES\",\n", " \"ANTERIOR_NACIO\",\n", " \"VIVE_AUN\",\n", " \"ORDEN_NAC\",\n", " \"ATENCION_PRENA\",\n", " \"TRIMESTR_ATEN\",\n", " \"SOBREVIVIO_PARTO\",\n", " \"ESCOLARIDAD\",\n", " \"DESC_OCUPHAB\",\n", " \"OCUPACION_HABITUAL\",\n", " \"TRABAJA_ACTUALMENTE\",\n", " \"FECH_NACH\",\n", " \"HORA_NACH\",\n", " \"SEXO_RN\",\n", " \"GESTACH\",\n", " \"TALLAH\",\n", " \"PESOH\",\n", " \"APGARH\",\n", " \"SILVERMAN\",\n", " \"NACIMIENTOS\",\n", " \"MES_NACI\",\n", " \"Producto\",\n", " \"CIE10\",\n", " \"CIE10_2da\",\n", " \"PROCEDIMIENTO\",\n", " \"OTRO_PROCEDIMIENTO\",\n", " \"LUGAR_NACIM\",\n", " \"ENTIDAD_NACIM\",\n", " \"ENTIDAD_CERTIF\",\n", " \"FECHA_CERTIF\",\n", "]\n", "columns = [x.lower() for x in columns]" ] }, { "cell_type": "code", "execution_count": null, "id": "3a0a131b-b6d5-490c-afe8-b38e01f67afa", "metadata": {}, "outputs": [], "source": [ "# len(set(columns) - set(df.columns.to_list()))" ] }, { "cell_type": "code", "execution_count": null, "id": "22d00a15-0b36-4da8-be7a-ea5081c0e20b", "metadata": {}, "outputs": [], "source": [ "# columns_original = get_dataset_for(2013).columns.to_list()\n", "columns_selected = [\n", " \"edo_captura\",\n", " \"edo_nac_madre\",\n", " \"fecha_nac_madre\",\n", " \"edad_madre\",\n", " \"estado_conyugal\",\n", " \"entidad_residencia_madre\",\n", " \"numero_embarazos\",\n", " \"hijos_nacidos_muertos\",\n", " \"hijos_nacidos_vivos\",\n", " \"hijos_sobrevivientes\",\n", " \"el_hijo_anterior_nacio\",\n", " \"vive_aun_hijo_anterior\",\n", " \"orden_nacimiento\",\n", " \"recibio_atencion_prenatal\",\n", " \"trimestre_recibio_primera_consulta\",\n", " \"total_consultas_recibidas\",\n", " \"madre_sobrevivio_al_parto\",\n", " \"escolaridad_madre\",\n", " \"ocupacion_habitual_madre\",\n", " \"trabaja_actualmente\",\n", " \"fecha_nacimiento_nac_vivo\",\n", " \"hora_nacimiento_nac_vivo\",\n", " \"sexo_nac_vivo\",\n", " \"semanas_gestacion_nac_vivo\",\n", " \"talla_nac_vivo\",\n", " \"peso_nac_vivo\",\n", " \"valoracion_apgar_nac_vivo\",\n", " \"valoracion_silverman_nac_vivo\",\n", " \"producto_de_un_embarazo\",\n", " \"codigo_anomalia\",\n", " \"anomalia_congenita_nac_vivo\",\n", " \"entidad_nacimiento\",\n", " \"entidad_certifico\",\n", "]\n", "print(\" - \", end=\"\")\n", "print(*sorted(columns_selected), sep=\"\\n - \")\n", "len(columns_selected)" ] }, { "cell_type": "code", "execution_count": null, "id": "1938d1bb-b1a5-4d85-9e7e-6df8636bef9b", "metadata": { "scrolled": true }, "outputs": [], "source": [ "print(\"Año - #Cols - #Faltantes\")\n", "for year in range(2008, 2020):\n", " df = get_dataset_for(year)\n", " columns = set(df.columns.to_list())\n", " print(year, \" - \", len(columns), \" - \", len(set(columns_selected) - columns))" ] }, { "cell_type": "code", "execution_count": null, "id": "c9ebdd00-ee89-4806-82aa-589e4bd7e3df", "metadata": {}, "outputs": [], "source": [ "df = pd.concat([get_dataset_for(year)[columns_selected] for year in range(2010, 2017)])" ] }, { "cell_type": "code", "execution_count": null, "id": "95ae7344-2c95-4f22-b381-faddec655ee8", "metadata": {}, "outputs": [], "source": [ "df" ] }, { "cell_type": "code", "execution_count": null, "id": "28ab2aac-ab57-4c3c-ad7d-3af70bbdeb60", "metadata": {}, "outputs": [], "source": [ "def _ano_nacimiento_vivo_func(str_date):\n", " try:\n", " return str_date.split(\"/\")[-1]\n", " except:\n", " return \"\"\n", "\n", "\n", "df[\"año_de_nacimiento_vivo\"] = df[\"fecha_nacimiento_nac_vivo\"].apply(\n", " _ano_nacimiento_vivo_func\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "4dd783b6-3639-4ab6-9582-438f97a26e9c", "metadata": {}, "outputs": [], "source": [ "df.edad_madre = df.edad_madre.astype(int)\n", "df = df[df.edad_madre < 120]" ] }, { "cell_type": "code", "execution_count": null, "id": "f941bb5f-dc2f-40d3-a626-00d948370241", "metadata": {}, "outputs": [], "source": [ "df.info()" ] }, { "cell_type": "code", "execution_count": null, "id": "d962779a-4747-4bc3-9095-236459bb3c64", "metadata": {}, "outputs": [], "source": [ "set(df[\"año_de_nacimiento_vivo\"].to_list())" ] }, { "cell_type": "code", "execution_count": null, "id": "23b1d148-e7f8-44b2-9213-cd97b66762b8", "metadata": {}, "outputs": [], "source": [ "df_trisomias = df[df[\"codigo_anomalia\"].apply(lambda x: \"Q9\" in x)]" ] }, { "cell_type": "code", "execution_count": null, "id": "46e4d052-cc6d-4ad8-908b-57d5a156d8a6", "metadata": {}, "outputs": [], "source": [ "df_trisomias" ] }, { "cell_type": "code", "execution_count": null, "id": "eeac7dfa-51f1-4942-8c7e-8932640aff02", "metadata": {}, "outputs": [], "source": [ "df_trisomias[[\"edad_madre\"]].describe()" ] }, { "cell_type": "markdown", "id": "199d6f0e-f957-42c5-8527-1e1e8496147f", "metadata": {}, "source": [ "# Agrupación por estado lugar de nacimiento de la madre, y año del registro, agregación de edad de la madre" ] }, { "cell_type": "code", "execution_count": null, "id": "204a3e36-b24c-44e3-ad2c-b440538c6fe2", "metadata": {}, "outputs": [], "source": [ "consulta = df.groupby([\"edo_captura\", \"edo_nac_madre\", \"año_de_nacimiento_vivo\"]).agg(\n", " {\n", " \"edad_madre\": [\n", " \"count\",\n", " \"mean\",\n", " \"std\",\n", " \"min\",\n", " \"max\",\n", " ],\n", " }\n", ")" ] }, { "cell_type": "code", "execution_count": null, "id": "69e1796d-cb0f-4704-ae9d-aafe7701707b", "metadata": { "scrolled": true }, "outputs": [], "source": [ "[row_name for row_name in consulta.index.to_list() if row_name[0] == \"JALISCO\"]" ] }, { "cell_type": "code", "execution_count": null, "id": "ed9657d0-4d2e-4400-b665-18dae6dfe777", "metadata": {}, "outputs": [], "source": [ "edos_fronterizos = {\n", " \"BAJA CALIFORNIA\",\n", " \"SONORA\",\n", " \"CHIHUAHUA\",\n", " \"COAHUILA DE ZARAGOZA\",\n", " \"NUEVO LEON\",\n", " \"TAMAULIPAS\",\n", "}" ] }, { "cell_type": "code", "execution_count": null, "id": "dbaedd5e-5ddb-4cc3-ad0d-3eccf610e78e", "metadata": {}, "outputs": [], "source": [ "edo_nac_madre_de_interes = {\n", " \"ESTADOS UNIDOS DE NORTEAMERICA\",\n", " \"OTROS PAISES\",\n", "} # \"OTROS PAISES DE LATINOAMERICA\"}\n", "consulta_edo_madre_especifico = consulta.loc[\n", " [\n", " row_name\n", " for row_name in consulta.index.to_list()\n", " if row_name[1] in edo_nac_madre_de_interes\n", " # and row_name[0] in edos_fronterizos\n", " ]\n", "]" ] }, { "cell_type": "raw", "id": "0897a47d-d9b7-4cde-a843-82a78418a77c", "metadata": {}, "source": [ "with pd.option_context('display.max_rows', None, 'display.max_columns', None): # more options can be specified also\n", " display(consulta_edo_madre_especifico)" ] }, { "cell_type": "code", "execution_count": null, "id": "d3a4a225-caaa-4980-aa65-6c9c043b6971", "metadata": {}, "outputs": [], "source": [ "# consulta_edo_madre_especifico.index, consulta_edo_madre_especifico.columns" ] }, { "cell_type": "code", "execution_count": null, "id": "bfaa53b3-4006-4ded-8ff1-124ebd82d029", "metadata": {}, "outputs": [], "source": [ "%matplotlib notebook\n", "from matplotlib.figure import Figure" ] }, { "cell_type": "code", "execution_count": null, "id": "0e46df9b-891e-424e-9d56-21a7355f417c", "metadata": {}, "outputs": [], "source": [ "{x[0] for x in consulta_edo_madre_especifico.index.to_list()}" ] }, { "cell_type": "code", "execution_count": null, "id": "bf52e20f-e51f-4e26-bbea-e7a318c17b44", "metadata": {}, "outputs": [], "source": [ "fig = Figure(figsize=(12, 6))\n", "ax = fig.add_subplot()\n", "edo_nac_madre_deseado = \"ESTADOS UNIDOS DE NORTEAMERICA\"\n", "_df = consulta_edo_madre_especifico.reset_index()\n", "edo = \"BAJA CALIFORNIA\"\n", "for edo in {x[0] for x in consulta_edo_madre_especifico.index.to_list()}:\n", " _to_plot = _df[\n", " (_df[\"edo_nac_madre\"] == edo_nac_madre_deseado) & (_df[\"edo_captura\"] == edo)\n", " ][[(\"año_de_nacimiento_vivo\", \"\"), (\"edad_madre\", \"count\")]]\n", " _to_plot.columns = [\"Año\", \"Cantidad\"]\n", " _to_plot = _to_plot.set_index(\"Año\")\n", "\n", " _to_plot.plot(y=\"Cantidad\", ax=ax, label=edo, linestyle=\"--\", grid=True)\n", "\n", "ax.legend(loc=\"upper left\")\n", "ax.set_xlabel(\"Año\")\n", "ax.set_ylabel(\"Cantidad de Nacimientos\")\n", "fig" ] }, { "cell_type": "code", "execution_count": null, "id": "3df800dd-8ca3-4ef0-9f86-e3ccddbd3f27", "metadata": { "scrolled": true }, "outputs": [], "source": [ "consulta_edo_madre_especifico.reset_index().columns" ] }, { "cell_type": "markdown", "id": "0607aeff-41b8-49e0-985f-708a8be45d64", "metadata": {}, "source": [ "# Agrupación por estado de captura, y año" ] }, { "cell_type": "code", "execution_count": null, "id": "503b0648-5a53-4e10-97a5-6141d9c9c6b8", "metadata": {}, "outputs": [], "source": [ "# Edades de madres\n", "consulta = df.groupby([\"edo_captura\", \"año_de_nacimiento_vivo\"]).agg(\n", " {\n", " \"edad_madre\": [\n", " \"count\",\n", " \"mean\",\n", " \"std\",\n", " \"min\",\n", " \"max\",\n", " ],\n", " }\n", ")\n", "consulta2" ] }, { "cell_type": "code", "execution_count": null, "id": "1016421b-f5d4-4d99-bf10-5994abf0377f", "metadata": {}, "outputs": [], "source": [ "_df = consulta2.reset_index()\n", "_df.columns = [\"_\".join(x).rstrip(\"_\") for x in _df.columns.to_flat_index()]\n", "_df = _df.rename(columns={\"año_de_nacimiento_vivo\": \"año\"})\n", "_df" ] }, { "cell_type": "code", "execution_count": null, "id": "2fe9eafa-549f-45c0-923c-b59f5fdd5cc5", "metadata": {}, "outputs": [], "source": [ "fig = Figure(figsize=(12, 6))\n", "ax = fig.add_subplot()\n", "_df = consulta2.reset_index()\n", "_df.columns = [\"_\".join(x).rstrip(\"_\") for x in _df.columns.to_flat_index()]\n", "_df = _df.rename(columns={\"año_de_nacimiento_vivo\": \"año\"})\n", "edo = \"BAJA CALIFORNIA\"\n", "for edo in estados_mexicanos:\n", " _to_plot = _df[(_df[\"edo_captura\"] == edo)][\n", " [\"año\", \"edad_madre_mean\", \"edad_madre_std\"]\n", " ]\n", " _to_plot = _to_plot.set_index(\"año\")\n", " _to_plot.plot(y=\"edad_madre_mean\", ax=ax, label=edo, linestyle=\"--\", grid=True)\n", "\n", "\n", "fig" ] }, { "cell_type": "code", "execution_count": null, "id": "7f2d498f-afad-427d-b403-749dd2e7ac07", "metadata": {}, "outputs": [], "source": [ "fig = Figure(figsize=(12, 6))\n", "ax = fig.add_subplot()\n", "_df = consulta2.reset_index()\n", "for edo in {x[0] for x in consulta.index.to_list()}:\n", " _to_plot = _df[(_df[\"edo_captura\"] == edo)][\n", " [(\"año_de_nacimiento_vivo\", \"\"), (\"edad_madre\", \"count\")]\n", " ]\n", " _to_plot.columns = [\"Año\", \"Cantidad\"]\n", " _to_plot = _to_plot.set_index(\"Año\")\n", "\n", " _to_plot.plot(y=\"Cantidad\", ax=ax, label=edo, linestyle=\"--\", grid=True)\n", "\n", "ax.legend(loc=\"upper left\")\n", "ax.set_xlabel(\"Año\")\n", "ax.set_ylabel(\"Cantidad de Nacimientos\")\n", "fig" ] }, { "cell_type": "markdown", "id": "80f2928c-b4b7-49ea-84c9-6a4aba677be2", "metadata": {}, "source": [ "\n" ] }, { "cell_type": "markdown", "id": "d97e103b-5384-4bae-b7b8-ebbdb7ec6a68", "metadata": {}, "source": [ "# Filtrar a solo observar los:" ] }, { "cell_type": "markdown", "id": "f60b5c93-ffb7-41d6-90a8-9a0e32d396f7", "metadata": {}, "source": [ "## Top GDP" ] }, { "cell_type": "markdown", "id": "ee3bea25-e8bb-4af9-b246-b84b46a79e94", "metadata": {}, "source": [ "## Agrupación por zonas metropolitanas\n" ] }, { "cell_type": "code", "execution_count": null, "id": "a99f74c8-33d2-44fe-b78f-b0a9e5267251", "metadata": {}, "outputs": [], "source": [] } ], "metadata": { "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.6" } }, "nbformat": 4, "nbformat_minor": 5 }