diff --git a/Pipfile b/Pipfile index e61a173..350c988 100644 --- a/Pipfile +++ b/Pipfile @@ -12,6 +12,7 @@ jupyterlab = "*" pandas = "*" scipy = "*" scikit-learn = "*" +pyarrow = "*" [dev-packages] diff --git a/analisis-de-columnas.ipynb b/analisis-de-columnas.ipynb index 00ce8f8..b1a7090 100644 --- a/analisis-de-columnas.ipynb +++ b/analisis-de-columnas.ipynb @@ -294,17 +294,63 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "62e36cac-2d1a-42c2-a68e-e0888d7afa0d", + "attachments": {}, + "cell_type": "markdown", + "id": "6bad6fa6-b4b3-4b20-9910-c827ecfc86cf", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "Las columnas de interes, netas seran las siguientes:" + ] + }, + { + "cell_type": "raw", + "id": "b83b48eb-1375-4b1c-8eca-53583fd090f0", + "metadata": {}, + "source": [ + "NOMBRE DE CAMPO\n", + "ENTIDAD_NACMAD\n", + "FECH_NACM\n", + "ESTADO_CIVIL\n", + "ENTIDAD_RESMAD\n", + "NUMERO_EMBARAZOS\n", + "NACIDOS_MUERTOS\n", + "NACIDOS_VIVOS\n", + "SOBREVIVIENTES\n", + "ANTERIOR_NACIO\n", + "VIVE_AUN\n", + "ORDEN_NAC\n", + "ATENCION_PRENA\n", + "TRIMESTR_ATEN\n", + "SOBREVIVIO_PARTO\n", + "ESCOLARIDAD\n", + "DESC_OCUPHAB\n", + "OCUPACION_HABITUAL\n", + "TRABAJA_ACTUALMENTE\n", + "FECH_NACH\n", + "HORA_NACH\n", + "SEXO_RN\n", + "GESTACH\n", + "TALLAH\n", + "PESOH\n", + "APGARH\n", + "SILVERMAN\n", + "NACIMIENTOS\n", + "MES_NACI\n", + "Producto\n", + "CIE10\n", + "CIE10_2da\n", + "PROCEDIMIENTO\n", + "OTRO_PROCEDIMIENTO\n", + "LUGAR_NACIM\n", + "ENTIDAD_NACIM\n", + "ENTIDAD_CERTIF\n", + "FECHA_CERTIF" + ] }, { "cell_type": "code", "execution_count": null, - "id": "8929221a-af25-4673-8952-0a55f4e59e80", + "id": "a17735af-9fa2-409c-a744-55418b93986a", "metadata": {}, "outputs": [], "source": [] diff --git a/exploracion-individual.ipynb b/exploracion-individual.ipynb new file mode 100644 index 0000000..632c9fa --- /dev/null +++ b/exploracion-individual.ipynb @@ -0,0 +1,491 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "16c86730-6982-4338-a98c-49f0c5b603ba", + "metadata": {}, + "outputs": [], + "source": [ + "from functools import cache\n", + "import pandas as pd\n", + "\n", + "@cache\n", + "def get_dataset_for(year):\n", + " return pd.read_csv(f\"datasets/sinac{year}DatosAbiertos.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "d03c787f-f963-4eff-8294-63bc7b073d04", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/05/y38rqjl55hjb_hbnypxzgrsw0000gn/T/ipykernel_48786/301737641.py:6: DtypeWarning: Columns (5,6,7,54,60) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " return pd.read_csv(f\"datasets/sinac{year}DatosAbiertos.csv\")\n" + ] + } + ], + "source": [ + "df = get_dataset_for(2015)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "f793f126-f1a6-417c-8801-b39fdd9b740d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
| \n", + " | edo_captura | \n", + "edo_nac_madre | \n", + "mpo_nac_madre | \n", + "fecha_nac_madre | \n", + "edad_madre | \n", + "madre_se_considera_indigena | \n", + "madre_habla_lengua_indigena | \n", + "lengua_indigena_hablada | \n", + "estado_conyugal | \n", + "entidad_residencia_madre | \n", + "... | \n", + "localidad_nacimiento | \n", + "certificado_por | \n", + "unidad_medica_certifico | \n", + "clues_certifico | \n", + "entidad_certifico | \n", + "municipio_certifico | \n", + "localidad_certifico | \n", + "fecha_certificacion | \n", + "fecha_captura | \n", + "tipo_formato | \n", + "
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | \n", + "MEXICO | \n", + "MEXICO | \n", + "NAUCALPAN DE JUAREZ | \n", + "16/09/1994 | \n", + "20 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "UNIÓN LIBRE | \n", + "MEXICO | \n", + "... | \n", + "NAUCALPAN DE JUAREZ | \n", + "MÉDICO GINECO-OBSTETRA | \n", + "NO APLICA | \n", + "9997 | \n", + "MEXICO | \n", + "NAUCALPAN DE JUAREZ | \n", + "NAUCALPAN DE JUAREZ | \n", + "07/08/2015 | \n", + "08/11/2015 | \n", + "2010 | \n", + "
| 1 | \n", + "MEXICO | \n", + "DISTRITO FEDERAL | \n", + "MIGUEL HIDALGO | \n", + "09/09/1995 | \n", + "19 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "SOLTERA | \n", + "MEXICO | \n", + "... | \n", + "NAUCALPAN DE JUAREZ | \n", + "MÉDICO GINECO-OBSTETRA | \n", + "NO APLICA | \n", + "9997 | \n", + "MEXICO | \n", + "NAUCALPAN DE JUAREZ | \n", + "NAUCALPAN DE JUAREZ | \n", + "06/08/2015 | \n", + "08/11/2015 | \n", + "2010 | \n", + "
| 2 | \n", + "MEXICO | \n", + "MEXICO | \n", + "NAUCALPAN DE JUAREZ | \n", + "08/08/1993 | \n", + "21 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "CASADA | \n", + "MEXICO | \n", + "... | \n", + "NAUCALPAN DE JUAREZ | \n", + "MÉDICO GINECO-OBSTETRA | \n", + "NO APLICA | \n", + "9997 | \n", + "MEXICO | \n", + "NAUCALPAN DE JUAREZ | \n", + "NAUCALPAN DE JUAREZ | \n", + "06/08/2015 | \n", + "08/11/2015 | \n", + "2010 | \n", + "
| 3 | \n", + "MEXICO | \n", + "MEXICO | \n", + "TLALNEPANTLA DE BAZ | \n", + "13/07/1990 | \n", + "25 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "UNIÓN LIBRE | \n", + "MEXICO | \n", + "... | \n", + "NAUCALPAN DE JUAREZ | \n", + "MÉDICO GINECO-OBSTETRA | \n", + "NO APLICA | \n", + "9997 | \n", + "MEXICO | \n", + "NAUCALPAN DE JUAREZ | \n", + "NAUCALPAN DE JUAREZ | \n", + "06/08/2015 | \n", + "08/11/2015 | \n", + "2010 | \n", + "
| 4 | \n", + "MEXICO | \n", + "HIDALGO | \n", + "TENANGO DE DORIA | \n", + "02/01/1983 | \n", + "32 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "UNIÓN LIBRE | \n", + "MEXICO | \n", + "... | \n", + "NAUCALPAN DE JUAREZ | \n", + "MÉDICO GINECO-OBSTETRA | \n", + "NO APLICA | \n", + "9997 | \n", + "MEXICO | \n", + "NAUCALPAN DE JUAREZ | \n", + "NAUCALPAN DE JUAREZ | \n", + "07/08/2015 | \n", + "08/11/2015 | \n", + "2010 | \n", + "
| ... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "... | \n", + "
| 2145194 | \n", + "MEXICO | \n", + "MEXICO | \n", + "SAN FELIPE DEL PROGRESO | \n", + "18/09/1989 | \n", + "25 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "UNIÓN LIBRE | \n", + "MEXICO | \n", + "... | \n", + "NAUCALPAN DE JUAREZ | \n", + "MÉDICO GINECO-OBSTETRA | \n", + "NO APLICA | \n", + "9997 | \n", + "MEXICO | \n", + "NAUCALPAN DE JUAREZ | \n", + "NAUCALPAN DE JUAREZ | \n", + "04/08/2015 | \n", + "08/11/2015 | \n", + "2010 | \n", + "
| 2145195 | \n", + "MEXICO | \n", + "MEXICO | \n", + "NAUCALPAN DE JUAREZ | \n", + "07/08/1988 | \n", + "26 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "SOLTERA | \n", + "MEXICO | \n", + "... | \n", + "NAUCALPAN DE JUAREZ | \n", + "MÉDICO GINECO-OBSTETRA | \n", + "NO APLICA | \n", + "9997 | \n", + "MEXICO | \n", + "NAUCALPAN DE JUAREZ | \n", + "NAUCALPAN DE JUAREZ | \n", + "04/08/2015 | \n", + "08/11/2015 | \n", + "2010 | \n", + "
| 2145196 | \n", + "MEXICO | \n", + "MEXICO | \n", + "HUIXQUILUCAN | \n", + "13/03/1988 | \n", + "27 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "CASADA | \n", + "MEXICO | \n", + "... | \n", + "NAUCALPAN DE JUAREZ | \n", + "MÉDICO GINECO-OBSTETRA | \n", + "NO APLICA | \n", + "9997 | \n", + "MEXICO | \n", + "NAUCALPAN DE JUAREZ | \n", + "NAUCALPAN DE JUAREZ | \n", + "05/08/2015 | \n", + "08/11/2015 | \n", + "2010 | \n", + "
| 2145197 | \n", + "MEXICO | \n", + "MEXICO | \n", + "NAUCALPAN DE JUAREZ | \n", + "26/05/1998 | \n", + "17 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "UNIÓN LIBRE | \n", + "MEXICO | \n", + "... | \n", + "NAUCALPAN DE JUAREZ | \n", + "MÉDICO GINECO-OBSTETRA | \n", + "NO APLICA | \n", + "9997 | \n", + "MEXICO | \n", + "NAUCALPAN DE JUAREZ | \n", + "NAUCALPAN DE JUAREZ | \n", + "05/08/2015 | \n", + "08/11/2015 | \n", + "2010 | \n", + "
| 2145198 | \n", + "MEXICO | \n", + "MEXICO | \n", + "ATIZAPAN | \n", + "27/01/1996 | \n", + "19 | \n", + "NaN | \n", + "NaN | \n", + "NaN | \n", + "UNIÓN LIBRE | \n", + "MEXICO | \n", + "... | \n", + "NAUCALPAN DE JUAREZ | \n", + "MÉDICO GINECO-OBSTETRA | \n", + "NO APLICA | \n", + "9997 | \n", + "MEXICO | \n", + "NAUCALPAN DE JUAREZ | \n", + "NAUCALPAN DE JUAREZ | \n", + "06/08/2015 | \n", + "08/11/2015 | \n", + "2010 | \n", + "
2145199 rows × 61 columns
\n", + "