diff --git a/Pipfile b/Pipfile index e61a173..350c988 100644 --- a/Pipfile +++ b/Pipfile @@ -12,6 +12,7 @@ jupyterlab = "*" pandas = "*" scipy = "*" scikit-learn = "*" +pyarrow = "*" [dev-packages] diff --git a/analisis-de-columnas.ipynb b/analisis-de-columnas.ipynb index 00ce8f8..b1a7090 100644 --- a/analisis-de-columnas.ipynb +++ b/analisis-de-columnas.ipynb @@ -294,17 +294,63 @@ ] }, { - "cell_type": "code", - "execution_count": null, - "id": "62e36cac-2d1a-42c2-a68e-e0888d7afa0d", + "attachments": {}, + "cell_type": "markdown", + "id": "6bad6fa6-b4b3-4b20-9910-c827ecfc86cf", "metadata": {}, - "outputs": [], - "source": [] + "source": [ + "Las columnas de interes, netas seran las siguientes:" + ] + }, + { + "cell_type": "raw", + "id": "b83b48eb-1375-4b1c-8eca-53583fd090f0", + "metadata": {}, + "source": [ + "NOMBRE DE CAMPO\n", + "ENTIDAD_NACMAD\n", + "FECH_NACM\n", + "ESTADO_CIVIL\n", + "ENTIDAD_RESMAD\n", + "NUMERO_EMBARAZOS\n", + "NACIDOS_MUERTOS\n", + "NACIDOS_VIVOS\n", + "SOBREVIVIENTES\n", + "ANTERIOR_NACIO\n", + "VIVE_AUN\n", + "ORDEN_NAC\n", + "ATENCION_PRENA\n", + "TRIMESTR_ATEN\n", + "SOBREVIVIO_PARTO\n", + "ESCOLARIDAD\n", + "DESC_OCUPHAB\n", + "OCUPACION_HABITUAL\n", + "TRABAJA_ACTUALMENTE\n", + "FECH_NACH\n", + "HORA_NACH\n", + "SEXO_RN\n", + "GESTACH\n", + "TALLAH\n", + "PESOH\n", + "APGARH\n", + "SILVERMAN\n", + "NACIMIENTOS\n", + "MES_NACI\n", + "Producto\n", + "CIE10\n", + "CIE10_2da\n", + "PROCEDIMIENTO\n", + "OTRO_PROCEDIMIENTO\n", + "LUGAR_NACIM\n", + "ENTIDAD_NACIM\n", + "ENTIDAD_CERTIF\n", + "FECHA_CERTIF" + ] }, { "cell_type": "code", "execution_count": null, - "id": "8929221a-af25-4673-8952-0a55f4e59e80", + "id": "a17735af-9fa2-409c-a744-55418b93986a", "metadata": {}, "outputs": [], "source": [] diff --git a/exploracion-individual.ipynb b/exploracion-individual.ipynb new file mode 100644 index 0000000..632c9fa --- /dev/null +++ b/exploracion-individual.ipynb @@ -0,0 +1,491 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 3, + "id": "16c86730-6982-4338-a98c-49f0c5b603ba", + "metadata": {}, + "outputs": [], + "source": [ + "from functools import cache\n", + "import pandas as pd\n", + "\n", + "@cache\n", + "def get_dataset_for(year):\n", + " return pd.read_csv(f\"datasets/sinac{year}DatosAbiertos.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "id": "d03c787f-f963-4eff-8294-63bc7b073d04", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/var/folders/05/y38rqjl55hjb_hbnypxzgrsw0000gn/T/ipykernel_48786/301737641.py:6: DtypeWarning: Columns (5,6,7,54,60) have mixed types. Specify dtype option on import or set low_memory=False.\n", + " return pd.read_csv(f\"datasets/sinac{year}DatosAbiertos.csv\")\n" + ] + } + ], + "source": [ + "df = get_dataset_for(2015)" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "id": "f793f126-f1a6-417c-8801-b39fdd9b740d", + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
edo_capturaedo_nac_madrempo_nac_madrefecha_nac_madreedad_madremadre_se_considera_indigenamadre_habla_lengua_indigenalengua_indigena_habladaestado_conyugalentidad_residencia_madre...localidad_nacimientocertificado_porunidad_medica_certificoclues_certificoentidad_certificomunicipio_certificolocalidad_certificofecha_certificacionfecha_capturatipo_formato
0MEXICOMEXICONAUCALPAN DE JUAREZ16/09/199420NaNNaNNaNUNIÓN LIBREMEXICO...NAUCALPAN DE JUAREZMÉDICO GINECO-OBSTETRANO APLICA9997MEXICONAUCALPAN DE JUAREZNAUCALPAN DE JUAREZ07/08/201508/11/20152010
1MEXICODISTRITO FEDERALMIGUEL HIDALGO09/09/199519NaNNaNNaNSOLTERAMEXICO...NAUCALPAN DE JUAREZMÉDICO GINECO-OBSTETRANO APLICA9997MEXICONAUCALPAN DE JUAREZNAUCALPAN DE JUAREZ06/08/201508/11/20152010
2MEXICOMEXICONAUCALPAN DE JUAREZ08/08/199321NaNNaNNaNCASADAMEXICO...NAUCALPAN DE JUAREZMÉDICO GINECO-OBSTETRANO APLICA9997MEXICONAUCALPAN DE JUAREZNAUCALPAN DE JUAREZ06/08/201508/11/20152010
3MEXICOMEXICOTLALNEPANTLA DE BAZ13/07/199025NaNNaNNaNUNIÓN LIBREMEXICO...NAUCALPAN DE JUAREZMÉDICO GINECO-OBSTETRANO APLICA9997MEXICONAUCALPAN DE JUAREZNAUCALPAN DE JUAREZ06/08/201508/11/20152010
4MEXICOHIDALGOTENANGO DE DORIA02/01/198332NaNNaNNaNUNIÓN LIBREMEXICO...NAUCALPAN DE JUAREZMÉDICO GINECO-OBSTETRANO APLICA9997MEXICONAUCALPAN DE JUAREZNAUCALPAN DE JUAREZ07/08/201508/11/20152010
..................................................................
2145194MEXICOMEXICOSAN FELIPE DEL PROGRESO18/09/198925NaNNaNNaNUNIÓN LIBREMEXICO...NAUCALPAN DE JUAREZMÉDICO GINECO-OBSTETRANO APLICA9997MEXICONAUCALPAN DE JUAREZNAUCALPAN DE JUAREZ04/08/201508/11/20152010
2145195MEXICOMEXICONAUCALPAN DE JUAREZ07/08/198826NaNNaNNaNSOLTERAMEXICO...NAUCALPAN DE JUAREZMÉDICO GINECO-OBSTETRANO APLICA9997MEXICONAUCALPAN DE JUAREZNAUCALPAN DE JUAREZ04/08/201508/11/20152010
2145196MEXICOMEXICOHUIXQUILUCAN13/03/198827NaNNaNNaNCASADAMEXICO...NAUCALPAN DE JUAREZMÉDICO GINECO-OBSTETRANO APLICA9997MEXICONAUCALPAN DE JUAREZNAUCALPAN DE JUAREZ05/08/201508/11/20152010
2145197MEXICOMEXICONAUCALPAN DE JUAREZ26/05/199817NaNNaNNaNUNIÓN LIBREMEXICO...NAUCALPAN DE JUAREZMÉDICO GINECO-OBSTETRANO APLICA9997MEXICONAUCALPAN DE JUAREZNAUCALPAN DE JUAREZ05/08/201508/11/20152010
2145198MEXICOMEXICOATIZAPAN27/01/199619NaNNaNNaNUNIÓN LIBREMEXICO...NAUCALPAN DE JUAREZMÉDICO GINECO-OBSTETRANO APLICA9997MEXICONAUCALPAN DE JUAREZNAUCALPAN DE JUAREZ06/08/201508/11/20152010
\n", + "

2145199 rows × 61 columns

\n", + "
" + ], + "text/plain": [ + " edo_captura edo_nac_madre mpo_nac_madre \\\n", + "0 MEXICO MEXICO NAUCALPAN DE JUAREZ \n", + "1 MEXICO DISTRITO FEDERAL MIGUEL HIDALGO \n", + "2 MEXICO MEXICO NAUCALPAN DE JUAREZ \n", + "3 MEXICO MEXICO TLALNEPANTLA DE BAZ \n", + "4 MEXICO HIDALGO TENANGO DE DORIA \n", + "... ... ... ... \n", + "2145194 MEXICO MEXICO SAN FELIPE DEL PROGRESO \n", + "2145195 MEXICO MEXICO NAUCALPAN DE JUAREZ \n", + "2145196 MEXICO MEXICO HUIXQUILUCAN \n", + "2145197 MEXICO MEXICO NAUCALPAN DE JUAREZ \n", + "2145198 MEXICO MEXICO ATIZAPAN \n", + "\n", + " fecha_nac_madre edad_madre madre_se_considera_indigena \\\n", + "0 16/09/1994 20 NaN \n", + "1 09/09/1995 19 NaN \n", + "2 08/08/1993 21 NaN \n", + "3 13/07/1990 25 NaN \n", + "4 02/01/1983 32 NaN \n", + "... ... ... ... \n", + "2145194 18/09/1989 25 NaN \n", + "2145195 07/08/1988 26 NaN \n", + "2145196 13/03/1988 27 NaN \n", + "2145197 26/05/1998 17 NaN \n", + "2145198 27/01/1996 19 NaN \n", + "\n", + " madre_habla_lengua_indigena lengua_indigena_hablada estado_conyugal \\\n", + "0 NaN NaN UNIÓN LIBRE \n", + "1 NaN NaN SOLTERA \n", + "2 NaN NaN CASADA \n", + "3 NaN NaN UNIÓN LIBRE \n", + "4 NaN NaN UNIÓN LIBRE \n", + "... ... ... ... \n", + "2145194 NaN NaN UNIÓN LIBRE \n", + "2145195 NaN NaN SOLTERA \n", + "2145196 NaN NaN CASADA \n", + "2145197 NaN NaN UNIÓN LIBRE \n", + "2145198 NaN NaN UNIÓN LIBRE \n", + "\n", + " entidad_residencia_madre ... localidad_nacimiento \\\n", + "0 MEXICO ... NAUCALPAN DE JUAREZ \n", + "1 MEXICO ... NAUCALPAN DE JUAREZ \n", + "2 MEXICO ... NAUCALPAN DE JUAREZ \n", + "3 MEXICO ... NAUCALPAN DE JUAREZ \n", + "4 MEXICO ... NAUCALPAN DE JUAREZ \n", + "... ... ... ... \n", + "2145194 MEXICO ... NAUCALPAN DE JUAREZ \n", + "2145195 MEXICO ... NAUCALPAN DE JUAREZ \n", + "2145196 MEXICO ... NAUCALPAN DE JUAREZ \n", + "2145197 MEXICO ... NAUCALPAN DE JUAREZ \n", + "2145198 MEXICO ... NAUCALPAN DE JUAREZ \n", + "\n", + " certificado_por unidad_medica_certifico clues_certifico \\\n", + "0 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n", + "1 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n", + "2 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n", + "3 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n", + "4 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n", + "... ... ... ... \n", + "2145194 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n", + "2145195 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n", + "2145196 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n", + "2145197 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n", + "2145198 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n", + "\n", + " entidad_certifico municipio_certifico localidad_certifico \\\n", + "0 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n", + "1 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n", + "2 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n", + "3 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n", + "4 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n", + "... ... ... ... \n", + "2145194 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n", + "2145195 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n", + "2145196 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n", + "2145197 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n", + "2145198 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n", + "\n", + " fecha_certificacion fecha_captura tipo_formato \n", + "0 07/08/2015 08/11/2015 2010 \n", + "1 06/08/2015 08/11/2015 2010 \n", + "2 06/08/2015 08/11/2015 2010 \n", + "3 06/08/2015 08/11/2015 2010 \n", + "4 07/08/2015 08/11/2015 2010 \n", + "... ... ... ... \n", + "2145194 04/08/2015 08/11/2015 2010 \n", + "2145195 04/08/2015 08/11/2015 2010 \n", + "2145196 05/08/2015 08/11/2015 2010 \n", + "2145197 05/08/2015 08/11/2015 2010 \n", + "2145198 06/08/2015 08/11/2015 2010 \n", + "\n", + "[2145199 rows x 61 columns]" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "df" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3a0a131b-b6d5-490c-afe8-b38e01f67afa", + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.6" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +}