Agregue columnas netas.

This commit is contained in:
Miguel Salgado 2024-02-24 19:39:47 -08:00
parent 7fe39453e9
commit 9929931f72
3 changed files with 544 additions and 6 deletions

View File

@ -12,6 +12,7 @@ jupyterlab = "*"
pandas = "*" pandas = "*"
scipy = "*" scipy = "*"
scikit-learn = "*" scikit-learn = "*"
pyarrow = "*"
[dev-packages] [dev-packages]

View File

@ -294,17 +294,63 @@
] ]
}, },
{ {
"cell_type": "code", "attachments": {},
"execution_count": null, "cell_type": "markdown",
"id": "62e36cac-2d1a-42c2-a68e-e0888d7afa0d", "id": "6bad6fa6-b4b3-4b20-9910-c827ecfc86cf",
"metadata": {}, "metadata": {},
"outputs": [], "source": [
"source": [] "Las columnas de interés, netas, serán las siguientes:"
]
},
{
"cell_type": "raw",
"id": "b83b48eb-1375-4b1c-8eca-53583fd090f0",
"metadata": {},
"source": [
"NOMBRE DE CAMPO\n",
"ENTIDAD_NACMAD\n",
"FECH_NACM\n",
"ESTADO_CIVIL\n",
"ENTIDAD_RESMAD\n",
"NUMERO_EMBARAZOS\n",
"NACIDOS_MUERTOS\n",
"NACIDOS_VIVOS\n",
"SOBREVIVIENTES\n",
"ANTERIOR_NACIO\n",
"VIVE_AUN\n",
"ORDEN_NAC\n",
"ATENCION_PRENA\n",
"TRIMESTR_ATEN\n",
"SOBREVIVIO_PARTO\n",
"ESCOLARIDAD\n",
"DESC_OCUPHAB\n",
"OCUPACION_HABITUAL\n",
"TRABAJA_ACTUALMENTE\n",
"FECH_NACH\n",
"HORA_NACH\n",
"SEXO_RN\n",
"GESTACH\n",
"TALLAH\n",
"PESOH\n",
"APGARH\n",
"SILVERMAN\n",
"NACIMIENTOS\n",
"MES_NACI\n",
"Producto\n",
"CIE10\n",
"CIE10_2da\n",
"PROCEDIMIENTO\n",
"OTRO_PROCEDIMIENTO\n",
"LUGAR_NACIM\n",
"ENTIDAD_NACIM\n",
"ENTIDAD_CERTIF\n",
"FECHA_CERTIF"
]
}, },
{ {
"cell_type": "code", "cell_type": "code",
"execution_count": null, "execution_count": null,
"id": "8929221a-af25-4673-8952-0a55f4e59e80", "id": "a17735af-9fa2-409c-a744-55418b93986a",
"metadata": {}, "metadata": {},
"outputs": [], "outputs": [],
"source": [] "source": []

View File

@ -0,0 +1,491 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 3,
"id": "16c86730-6982-4338-a98c-49f0c5b603ba",
"metadata": {},
"outputs": [],
"source": [
"from functools import cache\n",
"import pandas as pd\n",
"\n",
"@cache\n",
"def get_dataset_for(year):\n",
"    \"\"\"Load the SINAC open-data CSV for the given year as a DataFrame.\n",
"\n",
"    Reads ``datasets/sinac{year}DatosAbiertos.csv`` relative to the working\n",
"    directory. ``functools.cache`` memoizes the result per ``year``, so\n",
"    repeated calls return the same DataFrame object; copy it before mutating.\n",
"    \"\"\"\n",
"    # low_memory=False makes pandas infer each column's dtype from the whole\n",
"    # file, avoiding the DtypeWarning about mixed-type columns.\n",
"    return pd.read_csv(\n",
"        f\"datasets/sinac{year}DatosAbiertos.csv\",\n",
"        low_memory=False,\n",
"    )"
]
},
{
"cell_type": "code",
"execution_count": 14,
"id": "d03c787f-f963-4eff-8294-63bc7b073d04",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/05/y38rqjl55hjb_hbnypxzgrsw0000gn/T/ipykernel_48786/301737641.py:6: DtypeWarning: Columns (5,6,7,54,60) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" return pd.read_csv(f\"datasets/sinac{year}DatosAbiertos.csv\")\n"
]
}
],
"source": [
"df = get_dataset_for(2015)"
]
},
{
"cell_type": "code",
"execution_count": 15,
"id": "f793f126-f1a6-417c-8801-b39fdd9b740d",
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<div>\n",
"<style scoped>\n",
" .dataframe tbody tr th:only-of-type {\n",
" vertical-align: middle;\n",
" }\n",
"\n",
" .dataframe tbody tr th {\n",
" vertical-align: top;\n",
" }\n",
"\n",
" .dataframe thead th {\n",
" text-align: right;\n",
" }\n",
"</style>\n",
"<table border=\"1\" class=\"dataframe\">\n",
" <thead>\n",
" <tr style=\"text-align: right;\">\n",
" <th></th>\n",
" <th>edo_captura</th>\n",
" <th>edo_nac_madre</th>\n",
" <th>mpo_nac_madre</th>\n",
" <th>fecha_nac_madre</th>\n",
" <th>edad_madre</th>\n",
" <th>madre_se_considera_indigena</th>\n",
" <th>madre_habla_lengua_indigena</th>\n",
" <th>lengua_indigena_hablada</th>\n",
" <th>estado_conyugal</th>\n",
" <th>entidad_residencia_madre</th>\n",
" <th>...</th>\n",
" <th>localidad_nacimiento</th>\n",
" <th>certificado_por</th>\n",
" <th>unidad_medica_certifico</th>\n",
" <th>clues_certifico</th>\n",
" <th>entidad_certifico</th>\n",
" <th>municipio_certifico</th>\n",
" <th>localidad_certifico</th>\n",
" <th>fecha_certificacion</th>\n",
" <th>fecha_captura</th>\n",
" <th>tipo_formato</th>\n",
" </tr>\n",
" </thead>\n",
" <tbody>\n",
" <tr>\n",
" <th>0</th>\n",
" <td>MEXICO</td>\n",
" <td>MEXICO</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>16/09/1994</td>\n",
" <td>20</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>UNIÓN LIBRE</td>\n",
" <td>MEXICO</td>\n",
" <td>...</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>MÉDICO GINECO-OBSTETRA</td>\n",
" <td>NO APLICA</td>\n",
" <td>9997</td>\n",
" <td>MEXICO</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>07/08/2015</td>\n",
" <td>08/11/2015</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>1</th>\n",
" <td>MEXICO</td>\n",
" <td>DISTRITO FEDERAL</td>\n",
" <td>MIGUEL HIDALGO</td>\n",
" <td>09/09/1995</td>\n",
" <td>19</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>SOLTERA</td>\n",
" <td>MEXICO</td>\n",
" <td>...</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>MÉDICO GINECO-OBSTETRA</td>\n",
" <td>NO APLICA</td>\n",
" <td>9997</td>\n",
" <td>MEXICO</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>06/08/2015</td>\n",
" <td>08/11/2015</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2</th>\n",
" <td>MEXICO</td>\n",
" <td>MEXICO</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>08/08/1993</td>\n",
" <td>21</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CASADA</td>\n",
" <td>MEXICO</td>\n",
" <td>...</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>MÉDICO GINECO-OBSTETRA</td>\n",
" <td>NO APLICA</td>\n",
" <td>9997</td>\n",
" <td>MEXICO</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>06/08/2015</td>\n",
" <td>08/11/2015</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>3</th>\n",
" <td>MEXICO</td>\n",
" <td>MEXICO</td>\n",
" <td>TLALNEPANTLA DE BAZ</td>\n",
" <td>13/07/1990</td>\n",
" <td>25</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>UNIÓN LIBRE</td>\n",
" <td>MEXICO</td>\n",
" <td>...</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>MÉDICO GINECO-OBSTETRA</td>\n",
" <td>NO APLICA</td>\n",
" <td>9997</td>\n",
" <td>MEXICO</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>06/08/2015</td>\n",
" <td>08/11/2015</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>4</th>\n",
" <td>MEXICO</td>\n",
" <td>HIDALGO</td>\n",
" <td>TENANGO DE DORIA</td>\n",
" <td>02/01/1983</td>\n",
" <td>32</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>UNIÓN LIBRE</td>\n",
" <td>MEXICO</td>\n",
" <td>...</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>MÉDICO GINECO-OBSTETRA</td>\n",
" <td>NO APLICA</td>\n",
" <td>9997</td>\n",
" <td>MEXICO</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>07/08/2015</td>\n",
" <td>08/11/2015</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>...</th>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" <td>...</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2145194</th>\n",
" <td>MEXICO</td>\n",
" <td>MEXICO</td>\n",
" <td>SAN FELIPE DEL PROGRESO</td>\n",
" <td>18/09/1989</td>\n",
" <td>25</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>UNIÓN LIBRE</td>\n",
" <td>MEXICO</td>\n",
" <td>...</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>MÉDICO GINECO-OBSTETRA</td>\n",
" <td>NO APLICA</td>\n",
" <td>9997</td>\n",
" <td>MEXICO</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>04/08/2015</td>\n",
" <td>08/11/2015</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2145195</th>\n",
" <td>MEXICO</td>\n",
" <td>MEXICO</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>07/08/1988</td>\n",
" <td>26</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>SOLTERA</td>\n",
" <td>MEXICO</td>\n",
" <td>...</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>MÉDICO GINECO-OBSTETRA</td>\n",
" <td>NO APLICA</td>\n",
" <td>9997</td>\n",
" <td>MEXICO</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>04/08/2015</td>\n",
" <td>08/11/2015</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2145196</th>\n",
" <td>MEXICO</td>\n",
" <td>MEXICO</td>\n",
" <td>HUIXQUILUCAN</td>\n",
" <td>13/03/1988</td>\n",
" <td>27</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>CASADA</td>\n",
" <td>MEXICO</td>\n",
" <td>...</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>MÉDICO GINECO-OBSTETRA</td>\n",
" <td>NO APLICA</td>\n",
" <td>9997</td>\n",
" <td>MEXICO</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>05/08/2015</td>\n",
" <td>08/11/2015</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2145197</th>\n",
" <td>MEXICO</td>\n",
" <td>MEXICO</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>26/05/1998</td>\n",
" <td>17</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>UNIÓN LIBRE</td>\n",
" <td>MEXICO</td>\n",
" <td>...</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>MÉDICO GINECO-OBSTETRA</td>\n",
" <td>NO APLICA</td>\n",
" <td>9997</td>\n",
" <td>MEXICO</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>05/08/2015</td>\n",
" <td>08/11/2015</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" <tr>\n",
" <th>2145198</th>\n",
" <td>MEXICO</td>\n",
" <td>MEXICO</td>\n",
" <td>ATIZAPAN</td>\n",
" <td>27/01/1996</td>\n",
" <td>19</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>NaN</td>\n",
" <td>UNIÓN LIBRE</td>\n",
" <td>MEXICO</td>\n",
" <td>...</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>MÉDICO GINECO-OBSTETRA</td>\n",
" <td>NO APLICA</td>\n",
" <td>9997</td>\n",
" <td>MEXICO</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>NAUCALPAN DE JUAREZ</td>\n",
" <td>06/08/2015</td>\n",
" <td>08/11/2015</td>\n",
" <td>2010</td>\n",
" </tr>\n",
" </tbody>\n",
"</table>\n",
"<p>2145199 rows × 61 columns</p>\n",
"</div>"
],
"text/plain": [
" edo_captura edo_nac_madre mpo_nac_madre \\\n",
"0 MEXICO MEXICO NAUCALPAN DE JUAREZ \n",
"1 MEXICO DISTRITO FEDERAL MIGUEL HIDALGO \n",
"2 MEXICO MEXICO NAUCALPAN DE JUAREZ \n",
"3 MEXICO MEXICO TLALNEPANTLA DE BAZ \n",
"4 MEXICO HIDALGO TENANGO DE DORIA \n",
"... ... ... ... \n",
"2145194 MEXICO MEXICO SAN FELIPE DEL PROGRESO \n",
"2145195 MEXICO MEXICO NAUCALPAN DE JUAREZ \n",
"2145196 MEXICO MEXICO HUIXQUILUCAN \n",
"2145197 MEXICO MEXICO NAUCALPAN DE JUAREZ \n",
"2145198 MEXICO MEXICO ATIZAPAN \n",
"\n",
" fecha_nac_madre edad_madre madre_se_considera_indigena \\\n",
"0 16/09/1994 20 NaN \n",
"1 09/09/1995 19 NaN \n",
"2 08/08/1993 21 NaN \n",
"3 13/07/1990 25 NaN \n",
"4 02/01/1983 32 NaN \n",
"... ... ... ... \n",
"2145194 18/09/1989 25 NaN \n",
"2145195 07/08/1988 26 NaN \n",
"2145196 13/03/1988 27 NaN \n",
"2145197 26/05/1998 17 NaN \n",
"2145198 27/01/1996 19 NaN \n",
"\n",
" madre_habla_lengua_indigena lengua_indigena_hablada estado_conyugal \\\n",
"0 NaN NaN UNIÓN LIBRE \n",
"1 NaN NaN SOLTERA \n",
"2 NaN NaN CASADA \n",
"3 NaN NaN UNIÓN LIBRE \n",
"4 NaN NaN UNIÓN LIBRE \n",
"... ... ... ... \n",
"2145194 NaN NaN UNIÓN LIBRE \n",
"2145195 NaN NaN SOLTERA \n",
"2145196 NaN NaN CASADA \n",
"2145197 NaN NaN UNIÓN LIBRE \n",
"2145198 NaN NaN UNIÓN LIBRE \n",
"\n",
" entidad_residencia_madre ... localidad_nacimiento \\\n",
"0 MEXICO ... NAUCALPAN DE JUAREZ \n",
"1 MEXICO ... NAUCALPAN DE JUAREZ \n",
"2 MEXICO ... NAUCALPAN DE JUAREZ \n",
"3 MEXICO ... NAUCALPAN DE JUAREZ \n",
"4 MEXICO ... NAUCALPAN DE JUAREZ \n",
"... ... ... ... \n",
"2145194 MEXICO ... NAUCALPAN DE JUAREZ \n",
"2145195 MEXICO ... NAUCALPAN DE JUAREZ \n",
"2145196 MEXICO ... NAUCALPAN DE JUAREZ \n",
"2145197 MEXICO ... NAUCALPAN DE JUAREZ \n",
"2145198 MEXICO ... NAUCALPAN DE JUAREZ \n",
"\n",
" certificado_por unidad_medica_certifico clues_certifico \\\n",
"0 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n",
"1 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n",
"2 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n",
"3 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n",
"4 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n",
"... ... ... ... \n",
"2145194 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n",
"2145195 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n",
"2145196 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n",
"2145197 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n",
"2145198 MÉDICO GINECO-OBSTETRA NO APLICA 9997 \n",
"\n",
" entidad_certifico municipio_certifico localidad_certifico \\\n",
"0 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n",
"1 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n",
"2 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n",
"3 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n",
"4 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n",
"... ... ... ... \n",
"2145194 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n",
"2145195 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n",
"2145196 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n",
"2145197 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n",
"2145198 MEXICO NAUCALPAN DE JUAREZ NAUCALPAN DE JUAREZ \n",
"\n",
" fecha_certificacion fecha_captura tipo_formato \n",
"0 07/08/2015 08/11/2015 2010 \n",
"1 06/08/2015 08/11/2015 2010 \n",
"2 06/08/2015 08/11/2015 2010 \n",
"3 06/08/2015 08/11/2015 2010 \n",
"4 07/08/2015 08/11/2015 2010 \n",
"... ... ... ... \n",
"2145194 04/08/2015 08/11/2015 2010 \n",
"2145195 04/08/2015 08/11/2015 2010 \n",
"2145196 05/08/2015 08/11/2015 2010 \n",
"2145197 05/08/2015 08/11/2015 2010 \n",
"2145198 06/08/2015 08/11/2015 2010 \n",
"\n",
"[2145199 rows x 61 columns]"
]
},
"execution_count": 15,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"df"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "3a0a131b-b6d5-490c-afe8-b38e01f67afa",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}