unir-metodologia/procesamiento_2010-2016.ipynb

215 lines
5.5 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "f9c836a8-aa45-4b3f-8392-660ffdf0daf9",
"metadata": {},
"outputs": [],
"source": [
"from functools import cache\n",
"import pandas as pd\n",
"\n",
"pd.set_option(\"display.max_columns\", None)"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "faf0690b-1462-4d93-b66d-3b3e017fcc88",
"metadata": {},
"outputs": [],
"source": [
"@cache\n",
"def get_dataset_for(year):\n",
"    \"\"\"Load ``datasets/sinac{year}DatosAbiertos.csv`` with every column read as ``object``.\n",
"\n",
"    Decorated with ``functools.cache``: the file for a given ``year`` is read once and\n",
"    later calls with the same ``year`` return that same DataFrame object.\n",
"    \"\"\"\n",
"    csv_path = f\"datasets/sinac{year}DatosAbiertos.csv\"\n",
"    return pd.read_csv(csv_path, dtype=object)"
]
},
{
"cell_type": "code",
"execution_count": 7,
"id": "4436b11a-5e07-4b5f-9aa8-9f60b9328e70",
"metadata": {},
"outputs": [],
"source": [
"# Columns kept from every yearly CSV; each file is subset to exactly these\n",
"# names (in this order) before the yearly frames are concatenated.\n",
"columns_selected = [\n",
"    \"edo_captura\",\n",
"    \"edo_nac_madre\",\n",
"    \"fecha_nac_madre\",\n",
"    \"edad_madre\",\n",
"    \"estado_conyugal\",\n",
"    \"entidad_residencia_madre\",\n",
"    \"numero_embarazos\",\n",
"    \"hijos_nacidos_muertos\",\n",
"    \"hijos_nacidos_vivos\",\n",
"    \"hijos_sobrevivientes\",\n",
"    \"el_hijo_anterior_nacio\",\n",
"    \"vive_aun_hijo_anterior\",\n",
"    \"orden_nacimiento\",\n",
"    \"recibio_atencion_prenatal\",\n",
"    \"trimestre_recibio_primera_consulta\",\n",
"    \"total_consultas_recibidas\",\n",
"    \"madre_sobrevivio_al_parto\",\n",
"    \"escolaridad_madre\",\n",
"    \"ocupacion_habitual_madre\",\n",
"    \"trabaja_actualmente\",\n",
"    \"fecha_nacimiento_nac_vivo\",\n",
"    \"hora_nacimiento_nac_vivo\",\n",
"    \"sexo_nac_vivo\",\n",
"    \"semanas_gestacion_nac_vivo\",\n",
"    \"talla_nac_vivo\",\n",
"    \"peso_nac_vivo\",\n",
"    \"valoracion_apgar_nac_vivo\",\n",
"    \"valoracion_silverman_nac_vivo\",\n",
"    \"producto_de_un_embarazo\",\n",
"    \"codigo_anomalia\",\n",
"    \"anomalia_congenita_nac_vivo\",\n",
"    \"lugar_de_nacimiento\",\n",
"    \"entidad_certifico\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "71959de6-9879-4471-a331-99a054b7f5af",
"metadata": {},
"outputs": [],
"source": [
"# Stack the selected columns of every yearly file for 2010 through 2016\n",
"# (the end of range() is exclusive).\n",
"# ignore_index=True gives the combined frame a single unique 0..n-1 index\n",
"# instead of repeating each file's own row numbers as duplicate labels.\n",
"df = pd.concat(\n",
"    [get_dataset_for(year)[columns_selected] for year in range(2010, 2017)],\n",
"    ignore_index=True,\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 8,
"id": "9147e139-6a91-4660-81e2-db49ec151ff0",
"metadata": {},
"outputs": [],
"source": [
"def _ano_nacimiento_vivo_func(str_date):\n",
"    \"\"\"Return the last '/'-separated field of ``str_date``; '' when it is not a string.\n",
"\n",
"    Presumably the dates are day/month/year so the last field is the year - TODO\n",
"    confirm against the dataset. Non-string values (presumably NaN for missing\n",
"    dates) map to the empty string.\n",
"    \"\"\"\n",
"    # Explicit type check rather than a blanket try/except, so unrelated errors\n",
"    # and KeyboardInterrupt are not silently swallowed.\n",
"    if not isinstance(str_date, str):\n",
"        return \"\"\n",
"    return str_date.rsplit(\"/\", 1)[-1]\n",
"\n",
"\n",
"df[\"año_de_nacimiento_vivo\"] = df[\"fecha_nacimiento_nac_vivo\"].apply(\n",
"    _ano_nacimiento_vivo_func\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 10,
"id": "f71063d3-5c78-426f-a484-80fcf543051f",
"metadata": {
"scrolled": true
},
"outputs": [],
"source": [
"# Export columns: every entry of columns_selected, in the same order, except\n",
"# the two fields named below.\n",
"# NOTE(review): the derived column año_de_nacimiento_vivo is not part of this\n",
"# list, so it is not written to the CSV - confirm that is intended.\n",
"columnas_finales = [\n",
"    column\n",
"    for column in columns_selected\n",
"    if column not in (\"anomalia_congenita_nac_vivo\", \"lugar_de_nacimiento\")\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 13,
"id": "937d0447-e6e6-477d-a43b-471302229fca",
"metadata": {},
"outputs": [],
"source": [
"# Write only the export columns, in list order. The DataFrame index is also\n",
"# written, as the first (unnamed) CSV column, because index is left at its default.\n",
"df.to_csv(\"2010-2016.csv\", columns=columnas_finales)"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "dfe420d2-b0f3-48ce-b30d-06f8d901dbd8",
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 12,
"id": "69940ce2-da7a-4752-9d0d-c6046eb03992",
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
"20918084"
]
},
"execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# NOTE(review): unexplained literals - presumably two row counts being added\n",
"# up; confirm what they refer to or remove this scratch cell.\n",
"5_873_377 + 15_044_707"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "c3db8468-11e0-4758-a9b1-f037e633b729",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}