unir-metodologia/procesamiento-unificador.ipynb

155 lines
4.6 KiB
Plaintext

{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"id": "47114285-22af-469e-81f3-560261036208",
"metadata": {},
"outputs": [],
"source": [
"from functools import cache\n",
"import pandas as pd\n",
"\n",
"pd.set_option(\"display.max_columns\", None)"
]
},
{
"cell_type": "code",
"execution_count": 3,
"id": "73c5d186-84f4-4a8e-a572-566ab4936bd7",
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"/var/folders/05/y38rqjl55hjb_hbnypxzgrsw0000gn/T/ipykernel_93262/3018518299.py:3: DtypeWarning: Columns (20) have mixed types. Specify dtype option on import or set low_memory=False.\n",
" pd.read_csv(\"2010-2016.csv\"),\n"
]
}
],
"source": [
"df = pd.concat(\n",
" [\n",
" pd.read_csv(\"2010-2016.csv\"),\n",
" pd.read_csv(\"2017-2019.csv\"),\n",
" ]\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 4,
"id": "194a6cbd-0bd7-4c24-8eb4-f895457ecfed",
"metadata": {},
"outputs": [],
"source": [
"df.to_csv(\"2010-2019.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 5,
"id": "3cfe3c4f-91ea-41e5-a0b9-557183080871",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"<class 'pandas.core.frame.DataFrame'>\n",
"Index: 20918084 entries, 0 to 5873376\n",
"Data columns (total 32 columns):\n",
" # Column Dtype \n",
"--- ------ ----- \n",
" 0 Unnamed: 0 int64 \n",
" 1 edo_captura object\n",
" 2 edo_nac_madre object\n",
" 3 fecha_nac_madre object\n",
" 4 edad_madre int64 \n",
" 5 estado_conyugal object\n",
" 6 entidad_residencia_madre object\n",
" 7 numero_embarazos int64 \n",
" 8 hijos_nacidos_muertos int64 \n",
" 9 hijos_nacidos_vivos int64 \n",
" 10 hijos_sobrevivientes int64 \n",
" 11 el_hijo_anterior_nacio object\n",
" 12 vive_aun_hijo_anterior object\n",
" 13 orden_nacimiento int64 \n",
" 14 recibio_atencion_prenatal object\n",
" 15 trimestre_recibio_primera_consulta object\n",
" 16 total_consultas_recibidas int64 \n",
" 17 madre_sobrevivio_al_parto object\n",
" 18 escolaridad_madre object\n",
" 19 ocupacion_habitual_madre object\n",
" 20 trabaja_actualmente object\n",
" 21 fecha_nacimiento_nac_vivo object\n",
" 22 hora_nacimiento_nac_vivo object\n",
" 23 sexo_nac_vivo object\n",
" 24 semanas_gestacion_nac_vivo int64 \n",
" 25 talla_nac_vivo int64 \n",
" 26 peso_nac_vivo int64 \n",
" 27 valoracion_apgar_nac_vivo int64 \n",
" 28 valoracion_silverman_nac_vivo int64 \n",
" 29 producto_de_un_embarazo object\n",
" 30 codigo_anomalia object\n",
" 31 entidad_certifico object\n",
"dtypes: int64(13), object(19)\n",
"memory usage: 5.1+ GB\n"
]
}
],
"source": [
"df.info()"
]
},
{
"cell_type": "code",
"execution_count": 6,
"id": "f302574e-65d3-4b4e-9fad-c4a93b1ebba7",
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"16777232 80891859 -rw-r--r-- 1 miguel.salgado staff 0 4854454496 \"Feb 25 01:57:13 2024\" \"Feb 25 01:59:10 2024\" \"Feb 25 01:59:10 2024\" \"Feb 25 01:57:10 2024\" 4096 9504688 0 2010-2019.csv\n"
]
}
],
"source": [
"! stat 2010-2019.csv"
]
},
{
"cell_type": "code",
"execution_count": null,
"id": "6f6673b8-7e8d-42fc-af10-4302cee2b37d",
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.6"
}
},
"nbformat": 4,
"nbformat_minor": 5
}