195 lines
7.5 KiB
Plaintext
195 lines
7.5 KiB
Plaintext
![]() |
{
|
||
|
"cells": [
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 15,
|
||
|
"metadata": {},
|
||
|
"outputs": [
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Requirement already satisfied: protobuf in /usr/local/lib/python3.8/site-packages (4.24.3)\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
|
||
|
"\u001b[0m\n",
|
||
|
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n",
|
||
|
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
|
||
|
"\n",
|
||
|
"WARNING: apt does not have a stable CLI interface. Use with caution in scripts.\n",
|
||
|
"\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"Hit:1 http://deb.debian.org/debian bookworm InRelease\n",
|
||
|
"Hit:2 http://deb.debian.org/debian bookworm-updates InRelease\n",
|
||
|
"Hit:3 http://deb.debian.org/debian-security bookworm-security InRelease\n",
|
||
|
"Reading package lists...\n",
|
||
|
"Building dependency tree...\n",
|
||
|
"Reading state information...\n",
|
||
|
"9 packages can be upgraded. Run 'apt list --upgradable' to see them.\n",
|
||
|
"Reading package lists..."
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stderr",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"\n",
|
||
|
"WARNING: apt does not have a stable CLI interface. Use with caution in scripts.\n",
|
||
|
"\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"name": "stdout",
|
||
|
"output_type": "stream",
|
||
|
"text": [
|
||
|
"\n",
|
||
|
"Building dependency tree...\n",
|
||
|
"Reading state information...\n",
|
||
|
"protobuf-compiler is already the newest version (3.21.12-3).\n",
|
||
|
"0 upgraded, 0 newly installed, 0 to remove and 9 not upgraded.\n"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"source": [
|
||
|
"import os\n",
|
||
|
"\n",
|
||
|
"commands = [\n",
|
||
|
" 'pip install protobuf',\n",
|
||
|
" # 'pip install gtfs_realtime_pb2',\n",
|
||
|
" 'apt update -y',\n",
|
||
|
" 'apt install protobuf-compiler -y',\n",
|
||
|
"]\n",
|
||
|
"\n",
|
||
|
"[os.system(cmd) for cmd in commands];"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 15,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"from google.transit import gtfs_realtime_pb2\n",
|
||
|
"from google.protobuf.json_format import MessageToDict # Importar la función MessageToDict\n",
|
||
|
"import json\n",
|
||
|
"\n",
|
||
|
"# Ruta al archivo .proto\n",
|
||
|
"nombre_de_archivo_proto = '/app/data/input/proto_file/input.proto'\n",
|
||
|
"\n",
|
||
|
"# Cargar el archivo .proto\n",
|
||
|
"feed = gtfs_realtime_pb2.FeedMessage()\n",
|
||
|
"with open(nombre_de_archivo_proto, 'rb') as proto_file:\n",
|
||
|
" feed.ParseFromString(proto_file.read())\n",
|
||
|
"\n",
|
||
|
"# Listas para almacenar las entidades con y sin 'trip_update'\n",
|
||
|
"entities_with_trip_update = []\n",
|
||
|
"entities_without_trip_update = []\n",
|
||
|
"\n",
|
||
|
"# Dividir las entidades en las dos listas basadas en la presencia del campo 'trip_update'\n",
|
||
|
"for entity in feed.entity:\n",
|
||
|
" entity_dict = MessageToDict(entity) # Convertir el objeto FeedEntity a un diccionario\n",
|
||
|
" if entity.HasField('trip_update'):\n",
|
||
|
" entities_with_trip_update.append(entity_dict)\n",
|
||
|
" else:\n",
|
||
|
" entities_without_trip_update.append(entity_dict)\n",
|
||
|
"\n",
|
||
|
"# Guardar las entidades en archivos .json separados\n",
|
||
|
"with open(\"/app/data/output/json_file/with_trip_update.json\", \"w\") as file:\n",
|
||
|
" json.dump({\"entity\": entities_with_trip_update}, file)\n",
|
||
|
"\n",
|
||
|
"with open(\"/app/data/output/json_file/without_trip_update.json\", \"w\") as file:\n",
|
||
|
" json.dump({\"entity\": entities_without_trip_update}, file)\n"
|
||
|
]
|
||
|
},
|
||
|
{
|
||
|
"cell_type": "code",
|
||
|
"execution_count": 20,
|
||
|
"metadata": {},
|
||
|
"outputs": [],
|
||
|
"source": [
|
||
|
"import pandas as pd\n",
|
||
|
"import json\n",
|
||
|
"\n",
|
||
|
"def load_with_trip_update(json_path):\n",
|
||
|
" \"\"\"\n",
|
||
|
" Carga el archivo JSON con actualizaciones de viaje y lo convierte en un DataFrame.\n",
|
||
|
" \n",
|
||
|
" Parámetros:\n",
|
||
|
" - json_path (str): Ruta del archivo JSON a cargar.\n",
|
||
|
" \n",
|
||
|
" Retorna:\n",
|
||
|
" - DataFrame: Datos del archivo JSON en formato tabular.\n",
|
||
|
" \"\"\"\n",
|
||
|
" with open(json_path, \"r\") as file:\n",
|
||
|
" data = json.load(file)\n",
|
||
|
" \n",
|
||
|
" df = pd.json_normalize(data=data['entity'], \n",
|
||
|
" record_path=['tripUpdate', 'stopTimeUpdate'], \n",
|
||
|
" meta=[['tripUpdate', 'trip', 'tripId'],\n",
|
||
|
" ['tripUpdate', 'trip', 'startTime'],\n",
|
||
|
" ['tripUpdate', 'trip', 'startDate'],\n",
|
||
|
" ['tripUpdate', 'trip', 'routeId'],\n",
|
||
|
" ['tripUpdate', 'trip', 'directionId'],\n",
|
||
|
" ['tripUpdate', 'vehicle', 'licensePlate'],\n",
|
||
|
" ['tripUpdate', 'timestamp'],\n",
|
||
|
" 'id'],\n",
|
||
|
" errors='ignore')\n",
|
||
|
" return df\n",
|
||
|
"\n",
|
||
|
"def load_without_trip_update(json_path):\n",
|
||
|
" \"\"\"\n",
|
||
|
" Carga el archivo JSON sin actualizaciones de viaje y lo convierte en un DataFrame.\n",
|
||
|
" \n",
|
||
|
" Parámetros:\n",
|
||
|
" - json_path (str): Ruta del archivo JSON a cargar.\n",
|
||
|
" \n",
|
||
|
" Retorna:\n",
|
||
|
" - DataFrame: Datos del archivo JSON en formato tabular.\n",
|
||
|
" \"\"\"\n",
|
||
|
" with open(json_path, \"r\") as file:\n",
|
||
|
" data = json.load(file)\n",
|
||
|
" \n",
|
||
|
" df = pd.json_normalize(data=data['entity'], \n",
|
||
|
" meta=[['vehicle', 'trip', 'tripId'],\n",
|
||
|
" ['vehicle', 'trip', 'startTime'],\n",
|
||
|
" ['vehicle', 'trip', 'startDate'],\n",
|
||
|
" ['vehicle', 'trip', 'routeId'],\n",
|
||
|
" ['vehicle', 'trip', 'directionId'],\n",
|
||
|
" ['vehicle', 'position', 'latitude'],\n",
|
||
|
" ['vehicle', 'position', 'longitude'],\n",
|
||
|
" ['vehicle', 'position', 'bearing'],\n",
|
||
|
" ['vehicle', 'position', 'odometer'],\n",
|
||
|
" ['vehicle', 'position', 'speed'],\n",
|
||
|
" ['vehicle', 'timestamp'],\n",
|
||
|
" ['vehicle', 'vehicle', 'licensePlate'],\n",
|
||
|
" 'id'],\n",
|
||
|
" errors='ignore')\n",
|
||
|
" return df\n",
|
||
|
"\n",
|
||
|
"df_wi = load_with_trip_update('/app/data/output/json_file/with_trip_update.json')\n",
|
||
|
"df_wo = load_without_trip_update('/app/data/output/json_file/without_trip_update.json')"
|
||
|
]
|
||
|
}
|
||
|
],
|
||
|
"metadata": {
|
||
|
"kernelspec": {
|
||
|
"display_name": "Python 3 (ipykernel)",
|
||
|
"language": "python",
|
||
|
"name": "python3"
|
||
|
},
|
||
|
"orig_nbformat": 4
|
||
|
},
|
||
|
"nbformat": 4,
|
||
|
"nbformat_minor": 2
|
||
|
}
|