pantallas-led/notebooks/get_realtime_data.ipynb

195 lines
7.5 KiB
Plaintext
Raw Permalink Normal View History

2023-10-23 20:43:15 -03:00
{
"cells": [
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Requirement already satisfied: protobuf in /usr/local/lib/python3.8/site-packages (4.24.3)\n"
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
"\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n",
"\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
"\n",
"WARNING: apt does not have a stable CLI interface. Use with caution in scripts.\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"Hit:1 http://deb.debian.org/debian bookworm InRelease\n",
"Hit:2 http://deb.debian.org/debian bookworm-updates InRelease\n",
"Hit:3 http://deb.debian.org/debian-security bookworm-security InRelease\n",
"Reading package lists...\n",
"Building dependency tree...\n",
"Reading state information...\n",
"9 packages can be upgraded. Run 'apt list --upgradable' to see them.\n",
"Reading package lists..."
]
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"\n",
"WARNING: apt does not have a stable CLI interface. Use with caution in scripts.\n",
"\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
"\n",
"Building dependency tree...\n",
"Reading state information...\n",
"protobuf-compiler is already the newest version (3.21.12-3).\n",
"0 upgraded, 0 newly installed, 0 to remove and 9 not upgraded.\n"
]
}
],
"source": [
"import os\n",
"\n",
"import subprocess\n",
"import sys\n",
"\n",
"# Environment setup. Each entry is an argv list, so no shell parsing is involved.\n",
"commands = [\n",
"    # Install through the kernel's own interpreter so the packages land in the\n",
"    # environment this notebook runs in. gtfs-realtime-bindings provides the\n",
"    # google.transit.gtfs_realtime_pb2 module imported in the next cell.\n",
"    [sys.executable, '-m', 'pip', 'install', 'protobuf', 'gtfs-realtime-bindings'],\n",
"    # apt-get instead of apt: apt warns that its CLI is not stable for scripts.\n",
"    ['apt-get', 'update', '-y'],\n",
"    ['apt-get', 'install', '-y', 'protobuf-compiler'],\n",
"]\n",
"\n",
"# Run the commands in order and report failures instead of silently\n",
"# discarding the exit codes.\n",
"for cmd in commands:\n",
"    try:\n",
"        exit_code = subprocess.run(cmd).returncode\n",
"    except FileNotFoundError:\n",
"        # The executable itself is missing (e.g. no apt-get on this system);\n",
"        # report it like any other failed command.\n",
"        exit_code = 127\n",
"    if exit_code != 0:\n",
"        print(f\"Command failed (exit code {exit_code}): {' '.join(cmd)}\")"
]
},
{
"cell_type": "code",
"execution_count": 15,
"metadata": {},
"outputs": [],
"source": [
"from pathlib import Path\n",
"import json\n",
"\n",
"from google.transit import gtfs_realtime_pb2\n",
"from google.protobuf.json_format import MessageToDict\n",
"\n",
"# Path to the serialized feed; it is read as bytes and parsed as a FeedMessage.\n",
"nombre_de_archivo_proto = '/app/data/input/proto_file/input.proto'\n",
"# Directory that receives the two JSON exports.\n",
"OUTPUT_DIR = Path('/app/data/output/json_file')\n",
"\n",
"# Parse the binary feed into a FeedMessage.\n",
"feed = gtfs_realtime_pb2.FeedMessage()\n",
"with open(nombre_de_archivo_proto, 'rb') as proto_file:\n",
"    feed.ParseFromString(proto_file.read())\n",
"\n",
"# Split the entities by whether the 'trip_update' field is set, converting each\n",
"# FeedEntity to a plain dict so it can be serialized as JSON.\n",
"entities_with_trip_update = []\n",
"entities_without_trip_update = []\n",
"for entity in feed.entity:\n",
"    if entity.HasField('trip_update'):\n",
"        target = entities_with_trip_update\n",
"    else:\n",
"        target = entities_without_trip_update\n",
"    target.append(MessageToDict(entity))\n",
"\n",
"# Create the output directory if needed so the writes below do not fail just\n",
"# because it is missing.\n",
"OUTPUT_DIR.mkdir(parents=True, exist_ok=True)\n",
"\n",
"# Write each group to its own file, wrapped under an 'entity' key.\n",
"exports = {\n",
"    'with_trip_update.json': entities_with_trip_update,\n",
"    'without_trip_update.json': entities_without_trip_update,\n",
"}\n",
"for file_name, entities in exports.items():\n",
"    with open(OUTPUT_DIR / file_name, 'w', encoding='utf-8') as file:\n",
"        json.dump({'entity': entities}, file)\n"
]
},
{
"cell_type": "code",
"execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"import pandas as pd\n",
"import json\n",
"\n",
"def load_with_trip_update(json_path: str) -> pd.DataFrame:\n",
"    \"\"\"\n",
"    Load the trip-update JSON file and flatten it into a DataFrame.\n",
"\n",
"    Each element of an entity's ``tripUpdate.stopTimeUpdate`` list becomes one row.\n",
"    The trip, vehicle, timestamp and entity ``id`` values are repeated on every row\n",
"    produced by that entity; meta keys an entity does not have are filled with NaN\n",
"    (``errors='ignore'``).\n",
"\n",
"    Entities without any ``stopTimeUpdate`` entries are skipped, because\n",
"    ``json_normalize`` raises KeyError when the record path is missing from an element.\n",
"\n",
"    Parameters:\n",
"    - json_path (str): Path of the JSON file to load. It must contain a top-level\n",
"      'entity' list.\n",
"\n",
"    Returns:\n",
"    - DataFrame: One row per stop-time update; empty when no entity has any.\n",
"    \"\"\"\n",
"    with open(json_path, \"r\", encoding=\"utf-8\") as file:\n",
"        data = json.load(file)\n",
"\n",
"    # Keep only entities that actually carry stop-time updates; errors='ignore'\n",
"    # covers missing meta keys but not a missing record path.\n",
"    entities = [\n",
"        entity for entity in data['entity']\n",
"        if (entity.get('tripUpdate') or {}).get('stopTimeUpdate')\n",
"    ]\n",
"\n",
"    df = pd.json_normalize(data=entities,\n",
"                           record_path=['tripUpdate', 'stopTimeUpdate'],\n",
"                           meta=[['tripUpdate', 'trip', 'tripId'],\n",
"                                 ['tripUpdate', 'trip', 'startTime'],\n",
"                                 ['tripUpdate', 'trip', 'startDate'],\n",
"                                 ['tripUpdate', 'trip', 'routeId'],\n",
"                                 ['tripUpdate', 'trip', 'directionId'],\n",
"                                 ['tripUpdate', 'vehicle', 'licensePlate'],\n",
"                                 ['tripUpdate', 'timestamp'],\n",
"                                 'id'],\n",
"                           errors='ignore')\n",
"    return df\n",
"\n",
"def load_without_trip_update(json_path):\n",
"    \"\"\"\n",
"    Load the JSON file of entities without trip updates and flatten it into a DataFrame.\n",
"\n",
"    Parameters:\n",
"    - json_path (str): Path of the JSON file to load.\n",
"\n",
"    Returns:\n",
"    - DataFrame: Contents of the file's 'entity' list in tabular form.\n",
"    \"\"\"\n",
"    # Fields of interest, each written as a path into the nested entity dict.\n",
"    # NOTE(review): no record_path is passed, so json_normalize may flatten every\n",
"    # nested key and not consult this list at all - confirm against the pandas docs.\n",
"    meta_fields = [\n",
"        ['vehicle', 'trip', 'tripId'],\n",
"        ['vehicle', 'trip', 'startTime'],\n",
"        ['vehicle', 'trip', 'startDate'],\n",
"        ['vehicle', 'trip', 'routeId'],\n",
"        ['vehicle', 'trip', 'directionId'],\n",
"        ['vehicle', 'position', 'latitude'],\n",
"        ['vehicle', 'position', 'longitude'],\n",
"        ['vehicle', 'position', 'bearing'],\n",
"        ['vehicle', 'position', 'odometer'],\n",
"        ['vehicle', 'position', 'speed'],\n",
"        ['vehicle', 'timestamp'],\n",
"        ['vehicle', 'vehicle', 'licensePlate'],\n",
"        'id',\n",
"    ]\n",
"\n",
"    with open(json_path, \"r\") as handle:\n",
"        payload = json.load(handle)\n",
"\n",
"    return pd.json_normalize(data=payload['entity'], meta=meta_fields, errors='ignore')\n",
"\n",
"# Locations of the two JSON exports to load.\n",
"with_trip_update_path = '/app/data/output/json_file/with_trip_update.json'\n",
"without_trip_update_path = '/app/data/output/json_file/without_trip_update.json'\n",
"\n",
"# One DataFrame per export: entities with trip updates, and the remaining entities.\n",
"df_wi = load_with_trip_update(with_trip_update_path)\n",
"df_wo = load_without_trip_update(without_trip_update_path)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3 (ipykernel)",
"language": "python",
"name": "python3"
},
"orig_nbformat": 4
},
"nbformat": 4,
"nbformat_minor": 2
}