{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Requirement already satisfied: protobuf in /usr/local/lib/python3.8/site-packages (4.24.3)\n"
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[33mWARNING: Running pip as the 'root' user can result in broken permissions and conflicting behaviour with the system package manager. It is recommended to use a virtual environment instead: https://pip.pypa.io/warnings/venv\u001b[0m\u001b[33m\n",
      "\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m23.0.1\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m23.2.1\u001b[0m\n",
      "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n",
      "\n",
      "WARNING: apt does not have a stable CLI interface. Use with caution in scripts.\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Hit:1 http://deb.debian.org/debian bookworm InRelease\n",
      "Hit:2 http://deb.debian.org/debian bookworm-updates InRelease\n",
      "Hit:3 http://deb.debian.org/debian-security bookworm-security InRelease\n",
      "Reading package lists...\n",
      "Building dependency tree...\n",
      "Reading state information...\n",
      "9 packages can be upgraded. Run 'apt list --upgradable' to see them.\n",
      "Reading package lists..."
     ]
    },
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\n",
      "WARNING: apt does not have a stable CLI interface. Use with caution in scripts.\n",
      "\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "\n",
      "Building dependency tree...\n",
      "Reading state information...\n",
      "protobuf-compiler is already the newest version (3.21.12-3).\n",
      "0 upgraded, 0 newly installed, 0 to remove and 9 not upgraded.\n"
     ]
    }
   ],
   "source": [
    "import os\n",
    "\n",
    "# Install the Python protobuf runtime, the GTFS Realtime bindings and the\n",
    "# protobuf compiler. gtfs-realtime-bindings provides the\n",
    "# google.transit.gtfs_realtime_pb2 module imported in the next cell;\n",
    "# protobuf-compiler (protoc) is only needed if the bindings ever have to be\n",
    "# regenerated from a .proto schema.\n",
    "commands = [\n",
    "    'pip install protobuf',\n",
    "    'pip install gtfs-realtime-bindings',\n",
    "    'apt update -y',\n",
    "    'apt install protobuf-compiler -y',\n",
    "]\n",
    "\n",
    "for cmd in commands:\n",
    "    os.system(cmd)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [],
   "source": [
    "from google.transit import gtfs_realtime_pb2\n",
    "from google.protobuf.json_format import MessageToDict  # converts protobuf messages into plain dicts\n",
    "import json\n",
    "\n",
    "# Path to the serialized GTFS Realtime feed. Despite the .proto extension,\n",
    "# this file is a binary FeedMessage (feed data), not a schema definition.\n",
    "nombre_de_archivo_proto = '/app/data/input/proto_file/input.proto'\n",
    "\n",
    "# Parse the binary feed into a FeedMessage object\n",
    "feed = gtfs_realtime_pb2.FeedMessage()\n",
    "with open(nombre_de_archivo_proto, 'rb') as proto_file:\n",
    "    feed.ParseFromString(proto_file.read())\n",
    "\n",
    "# Lists for the entities with and without a 'trip_update' field\n",
    "entities_with_trip_update = []\n",
    "entities_without_trip_update = []\n",
    "\n",
    "# Split the entities into the two lists based on the presence of 'trip_update'\n",
    "for entity in feed.entity:\n",
    "    entity_dict = MessageToDict(entity)  # convert the FeedEntity message to a dictionary\n",
    "    if entity.HasField('trip_update'):\n",
    "        entities_with_trip_update.append(entity_dict)\n",
    "    else:\n",
    "        entities_without_trip_update.append(entity_dict)\n",
    "\n",
    "# Save the two groups of entities to separate .json files\n",
    "with open(\"/app/data/output/json_file/with_trip_update.json\", \"w\") as file:\n",
    "    json.dump({\"entity\": entities_with_trip_update}, file)\n",
    "\n",
    "with open(\"/app/data/output/json_file/without_trip_update.json\", \"w\") as file:\n",
    "    json.dump({\"entity\": entities_without_trip_update}, file)\n"
   ]
  },
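  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Optional sanity check on the parsed feed and on the split performed above.\n",
    "# A minimal sketch: it only relies on the `feed`, `entities_with_trip_update`\n",
    "# and `entities_without_trip_update` objects created in the previous cell.\n",
    "print('GTFS Realtime version:', feed.header.gtfs_realtime_version)\n",
    "print('Feed timestamp:', feed.header.timestamp)\n",
    "print('Entities with trip_update:', len(entities_with_trip_update))\n",
    "print('Entities without trip_update:', len(entities_without_trip_update))"
   ]
  },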
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import json\n",
    "\n",
    "def load_with_trip_update(json_path):\n",
    "    \"\"\"\n",
    "    Load the JSON file of entities with trip updates and convert it into a DataFrame.\n",
    "\n",
    "    Parameters:\n",
    "    - json_path (str): Path of the JSON file to load.\n",
    "\n",
    "    Returns:\n",
    "    - DataFrame: The JSON data in tabular form, one row per stop time update.\n",
    "    \"\"\"\n",
    "    with open(json_path, \"r\") as file:\n",
    "        data = json.load(file)\n",
    "\n",
    "    # One row per stopTimeUpdate; the trip, vehicle and entity attributes are\n",
    "    # repeated on every row as metadata columns.\n",
    "    df = pd.json_normalize(data=data['entity'],\n",
    "                           record_path=['tripUpdate', 'stopTimeUpdate'],\n",
    "                           meta=[['tripUpdate', 'trip', 'tripId'],\n",
    "                                 ['tripUpdate', 'trip', 'startTime'],\n",
    "                                 ['tripUpdate', 'trip', 'startDate'],\n",
    "                                 ['tripUpdate', 'trip', 'routeId'],\n",
    "                                 ['tripUpdate', 'trip', 'directionId'],\n",
    "                                 ['tripUpdate', 'vehicle', 'licensePlate'],\n",
    "                                 ['tripUpdate', 'timestamp'],\n",
    "                                 'id'],\n",
    "                           errors='ignore')\n",
    "    return df\n",
    "\n",
    "def load_without_trip_update(json_path):\n",
    "    \"\"\"\n",
    "    Load the JSON file of entities without trip updates and convert it into a DataFrame.\n",
    "\n",
    "    Parameters:\n",
    "    - json_path (str): Path of the JSON file to load.\n",
    "\n",
    "    Returns:\n",
    "    - DataFrame: The JSON data in tabular form, one row per entity.\n",
    "    \"\"\"\n",
    "    with open(json_path, \"r\") as file:\n",
    "        data = json.load(file)\n",
    "\n",
    "    # Without a record_path, json_normalize produces one row per entity and\n",
    "    # flattens the nested vehicle/trip/position fields into dotted columns\n",
    "    # such as 'vehicle.position.latitude'. (The meta argument is ignored when\n",
    "    # no record_path is given, so it is omitted here.)\n",
    "    df = pd.json_normalize(data=data['entity'])\n",
    "    return df\n",
    "\n",
    "df_wi = load_with_trip_update('/app/data/output/json_file/with_trip_update.json')\n",
    "df_wo = load_without_trip_update('/app/data/output/json_file/without_trip_update.json')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}