se procesa archivo zip

francisco/photos
Francisco Sandoval 2024-01-20 14:18:32 -03:00
parent d35e135565
commit 06483aa811
2 changed files with 242 additions and 21 deletions

View File

@ -0,0 +1,203 @@
ALTER TABLE rol_linea drop CONSTRAINT IF EXISTS rol_linea_id_linea_fkey;
-----
ALTER TABLE linea_paradero drop CONSTRAINT IF EXISTS linea_paradero_id_linea_fkey;
-----
update operador
set vigente =false
where id_operador not in (select agency_id from z_agency za);
-----
update operador
set vigente =true
where id_operador in (select agency_id from z_agency za);
-----
insert into operador
select agency_id, null as id_region , true as vigente,
agency_name, agency_url , agency_timezone , agency_lang , agency_phone , agency_fare_url
from z_agency za
where agency_id not in (select id_operador from operador za);
-----
delete from gtfs_calendar ;
-----
insert into gtfs_calendar
select service_id, monday::bool , tuesday::bool , wednesday::bool , thursday::bool ,friday::bool ,saturday::bool ,sunday::bool
from z_calendar zc;
-----
update paradero p
set vigente = false
where id_paradero::text not in (select stop_id from z_stops );
-----
update paradero
set vigente = true,
stop_name= (select stop_name from z_stops where stop_id=paradero.id_paradero::text limit 1 ),
stop_desc=(select stop_desc from z_stops where stop_id=paradero.id_paradero::text limit 1 ),
stop_lat =(select stop_lat from z_stops where stop_id=paradero.id_paradero::text limit 1)::float(8),
stop_lon =(select stop_lon from z_stops where stop_id=paradero.id_paradero::text limit 1)::float(8),
stop_code=(select stop_code from z_stops where stop_id=paradero.id_paradero::text limit 1)
where id_paradero::text in (select stop_id from z_stops );
-----
insert into paradero
select
stop_id, null as id_comuna, null as id_tipo_paradero, true,
stop_code , stop_name , stop_desc , stop_lat::float(8) , stop_lon::float(8) , zone_id ,
stop_url , location_type ,
parent_station ,null as stop_time_zone , wheelchair_boarding::numeric
from z_stops zs
where stop_id not in (select id_paradero::text from paradero);
-----
delete from linea_paradero;
-----
delete from linea ;
-----
insert into linea
select distinct
trim(zr.route_id)||'-'||trim(zt.direction_id::varchar) as route_id,
zr.agency_id ,
substring(zt.trip_headsign FROM '(\S+) -') as route_short_name ,
zr.route_desc ,
zr.route_type::numeric ,
zr.route_url,
zr.route_color ,
zr.route_text_color ,
zt.trip_headsign as route_long_name , true
from z_routes zr
inner join z_trips zt
on zr.route_id =zt.route_id
and replace (zt.trip_headsign,' ','') =replace ((SPLIT_PART(route_long_name, '-', 1)||'-'||SPLIT_PART(route_long_name, '-', 2)),' ','')
where route_long_name not like '%'||route_short_name ||'%'
union
select distinct
trim(zr.route_id)||'-'||trim(zt.direction_id::varchar) as route_id,
zr.agency_id ,
substring(zt.trip_headsign FROM '(\S+) -') as route_short_name ,
zr.route_desc ,
zr.route_type::numeric ,
zr.route_url,
zr.route_color ,
zr.route_text_color ,
zt.trip_headsign as route_long_name , true
from z_routes zr
inner join z_trips zt
on zr.route_id =zt.route_id
and replace (zt.trip_headsign,' ','') =replace ((SPLIT_PART(route_long_name, '-', 3)||'-'||SPLIT_PART(route_long_name, '-', 4)),' ','')
where route_long_name not like '%'||route_short_name ||'%'
union
select distinct
trim(zr.route_id)||'-'||trim(zt.direction_id::varchar) as route_id,
zr.agency_id ,
route_short_name ,
zr.route_desc ,
zr.route_type::numeric ,
zr.route_url,
zr.route_color ,
zr.route_text_color ,
zt.trip_headsign as route_long_name , true
from z_routes zr
inner join z_trips zt
on zr.route_id =zt.route_id
and
( position('-' in zt.trip_headsign) = 0
or
route_long_name like '%'||route_short_name ||'%'
)
order by 1;
-----
delete from gtfs_shape;
-----
insert into gtfs_shape
select shape_id::numeric,shape_pt_lat::float8,shape_pt_lon::float8,
shape_pt_sequence::numeric,shape_dist_traveled::float8
from z_shapes zs;
-----
delete from gtfs_stop_times;
-----
delete from gtfs_frequencie;
-----
delete from gtfs_trips;
-----
insert into gtfs_trips
select
trip_id,
trim(route_id)||'-'||trim(direction_id::varchar) as route_id,
shape_id::numeric ,null as regreso, trip_headsign,trip_short_name,direction_id::numeric,service_id::varchar,block_id
from z_trips zt ;
-----
insert into gtfs_stop_times
select stop_id,trip_id,arrival_time,stop_sequence::numeric,stop_headsign,departure_time,drop_off_type::numeric,null as shape_dist_traveled ,
timepoint::numeric , pickup_type::numeric
from z_stop_times zst ;
-----
insert into linea_paradero (id_linea , id_paradero )
SELECT DISTINCT
l.id_linea , p.id_paradero
FROM linea l
JOIN gtfs_trips t ON l.id_linea =t.id_linea
JOIN gtfs_stop_times st ON t.id_trip = st.id_trip
JOIN paradero p ON st.id_paradero = p.id_paradero;
-----
UPDATE paradero
SET stop_name = REGEXP_REPLACE(
REGEXP_REPLACE(
stop_name,
'\yentre\y',
'-entre',
'gi'
),
'\yesq\y',
'-esq',
'gi'
);
-----
ALTER TABLE rol_linea ADD CONSTRAINT rol_linea_id_linea_fkey FOREIGN KEY (id_linea) REFERENCES linea(id_linea);
-----
ALTER TABLE linea_paradero ADD CONSTRAINT linea_paradero_id_linea_fkey FOREIGN KEY (id_linea) REFERENCES linea(id_linea);
-----

View File

@ -37,7 +37,6 @@ def upload_zip(request):
filezip = request.FILES['filezip']
if filezip.closed == False and hasattr(filezip,'temporary_file_path'):
# Notificar a Redis que proceso ha iniciado
r.set('filezip', filezip.name)
@ -83,25 +82,31 @@ def procesa_zip(file_upload):
"trips.txt"
]
# Crear las tablas en el esquema y cargar los datos
cur = connection.cursor()
for file in gtfs_files:
filepath = os.path.join(carpeta_destino, file)
if os.path.exists(filepath):
create_and_load_table(filepath, file, cur)
os.remove(filepath)
try:
# Crear las tablas en el esquema y cargar los datos
cur = connection.cursor()
for file in gtfs_files:
filepath = os.path.join(carpeta_destino, file)
if os.path.exists(filepath):
create_and_load_table(filepath, file, cur)
os.remove(filepath)
procesa_tablas_z(cur)
cur.close()
connection.commit()
cur.close()
connection.commit()
print('==============', flush=True)
print(f'fin proceso archivo: {archivo_zip}', flush=True)
except Exception as e:
print(f'ERROR: {e}', flush=True)
finally:
# notificar a Redis que proceso ha finalizado
db_host = os.getenv('DB_REDIS_HOST')
db_port = os.getenv('DB_REDIS_PORT')
r = redis.Redis(host=db_host, port=db_port, decode_responses=True)
r.delete('filezip')
# notificar a Redis que proceso ha finalizado
db_host = os.getenv('DB_REDIS_HOST')
db_port = os.getenv('DB_REDIS_PORT')
r = redis.Redis(host=db_host, port=db_port, decode_responses=True)
r.delete('filezip')
print('==============', flush=True)
print(f'fin proceso archivo: {archivo_zip}', flush=True)
@ -115,21 +120,34 @@ def create_and_load_table(filepath, file_name, cursor):
column_definitions = ',\n'.join([f"{column_name} TEXT" for column_name in columns])
create_table = f"CREATE TABLE IF NOT EXISTS {table_name} (\n{column_definitions}\n);"
cursor.execute(create_table)
print(f'SQL> {create_table}', flush=True)
cursor.execute(create_table)
print('', flush=True)
truncate_table = f"truncate table {table_name}; commit;"
cursor.execute(truncate_table)
print(f'SQL> {truncate_table}', flush=True)
cursor.execute(truncate_table)
print('', flush=True)
file.seek(0)
next(reader)
insert_query = f"COPY {table_name} ({', '.join(columns)}) FROM STDIN WITH CSV"
cursor.copy_expert(sql=insert_query, file=file)
print(f'SQL> {insert_query}', flush=True)
cursor.copy_expert(sql=insert_query, file=file)
print('', flush=True)
def procesa_tablas_z(cursor):
current_folder = os.path.dirname(os.path.abspath(__file__))
with open(os.path.join(current_folder, 'actualiza_GTFS.sql'),'r') as file:
content = ''.join(file.readlines())
arr_sql = content.split('-----')
for sql in arr_sql:
sql = sql.strip()
if sql > ' ':
print(f'SQL> {sql}', flush=True)
cursor.execute(sql)
print('', flush=True)