Fix excluded_zones generator #4223

Merged · 3 commits · Feb 22, 2024
4 changes: 4 additions & 0 deletions source/navitiacommon/navitiacommon/utils.py
@@ -201,3 +201,7 @@ def files_exists_in_zipfile(poi_zipfile, set_files):
     z = zipfile.ZipFile(poi_zipfile)
     files_from_zip = {member.filename for member in z.infolist()}
     return not bool(len(set_files.difference(files_from_zip)))
+
+
+def is_empty_directory(directory, ext="json"):
+    return len(glob.glob("{}/*.{}".format(directory, ext))) == 0 if os.path.isdir(directory) else True
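For reference, a quick standalone check (Python 3) of how the new helper behaves. The helper body is copied from the diff above; the temporary directory and the zone.json file name are only illustrative.

import glob
import os
import tempfile


def is_empty_directory(directory, ext="json"):
    # True when the directory is missing or contains no "*.<ext>" files.
    return len(glob.glob("{}/*.{}".format(directory, ext))) == 0 if os.path.isdir(directory) else True


with tempfile.TemporaryDirectory() as tmp:
    print(is_empty_directory(tmp))  # True: no *.json file yet
    open(os.path.join(tmp, "zone.json"), "w").close()
    print(is_empty_directory(tmp))  # False: one *.json file present
print(is_empty_directory("no/such/directory"))  # True: a missing directory counts as empty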
40 changes: 23 additions & 17 deletions source/tyr/tyr/binarisation.py
@@ -50,7 +50,7 @@
 import navitiacommon.task_pb2
 from tyr import celery, redis
 from tyr.rabbit_mq_handler import RabbitMqHandler
-from navitiacommon import models
+from navitiacommon import models, utils
 from tyr.helper import get_instance_logger, get_named_arg, get_autocomplete_instance_logger, get_task_logger
 from contextlib import contextmanager
 import glob
@@ -1249,25 +1249,31 @@ def poi2asgard(self, instance_config, filename, job_id, dataset_uid):
         shutil.rmtree(excluded_zone_dir)

     os.mkdir(excluded_zone_dir)
-    poi_to_excluded_zones(filename, excluded_zone_dir, instance.name)
-
     try:
-        with collect_metric("poi2Asgard", job, dataset_uid):
-            asgard_bucket = current_app.config.get('MINIO_ASGARD_BUCKET_NAME', None)
-            if not asgard_bucket:
-                dataset.state = "failed"
-                return
-
-            bash_command = (
-                "env REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt "
-                "aws s3 sync ./{excluded_zone_dir} s3://{asgard_bucket}/excluded_zones".format(
-                    excluded_zone_dir=excluded_zone_dir, asgard_bucket=asgard_bucket
+        poi_to_excluded_zones(filename, excluded_zone_dir, instance.name)
+        if utils.is_empty_directory(excluded_zone_dir):
+            logger.warning(
+                "opg_excluded_zones: Impossible to push excluded zones to S3 for instance {}, empty directory".format(
+                    instance.name
                 )
             )
-            process = subprocess.Popen(bash_command.split(), stdout=subprocess.PIPE)
-            output, error = process.communicate()
-            if error:
-                raise Exception("Error occurred when putting excluded zones to asgard: {}".format(error))
+        else:
+            with collect_metric("poi2Asgard", job, dataset_uid):
+                asgard_bucket = current_app.config.get('MINIO_ASGARD_BUCKET_NAME', None)
+                if not asgard_bucket:
+                    dataset.state = "failed"
+                    return
+
+                bash_command = (
+                    "env REQUESTS_CA_BUNDLE=/etc/ssl/certs/ca-certificates.crt "
+                    "aws s3 sync ./{excluded_zone_dir} s3://{asgard_bucket}/excluded_zones".format(
+                        excluded_zone_dir=excluded_zone_dir, asgard_bucket=asgard_bucket
+                    )
+                )
+                process = subprocess.Popen(bash_command.split(), stdout=subprocess.PIPE)
+                output, error = process.communicate()
+                if error:
+                    raise Exception("Error occurred when putting excluded zones to asgard: {}".format(error))
     except:
         logger.exception("")
         job.state = "failed"
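A minimal standalone sketch (Python 3) of the guard-then-sync pattern the hunk above follows, assuming the aws CLI is on PATH. The sync_excluded_zones name, bucket and directory arguments are placeholders, subprocess.run stands in for the Popen call, and the REQUESTS_CA_BUNDLE prefix from the real command is omitted for brevity.

import glob
import logging
import os
import subprocess

logger = logging.getLogger(__name__)


def is_empty_directory(directory, ext="json"):
    # Same check as the helper added to navitiacommon/utils.py in this PR.
    return len(glob.glob("{}/*.{}".format(directory, ext))) == 0 if os.path.isdir(directory) else True


def sync_excluded_zones(excluded_zone_dir, asgard_bucket):
    # Skip the S3 sync entirely when no excluded zone file was generated.
    if is_empty_directory(excluded_zone_dir):
        logger.warning("opg_excluded_zones: nothing to push, %s is empty", excluded_zone_dir)
        return

    bash_command = "aws s3 sync ./{} s3://{}/excluded_zones".format(excluded_zone_dir, asgard_bucket)
    result = subprocess.run(bash_command.split(), stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    if result.returncode != 0:
        raise Exception("Error occurred when putting excluded zones to asgard: {}".format(result.stderr))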
80 changes: 52 additions & 28 deletions source/tyr/tyr/poi_to_excluded_zones.py
@@ -2,57 +2,81 @@
 import csv
 import json
 import logging
+import zipfile


-def poi_to_excluded_zones(poi_file, output_dir, instance_name):
-    logger = logging.getLogger(__name__)
+def parse_file(filename):
+    try:
+        with open(filename) as csvfile:
+            reader = csv.reader(csvfile, delimiter=';', quotechar='"')
+            for row in reader:
+                yield row
+    except Exception as e:
+        logging.getLogger(__name__).error(
+            "opg_excluded_zones: Unable to read file {}, error ({})".format(filename, str(e))
+        )
+        raise
+
+
+def get_excluded_zones(path):
+    result = {}
+    for row in parse_file(path + "/poi_properties.txt"):
+        if row[1].lower() != "excluded_zones":
+            continue
+        try:
+            result[row[0]] = json.loads(row[2])
+        except Exception:
+            logging.getLogger(__name__).error(
+                "opg_excluded_zones: Ignored line, Invalid json ({})".format(row[2])
+            )
+    return result
+
+
+def get_geometries_ids(path, excluded_zones):
+    result = {}
+    for row in parse_file(path + "/poi.txt"):
+        if row[0] not in excluded_zones:
+            continue
+        result[row[0]] = row[7]
+    return result
+
+
+def get_geometries_shapes(path):
+    result = {}
+    for row in parse_file(path + "/geometries.txt"):
+        result[row[0]] = row[1]
+    return result
+

+def poi_to_excluded_zones(poi_file, output_dir, instance_name):
     tmp_path = "tmp/poi_{}".format(instance_name)
-    import zipfile

     with zipfile.ZipFile(poi_file, 'r') as zip_ref:
         zip_ref.extractall(tmp_path)

-    excluded_zones = {}
-    excluded_geometries_ids = {}
-
     # get excluded zones
-    with open(tmp_path + "/poi_properties.txt") as csvfile:
-        reader = csv.reader(csvfile, delimiter=';', quotechar='"')
-        for row in reader:
-            if row[1].lower() != "excluded_zones":
-                continue
-            excluded_zones[row[0]] = json.loads(row[2])
+    excluded_zones = get_excluded_zones(tmp_path)

     # find geometry id
-    with open(tmp_path + "/poi.txt") as csvfile:
-        reader = csv.reader(csvfile, delimiter=';', quotechar='"')
-        for row in reader:
-            if row[0] not in excluded_zones:
-                continue
-            excluded_geometries_ids[row[0]] = row[7]
+    excluded_geometries_ids = get_geometries_ids(tmp_path, excluded_zones)

     if excluded_geometries_ids.keys() != excluded_zones.keys():
-        logger.warning("not all excluded zone's pois are found in poi.txt")
-        logger.warning("excluded_geometries_ids: {}".format(excluded_geometries_ids.keys()))
-        logger.warning("excluded_zones: {}".format(excluded_zones.keys()))
+        logging.getLogger(__name__).warning("not all excluded zone's pois are found in poi.txt")
+        logging.getLogger(__name__).warning("excluded_geometries_ids: {}".format(excluded_geometries_ids.keys()))
+        logging.getLogger(__name__).warning("excluded_zones: {}".format(excluded_zones.keys()))

     # read geometries
-    geometries_shapes = {}
-    with open(tmp_path + "/geometries.txt") as csvfile:
-        reader = csv.reader(csvfile, delimiter=';', quotechar='"')
-        for row in reader:
-            geometries_shapes[row[0]] = row[1]
+    geometries_shapes = get_geometries_shapes(tmp_path)

     for poi_id, zones in excluded_zones.items():
         geometry_id = excluded_geometries_ids.get(poi_id)
         if not geometry_id:
-            logger.error("{} could not be found in poi.txt".format(row[0]))
+            logging.getLogger(__name__).error("{} could not be found in poi.txt".format(poi_id))
             continue

         shape = geometries_shapes.get(geometry_id)
         if not shape:
-            logger.error("{} could not be found in geometries.txt".format(geometry_id))
+            logging.getLogger(__name__).error("{} could not be found in geometries.txt".format(geometry_id))
             continue

         for i, zone in enumerate(zones):
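To make the expected input concrete, here is a small self-contained example of the poi_properties.txt parsing that get_excluded_zones performs. The POI ids and the JSON payload are invented; only the semicolon delimiter and the column positions match the code above.

import csv
import io
import json

# A poi_properties.txt row is "<poi_id>;<property_key>;<property_value>"; rows whose
# key is "excluded_zones" carry a JSON payload in the third column.
sample = io.StringIO(
    'poi:1;excluded_zones;[{"example_key": "example_value"}]\n'
    'poi:2;name;Some other property\n'
)

excluded_zones = {}
for row in csv.reader(sample, delimiter=';', quotechar='"'):
    if row[1].lower() != "excluded_zones":
        continue
    excluded_zones[row[0]] = json.loads(row[2])

print(excluded_zones)  # {'poi:1': [{'example_key': 'example_value'}]}

poi.txt and geometries.txt go through the same parse_file generator: the geometry id is read from column 7 of poi.txt and the shape from column 1 of geometries.txt.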