Skip to content

Commit 4cc64b6

Browse files
authored
Merge pull request #4157 from hove-io/enrich_addresses/cache
[tyr worker] enrich-ntfs-with-addresses : use previous ntfs as cache
2 parents 7525558 + 14c0727 commit 4cc64b6

File tree

3 files changed

+31
-4
lines changed

3 files changed

+31
-4
lines changed

docker/debian8/Dockerfile-tyr-worker

+1-1
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,7 @@
11
FROM navitia/master
22

33
# Install binary enrich-ntfs-with-addresses from tartare-tools
4-
ENV TARTARE_TOOLS_VERSION="v0.36.4"
4+
ENV TARTARE_TOOLS_VERSION="v0.37.0"
55
ARG GITHUB_TOKEN
66
RUN git config --global url."https://x-access-token:${GITHUB_TOKEN}@github.com/hove-io/".insteadOf "ssh://git@github.com/hove-io/"
77
RUN git clone -b ${TARTARE_TOOLS_VERSION} --depth 1 https://x-access-token:${GITHUB_TOKEN}@github.com/hove-io/tartare-tools

source/tyr/tyr/binarisation.py

+19-3
Original file line numberDiff line numberDiff line change
@@ -1125,11 +1125,11 @@ def poi2mimir(self, instance_name, input, autocomplete_version, job_id=None, dat
11251125
@celery.task(bind=True)
11261126
def fusio2s3(self, instance_config, filename, job_id, dataset_uid):
11271127
"""Zip fusio file and launch fusio2s3"""
1128-
filename = enrich_ntfs_with_addresses(filename, job_id, dataset_uid)
1128+
filename = enrich_ntfs_with_addresses("fusio", instance_config, filename, job_id, dataset_uid)
11291129
_inner_2s3(self, "fusio", instance_config, filename, job_id, dataset_uid)
11301130

11311131

1132-
def enrich_ntfs_with_addresses(filename, job_id, dataset_uid):
1132+
def enrich_ntfs_with_addresses(dataset_type, instance_config, filename, job_id, dataset_uid):
11331133
"""launch enrich-ntfs-with-addresses"""
11341134

11351135
job = models.Job.query.get(job_id)
@@ -1141,10 +1141,23 @@ def enrich_ntfs_with_addresses(filename, job_id, dataset_uid):
11411141

11421142
file_dir = os.path.dirname(filename)
11431143
file_basename = os.path.basename(filename)
1144-
output_dir = file_dir + "/enriched_with_addresses"
1144+
output_dir = file_dir + "/for_loki"
11451145
os.makedirs(output_dir, 0o755)
11461146
output = output_dir + "/" + file_basename
11471147

1148+
previous_ntfs_path = output_dir + "/previous_ntfs.zip"
1149+
1150+
file_key = "{coverage}/{dataset_type}.zip".format(coverage=instance_config.name, dataset_type=dataset_type)
1151+
1152+
use_previous_ntfs = True
1153+
1154+
try:
1155+
minio_wrapper = MinioWrapper()
1156+
minio_wrapper.get_file(file_key, previous_ntfs_path)
1157+
except:
1158+
logger.warning("no previous ntfs found")
1159+
use_previous_ntfs = False
1160+
11481161
try:
11491162
params = [
11501163
"--input",
@@ -1155,6 +1168,9 @@ def enrich_ntfs_with_addresses(filename, job_id, dataset_uid):
11551168
current_app.config['BRAGI_URL'],
11561169
]
11571170

1171+
if use_previous_ntfs:
1172+
params.extend(["--previous-ntfs", previous_ntfs_path])
1173+
11581174
res = None
11591175
with collect_metric("enrich-ntfs-with-addresses", job, dataset_uid):
11601176
res = launch_exec("enrich-ntfs-with-addresses", params, logger)

source/tyr/tyr/minio.py

+11
Original file line numberDiff line numberDiff line change
@@ -78,6 +78,17 @@ def upload_file(
7878
)
7979
client.fput_object(self.bucket_name, file_key, filename, metadata=metadata, content_type=content_type)
8080

81+
def get_file(self, object_name, file_path):
82+
if self.use_iam_provider:
83+
self.retrieve_credentials()
84+
client = Minio(
85+
endpoint=self.endpoint,
86+
access_key=self.access_key,
87+
secret_key=self.secret_key,
88+
session_token=self.session_token,
89+
)
90+
return client.fget_object(self.bucket_name, object_name, file_path)
91+
8192
def retrieve_credentials(self):
8293
"""Retrieve credentials from ECS IAM Role"""
8394

0 commit comments

Comments
 (0)