|
| 1 | +# -*- coding: utf-8 -*- |
| 2 | +from bs4 import BeautifulSoup |
| 3 | +import pandas as pd |
| 4 | + |
| 5 | + |
class KMLConverter(object):
    """Read the placemarks of a KML file and export them as CSV.

    The file is parsed as soon as the converter is created; the resulting
    records (one dict per placemark) are stored in ``self.postes``.
    """

    # Raw ``<Data name="...">`` labels mapped to the column names used in
    # the records. Built once here instead of on every ``clean_key`` call.
    KEY_MAPPING = {
        u'DÉPARTEMENT': 'departement',
        'NB DE SAUVETEURS SNSM': 'nb_sauveteurs',
        'CP': 'code_postal',
        'VILLE': 'ville',
    }

    def __init__(self, filepath):
        """Store ``filepath`` and immediately parse it into ``self.postes``."""
        self.filepath = filepath
        self.postes = []
        self.parse()

    def clean_key(self, key):
        """Return the column name for a raw ``<Data>`` label.

        Raises:
            KeyError: if ``key`` is not one of the known labels.
        """
        return self.KEY_MAPPING[key]

    def parse_coordinates(self, value):
        """Return ``(latitude, longitude)`` for a ``<coordinates>`` node.

        ``value`` is the node itself (anything exposing ``.text``) or
        ``None`` when the placemark has no coordinates, in which case
        ``(None, None)`` is returned. The text is read as comma-separated
        numbers in ``longitude,latitude[,altitude]`` order.
        """
        if value is None:
            return None, None
        # Build a real list: on Python 3 ``map`` returns an iterator that
        # cannot be indexed, which made the original lookup raise TypeError.
        parts = [float(part) for part in value.text.split(',')]
        longitude, latitude = parts[0], parts[1]
        return latitude, longitude

    def parse(self):
        """Parse ``self.filepath`` and append one record per placemark.

        Each record holds ``nom``, ``latitude``, ``longitude`` and one entry
        per ``<Data>`` element (except ``gx_media_links``), keyed through
        ``clean_key``. ``nb_sauveteurs`` is stored as an ``int``; every other
        value is kept as stripped text.
        """
        # Binary mode lets BeautifulSoup detect the document encoding itself
        # instead of depending on the platform's default text encoding.
        with open(self.filepath, 'rb') as f:
            soup = BeautifulSoup(f, 'lxml')

        # Tag names are looked up in lower case, as in the original code
        # (the 'lxml' HTML parser lower-cases tag names).
        for placemark in soup.folder.find_all('placemark'):
            poste = {}
            poste['nom'] = placemark.find('name').text
            poste['latitude'], poste['longitude'] = self.parse_coordinates(
                placemark.find('coordinates')
            )
            for data in placemark.find_all('data'):
                key, value = data['name'], data.text.strip()
                if key == 'gx_media_links':
                    continue
                cleaned_key = self.clean_key(key)
                if cleaned_key == 'nb_sauveteurs':
                    # Go through float first so text such as "3.0" converts.
                    poste[cleaned_key] = int(float(value))
                else:
                    poste[cleaned_key] = value
            self.postes.append(poste)

    def to_csv(self, filepath):
        """Write the parsed records to ``filepath`` as UTF-8 CSV, no index."""
        df = pd.DataFrame(self.postes)
        df.to_csv(filepath, encoding='utf-8', index=False)
0 commit comments