@@ -6,6 +6,7 @@ const util = require('util');
6
6
const blacklist = require ( 'pelias-blacklist-stream/loader' ) ( ) ;
7
7
const analysis = require ( '../lib/analysis' ) ;
8
8
const language = dir ( '../config/language' ) ;
9
+ const LOW_POPULATION_THRESHOLD = 2000 ;
9
10
10
11
// list of languages / tags we favour in cases of deduplication
11
12
const LANG_PREFS = [ 'eng' , 'und' ] ;
@@ -97,12 +98,25 @@ function insertWofRecord( wof, next ){
97
98
}
98
99
}
99
100
100
- // add 'name:*'
101
- for ( var attr in wof ) {
102
- // https://github.com/whosonfirst/whosonfirst-names
103
- // names: preferred|colloquial|variant|unknown
104
- var match = attr . match ( /^name:([a-z]{3})_x_(preferred|colloquial|variant)$/ ) ;
105
- if ( match ) {
101
+ // note: skip all `name:*` fields when we suspect that they were sourced from
102
+ // machine transliteration via WikiData.
103
+ // see: https://github.com/whosonfirst-data/whosonfirst-data/issues/799
104
+ const hasDeadOrObscureLanguages = _ . has ( wof , 'name:vol_x_preferred' ) ;
105
+ const isLowOrUnknownPopulation = _ . get ( doc , 'population' , 0 ) < LOW_POPULATION_THRESHOLD ;
106
+ const isMegaCity = _ . get ( doc , 'wof:megacity' , 0 ) === 1 ;
107
+ const isCapitalCity = ! _ . isEmpty ( _ . get ( doc , 'wof:capital_of' ) ) ;
108
+ const isLikelyTransliterated = (
109
+ hasDeadOrObscureLanguages && isLowOrUnknownPopulation && ! isMegaCity && ! isCapitalCity
110
+ ) ;
111
+ if ( ! isLikelyTransliterated ) {
112
+
113
+ // add 'name:*' fields
114
+ for ( var attr in wof ) {
115
+ // https://github.com/whosonfirst/whosonfirst-names
116
+ // names: preferred|colloquial|variant|unknown
117
+ const match = attr . match ( /^name:([a-z]{3})_x_(preferred|colloquial|variant)$/ ) ;
118
+ if ( ! match ) { continue ; }
119
+
106
120
// Fix for https://github.com/pelias/placeholder/pull/126
107
121
// Transform iso codes 639-2/B to 639-2/T
108
122
const lang = language . alternatives [ match [ 1 ] ] || match [ 1 ] ;
@@ -127,6 +141,7 @@ function insertWofRecord( wof, next ){
127
141
doc . names [ lang ] = wof [ attr ] ;
128
142
}
129
143
}
144
+
130
145
}
131
146
}
132
147
0 commit comments