%%javascript
// The cell magic above makes the rest of this cell run as JavaScript in the browser.
// Load the remote table-of-contents helper script.
$.getScript('https://kmahelona.github.io/ipython_notebook_goodies/ipython_notebook_toc.js')
// Append a <style> element to <head> that sets the font family used by tables.
// NOTE(review): `font-size=5em` is not a CSS declaration (CSS uses `property: value`),
// so it is presumably ignored, and the <style> tag is never closed. Confirm the
// intended font size before correcting either.
$('head').append('<style>table {font-family: "Doulos SIL"; font-size=5em;}')
from IPython.display import display
from IPython.display import HTML
from IPython.display import Image
import csv
import collections
import matplotlib
import random
def get_inventories(aggregated, consonants, genus_map=None):
    """Load the small consonant inventories from two tab-separated files.

    An inventory is kept when it declares at most 14 consonants, its
    language name contains neither 'dialect' nor 'Maxakali', its genus can
    be mapped to a family, and the number of phonemes listed for it matches
    the declared consonant count (mismatches are printed and dropped).

    Args:
        aggregated: path to the per-inventory TSV file.
        consonants: path to the TSV file listing one phoneme per row.
        genus_map: optional mapping from genus name to family name. When
            omitted, the module-level ``genus_to_family`` is used.

    Returns:
        A list of row dicts, each extended with 'LanguageFamilyRoot' and
        'Segments' and stripped of a few unused columns.
    """
    if genus_map is None:
        genus_map = genus_to_family

    # The data contains IPA symbols, so do not rely on the platform default encoding.
    with open(aggregated, encoding='utf-8', newline='') as f:
        aggregated_data = list(csv.DictReader(f, delimiter='\t'))

    # Group the phonemes by inventory once instead of rescanning the whole
    # consonant file for every inventory. File order is preserved.
    phonemes_by_inventory = collections.defaultdict(list)
    with open(consonants, encoding='utf-8', newline='') as f:
        for segment in csv.DictReader(f, delimiter='\t'):
            phonemes_by_inventory[segment['InventoryID']].append(segment['Phoneme'])

    inventories = []
    for inventory in aggregated_data:
        inventory_consonants = int(inventory['Consonants'])
        lang_name = inventory['LanguageName']
        if inventory_consonants > 14 or 'dialect' in lang_name:
            continue
        if 'Maxakali' in lang_name:
            continue
        # Get rid of ugly all-caps / all-lowercase language labels
        if lang_name.isupper() or lang_name.islower():
            inventory['LanguageName'] = lang_name.title()
        # Map the genus to the actual language family name
        try:
            inventory['LanguageFamilyRoot'] = genus_map[inventory['LanguageFamilyGenus']]
        except KeyError:
            # If the language is unclassified, we have to skip it
            continue
        inventory['Segments'] = list(phonemes_by_inventory.get(inventory['InventoryID'], ()))
        for key in ['Phonemes', 'Tones', 'Population', 'Trump', 'Country', 'Vowels']:
            del inventory[key]
        if inventory_consonants != len(inventory['Segments']):
            print('Invalid inv: {}'.format(inventory))
        else:
            inventories.append(inventory)
    return inventories
def get_features_dict(features_file, phonemes_file):
    """Build the feature table for the non-syllabic segments.

    Args:
        features_file: path to a TSV file with one row per segment, a
            'segment' column, a 'syllabic' column and one column per feature.
        phonemes_file: path to a TSV file with 'Phoneme' and
            'CombinedClass' columns.

    Returns:
        A dict mapping each segment to a dict of its features, where
        '0' becomes None, '+' becomes True and '-' becomes False. The
        two-valued contours '+,-' and '-,+' become True and additionally set
        'complex' to True. Columns holding any other value (including the
        'segment' column itself) are left out. Every entry also gets a
        'class' key, defaulting to 'c-d-c' for segments missing from
        *phonemes_file*.
    """
    # The data contains IPA symbols, so do not rely on the platform default encoding.
    with open(phonemes_file, encoding='utf-8', newline='') as f:
        phoneme_classes = {row['Phoneme']: row['CombinedClass']
                           for row in csv.DictReader(f, delimiter='\t')}
    with open(features_file, encoding='utf-8', newline='') as f:
        features = list(csv.DictReader(f, delimiter='\t'))

    features_d = {}
    for segment in features:
        # Only act on consonants
        if segment['syllabic'] == '+':
            continue
        segment_copy = {}
        for feature, value in segment.items():
            if value == '0':
                segment_copy[feature] = None
            elif value == '+':
                segment_copy[feature] = True
            elif value == '-':
                segment_copy[feature] = False
            elif value == '+,-' or value == '-,+':
                segment_copy[feature] = True
                segment_copy['complex'] = True
        segment_copy['class'] = phoneme_classes.get(segment['segment'], 'c-d-c')
        features_d[segment['segment']] = segment_copy
    return features_d
def get_genus_map(family_file, genus_file):
    """Map every genus name to the name of its language family.

    Args:
        family_file: path to a CSV file with 'pk' and 'name' columns.
        genus_file: path to a CSV file with 'name' and 'family_pk' columns.

    Returns:
        A dict ``{genus name: family name}``.

    Raises:
        KeyError: if a genus refers to a family id absent from *family_file*.
    """
    # Names may contain non-ASCII characters, so read the files as UTF-8
    # instead of relying on the platform default encoding.
    with open(family_file, encoding='utf-8', newline='') as f:
        families_map = {family['pk']: family['name'] for family in csv.DictReader(f)}
    # The source data links a genus to its family by id, not by a pretty name.
    with open(genus_file, encoding='utf-8', newline='') as f:
        genus_to_family = {genus['name']: families_map[genus['family_pk']]
                           for genus in csv.DictReader(f)}
    return genus_to_family
def is_voiced(segment):
    """Return the 'periodicGlottalSource' value stored for *segment* in ``features_d``."""
    segment_features = features_d[segment]
    return segment_features['periodicGlottalSource']
def is_plosive(segment):
    """Tell whether ``features_d`` describes *segment* as a plosive.

    That is: consonantal, neither sonorant nor continuant, and with both
    'strident' and 'delayedRelease' either False or unspecified (None).
    """
    feats = features_d[segment]
    # Read every feature first so that a missing one raises KeyError
    # regardless of the values of the others.
    consonantal = feats['consonantal']
    sonorant = feats['sonorant']
    continuant = feats['continuant']
    strident = feats['strident']
    delayed_release = feats['delayedRelease']

    if consonantal is not True or sonorant is not False or continuant is not False:
        return False
    if not (strident is None or strident is False):
        return False
    return delayed_release is None or delayed_release is False
def is_affricate(segment):
    """Tell whether ``features_d`` describes *segment* as an affricate.

    That is: consonantal, non-sonorant, non-continuant and strident.
    """
    feats = features_d[segment]
    # All four features are looked up before being combined.
    observed = (feats['consonantal'], feats['sonorant'],
                feats['continuant'], feats['strident'])
    expected = (True, False, False, True)
    return all(value is wanted for value, wanted in zip(observed, expected))
def is_fricative(segment):
    """Tell whether ``features_d`` describes *segment* as a fricative.

    That is: non-syllabic, non-sonorant and continuant.
    """
    feats = features_d[segment]
    # All three features are looked up before being combined.
    observed = (feats['syllabic'], feats['sonorant'], feats['continuant'])
    expected = (False, False, True)
    return all(value is wanted for value, wanted in zip(observed, expected))
def is_nasal(segment):
    """Tell whether ``features_d`` describes *segment* as a nasal consonant.

    That is: consonantal, nasal and non-continuant.
    """
    feats = features_d[segment]
    # All three features are looked up before being combined.
    observed = (feats['consonantal'], feats['nasal'], feats['continuant'])
    expected = (True, True, False)
    return all(value is wanted for value, wanted in zip(observed, expected))
def is_liquid(segment):
    """Tell whether ``features_d`` describes *segment* as a liquid.

    That is: consonantal, continuant, sonorant, not strident (False or
    None) and with a specified (non-None) 'lateral' value.
    """
    feats = features_d[segment]
    # Read every feature first so that a missing one raises KeyError
    # regardless of the values of the others.
    consonantal = feats['consonantal']
    continuant = feats['continuant']
    sonorant = feats['sonorant']
    strident = feats['strident']
    lateral = feats['lateral']

    if consonantal is not True or continuant is not True:
        return False
    if not (strident is False or strident is None):
        return False
    return sonorant is True and lateral is not None
def is_rothic(segment):
    """Tell whether *segment* is a consonantal liquid whose 'lateral' feature is False."""
    feats = features_d[segment]
    consonantal = feats['consonantal']
    lateral = feats['lateral']
    # is_liquid is only consulted for consonantal segments.
    if consonantal is not True:
        return False
    return is_liquid(segment) and lateral is False
def sanity_checks():
    """Assert that the classification predicates agree with known segments.

    Raises:
        AssertionError: on the first segment that is misclassified.
        KeyError: if a reference segment is missing from ``features_d``.
    """
    plosives = 'p b t d ʈ ɖ c ɟ k ɡ q ɢ ʔ'.split()
    fricatives = 'ɸ β f v θ ð s z ʃ ʒ ʂ ʐ ç ʝ x ɣ χ ʁ ħ ʕ h ɦ'.split()
    laterals = 'l̪ l ɭ ʎ ʟ'.split()
    nasals = ' m ɱ n̪ n ɳ ɲ ŋ ɴ'.split()
    voiced = 'b d ɖ ɟ ɡ ɢ β v ð z ʒ ʐ ʝ ɣ ʁ ʕ'.split()
    voiceless = 'p t ʈ c k q ʔ ɸ f θ s ʃ ʂ ç x χ'.split()

    # Each manner class must be recognised by its own predicate and
    # rejected by the listed others, checked in this order.
    for stop in plosives:
        assert is_plosive(stop)
        for other in (is_affricate, is_nasal, is_liquid):
            assert not other(stop)
    for spirant in fricatives:
        assert is_fricative(spirant)
        for other in (is_affricate, is_plosive, is_nasal, is_liquid):
            assert not other(spirant)
    for lateral in laterals:
        assert is_liquid(lateral)
    for nasal in nasals:
        assert is_nasal(nasal)
        for other in (is_fricative, is_affricate, is_plosive, is_liquid):
            assert not other(nasal)

    # Spot checks on single segments.
    assert not is_plosive('t̠ʃ')
    assert is_affricate('t̠ʃ')
    assert is_liquid('r')
    assert is_rothic('r')

    # Voicing; the offending segment is reported in the assertion message.
    for c in voiced:
        assert is_voiced(c), c
    for c in voiceless:
        assert not is_voiced(c), c
# Module-level state read by the predicates above and by the analysis code below.
# Feature table for the non-syllabic segments, keyed by segment.
features_d = get_features_dict('phoible-segments-features.tsv', 'phoible-phonemes.tsv')
# Genus name -> family name. get_inventories reads this global, so it has to be bound first.
genus_to_family = get_genus_map('family.csv', 'genus.csv')
# Inventories with at most 14 consonants (see get_inventories for the full filter).
inventories = get_inventories('phoible-aggregated.tsv', 'phoible-consonants.tsv')
# Make sure the classification predicates will not make phonological errors.
sanity_checks()
Here we'll filter out the inventories that are possibly duplicates. Two inventories count as duplicates when they share the same identifier, which is the language code followed by the language name in lower case with its spaces removed. Within a group of duplicates, we preferably keep the inventory that comes from PHOIBLE or SAPHON.
def get_canonical_name(inventory):
    """Return the duplicate-detection key of *inventory*.

    The key is the 'LanguageCode' value followed by the 'LanguageName'
    value with its spaces removed and lower-cased.
    """
    code, name = inventory['LanguageCode'], inventory['LanguageName']
    squeezed = ''.join(name.split(' '))
    return '{}{}'.format(code, squeezed.lower())
def filter_inventories(inventories, key=None):
    """Drop duplicate inventories, keeping the preferred source of each group.

    Inventories sharing the same identifier form a duplicate group. Within
    a group, only the inventories from the best available source are kept,
    in the priority order PH, SAPHON, UPSID. Groups containing none of
    these sources, and inventories without duplicates, are kept untouched.
    The input order is preserved.

    Args:
        inventories: iterable of inventory dicts with a 'Source' key.
        key: optional function computing the identifier of an inventory;
            defaults to ``get_canonical_name``.

    Returns:
        A new list with the surviving inventories.
    """
    if key is None:
        key = get_canonical_name
    preferred_sources = ('PH', 'SAPHON', 'UPSID')

    inventories = list(inventories)
    identifiers = [key(inventory) for inventory in inventories]
    counts = collections.Counter(identifiers)

    # Sources available inside each duplicate group, collected in one pass.
    dupe_sources = collections.defaultdict(set)
    for identifier, inventory in zip(identifiers, inventories):
        if counts[identifier] > 1:
            dupe_sources[identifier].add(inventory['Source'])

    filtered_inventories = []
    for identifier, inventory in zip(identifiers, inventories):
        if counts[identifier] > 1:
            sources = dupe_sources[identifier]
            # Pick the single best source of the group. Testing each
            # preferred source independently would discard the whole group
            # whenever it contains two or more of them.
            best = next((s for s in preferred_sources if s in sources), None)
            if best is not None and inventory['Source'] != best:
                continue
        filtered_inventories.append(inventory)
    return filtered_inventories
# Keep at most 160 de-duplicated inventories, ordered by family and then genus.
inventories = filter_inventories(inventories)[:160]
# Random alternative: inventories = random.sample(inventories, 160)
inventories.sort(key=lambda k: k['LanguageFamilyRoot']+k['LanguageFamilyGenus'])

html_table = ['<table>']
total_segments = 0

# Build the header
html_table.append('<tr>')
for header in ['Familia', 'Genus', 'Area', 'Llengua', '#', 'Segments']:
    html_table.append('<td><b>{}</b></td>'.format(header))
html_table.append('</tr>')

# Add the actual payload: one row per inventory
for inventory in inventories:
    html_table.append('<tr>')
    for attribute in ['LanguageFamilyRoot', 'LanguageFamilyGenus', 'Area', 'LanguageName']:
        html_table.append('<td>{}</td>'.format(inventory[attribute]))
    html_table.append('<td>{}</td>'.format(len(inventory['Segments'])))
    total_segments += len(inventory['Segments'])
    # No <font> element is open in this table, so only the cell is closed.
    html_table.append('<td>{}</td>'.format(' '.join(sorted(inventory['Segments']))))
    html_table.append('</tr>')
html_table.append('</table>')

print('Número de llengües: {}'.format(len(inventories)))
# An empty selection reports a mean of 0 instead of raising ZeroDivisionError.
print('Mitjana de fonemes per llengua {}'.format(
    total_segments / len(inventories) if inventories else 0))

# Construct the table and display it
html_table = ''.join(html_table)
html_table = HTML(html_table)
display(html_table)
inventories.sort(key=lambda k: k['LanguageName'])

# Literal list of "language, secondary system(s)" entries.
sistemes_secundaris = [
    'Awtu, Aspiració',
    'Auyana, Allargament',
    'Comanche, Labialització',
    'Dadibi, Aspiració',
    'Dani, Labialització',
    'ekari, Alliberament lateral',
    'fuzhou, Aspiració',
    'gaviao do para, Aspiració',
    'guajajara, Labialització',
    'kayabi, Labialització',
    'kitsijis, Aspiració',
    'karajo, Aspiració',
    'krinkati-tinbira, Aspiració',
    'kuikura-kalapalo, Palatalització',
    'northern paiute, Labialització',
    'pompeian, Labialització',
    'sanuma, Aspiració',
    'shawnee, Llargada',
    'shirihana, Aspiració',
    'wantoat, Labialització',
    'wariwari, Labialització / Glotalitzacio',
    'yawa, Palatalització']

# Number of entries mentioning each secondary system (substring match).
langs_with_aspiration = sum('Aspiració' in s for s in sistemes_secundaris)
langs_with_labialitzation = sum('Labialització' in s for s in sistemes_secundaris)
langs_with_palatalitzation = sum('Palatalitzaci' in s for s in sistemes_secundaris)
langs_with_glotalization = sum('Glotalitzacio' in s for s in sistemes_secundaris)
langs_with_duration = sum('Llargada' in s for s in sistemes_secundaris)
langs_with_lateralrelease = sum('Alliberament' in s for s in sistemes_secundaris)

# One table row per entry; every row is closed, and no <font> element is
# open here, so only the table itself is closed at the end.
html_table = ['<table>']
html_table.append('<tr><td>Llengua</td><td>Sistema secundari</td></tr>')
for s in sistemes_secundaris:
    html_table.append('<tr>')
    for attr in s.split(','):
        html_table.append('<td>{}</td>'.format(attr.title()))
    html_table.append('</tr>')
html_table.append('</table>')
display(HTML(''.join(html_table)))

# Absolute count and share of the selected inventories for each system.
print('Labialitzacio: {} {:.2%}'.format(langs_with_labialitzation, langs_with_labialitzation / len(inventories)))
print('Aspiracio: {} {:.2%}'.format(langs_with_aspiration, langs_with_aspiration / len(inventories)))
print('Palatalitzacio: {} {:.2%}'.format(langs_with_palatalitzation, langs_with_palatalitzation / len(inventories)))
print('Duracio: {} {:.2%}'.format(langs_with_duration, langs_with_duration / len(inventories)))
print('Glotalitzacio: {} {:.2%}'.format(langs_with_glotalization, langs_with_glotalization / len(inventories)))
print('Alliberament lateral: {} {:.2%}'.format(langs_with_lateralrelease, langs_with_lateralrelease / len(inventories)))
# Every consonant of every selected inventory, sorted.
all_consonants = sorted(consonant
                        for inventory in inventories
                        for consonant in inventory['Segments'])

# Count in a single pass instead of calling list.count() once per distinct consonant.
consonant_counts = collections.Counter(all_consonants)
counted = sorted(consonant_counts)
all_consonants_count = [[consonant, consonant_counts[consonant],
                         '{:.2%}'.format(consonant_counts[consonant] / len(inventories))]
                        for consonant in counted]
# Most frequent first; the sort is stable, so ties stay in sorted consonant order.
all_consonants_count.sort(key=lambda f: f[1], reverse=True)
all_consonants_uniq = [c[0] for c in all_consonants_count]

html_table = ['<table>']
html_table.append('<tr><td>Fonema</td><td>Ocurrències</td><td>Percentatge</td></tr>')
for consonant in all_consonants_count:
    html_table.append('<tr>')
    for attribute in consonant:
        html_table.append('<td>{}</td>'.format(attribute))
    html_table.append('</tr>')
# No <font> element is open in this table, so only the table is closed.
html_table.append('</table>')
display(HTML(''.join(html_table)))
print(len(all_consonants_uniq))
# Split the obstruents (non-sonorant segments) of every inventory by voicing.
langs_with_obstruents = []
for inventory in inventories:
    obstruents_voiced = []
    obstruents_voiceless = []
    for segment in inventory['Segments']:
        if features_d[segment]['sonorant'] is not False:
            continue
        bucket = obstruents_voiced if is_voiced(segment) else obstruents_voiceless
        bucket.append(segment)
    langs_with_obstruents.append({inventory['LanguageName']: [obstruents_voiced, obstruents_voiceless]})

# Render one row per language: voiced, voiceless and the totals.
html_table = ['<font face="Doulos SIL" size=4em><table>']
html_table.append('<tr><td>Llengua</td><td>obstruents sonores</td><td>obstruents sordes</td><td>total (sonores + sordes)</td>')
for lang in langs_with_obstruents:
    for inv, (voiced_inv, voiceless_inv) in lang.items():
        joined_inv = list(voiced_inv + voiceless_inv)
        summary = "{} ({} + {})".format(len(joined_inv), len(voiced_inv), len(voiceless_inv))
        html_table.extend([
            '<tr>',
            '<td>{}</td>'.format(inv),
            '<td>{}</td>'.format(' '.join(voiced_inv)),
            '<td>{}</td>'.format(' '.join(voiceless_inv)),
            '<td>{}</td>'.format(summary),
            '</tr>',
        ])
html_table.append('</table></font>')
display(HTML(''.join(html_table)))
# Split the plosives of every inventory by voicing.
langs_with_plosives = []
more_voiceless_plosives = 0
for inventory in inventories:
    plosives_voiced = []
    plosives_voiceless = []
    for segment in inventory['Segments']:
        if is_plosive(segment):
            if is_voiced(segment):
                plosives_voiced.append(segment)
            else:
                plosives_voiceless.append(segment)
    langs_with_plosives.append({inventory['LanguageName']: [plosives_voiced, plosives_voiceless]})

html_table = ['<table>']
html_table.append('<tr><td>Llengua</td><td>plosives sonores</td><td>plosives sordes</td><td>total (sonores + sordes)</td></tr>')
for lang in langs_with_plosives:
    for inv in lang:
        voiced_inv = lang[inv][0]
        voiceless_inv = lang[inv][1]
        joined_inv = list(voiced_inv + voiceless_inv)
        # The reported universal is "greater than or equal" ('major o igual'),
        # so ties count as well.
        if len(voiceless_inv) >= len(voiced_inv):
            more_voiceless_plosives += 1
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        html_table.append('<td>{}</td>'.format(' '.join(voiced_inv)))
        html_table.append('<td>{}</td>'.format(' '.join(voiceless_inv)))
        html_table.append('<td>{}</td>'.format("{} ({} + {})".format(len(joined_inv),
                                                                    len(voiced_inv),
                                                                    len(voiceless_inv))))
        html_table.append('</tr>')
# No <font> element is open in this table, so only the table is closed.
html_table.append('</table>')
display(HTML(''.join(html_table)))
print('El nombre d’oclusives sordes és normalment major o igual al d’oclusives sonores: {:.2%} ({} / {})'.format(more_voiceless_plosives / len(langs_with_plosives),
                                                                                                                more_voiceless_plosives, len(langs_with_plosives)))
# Split the fricatives of every inventory by voicing.
langs_with_fricatives = []
for inventory in inventories:
    fricatives_voiced = []
    fricatives_voiceless = []
    for segment in inventory['Segments']:
        if not is_fricative(segment):
            continue
        bucket = fricatives_voiced if is_voiced(segment) else fricatives_voiceless
        bucket.append(segment)
    langs_with_fricatives.append({inventory['LanguageName']: [fricatives_voiced, fricatives_voiceless]})

# Render one row per language: voiced, voiceless and the totals.
html_table = ['<font face="Doulos SIL" size=4em><table>']
html_table.append('<tr><td>Llengua</td><td>fricatives sonores</td><td>fricatives sordes</td><td>total (sonores + sordes)</td>')
for lang in langs_with_fricatives:
    for inv, (voiced_inv, voiceless_inv) in lang.items():
        joined_inv = list(voiced_inv + voiceless_inv)
        summary = "{} ({} + {})".format(len(joined_inv), len(voiced_inv), len(voiceless_inv))
        html_table.extend([
            '<tr>',
            '<td>{}</td>'.format(inv),
            '<td>{}</td>'.format(' '.join(voiced_inv)),
            '<td>{}</td>'.format(' '.join(voiceless_inv)),
            '<td>{}</td>'.format(summary),
            '</tr>',
        ])
html_table.append('</table></font>')
display(HTML(''.join(html_table)))
# Collect the nasals of every inventory; segments whose feature data is
# missing (KeyError) are ignored.
langs_with_nasals = []
for inventory in inventories:
    nasals = []
    for segment in inventory['Segments']:
        try:
            segment_is_nasal = is_nasal(segment)
        except KeyError:
            continue
        if segment_is_nasal:
            nasals.append(segment)
    langs_with_nasals.append({inventory['LanguageName']: sorted(nasals)})

# One row per language: name, number of nasals, then one cell per nasal.
html_table = ['<font face="Doulos SIL" size=4em><table>']
html_table.append('<tr><td>Llengua</td><td>no. de nasals</td><td colspan=0>Nasals</td></tr>')
for lang in langs_with_nasals:
    for inv in sorted(lang):
        row = ['<tr>', '<td>{}</td>'.format(inv), '<td>{}</td>'.format(len(lang[inv]))]
        row.extend('<td>{}</td>'.format(item) for item in sorted(lang[inv]))
        row.append('</tr>')
        html_table.extend(row)
html_table.append('</table></font>')
display(HTML(''.join(html_table)))
Codi per comprovar els universals
# Collect the liquids of every inventory and count how many languages have
# at least one, at least two, and a lateral / non-lateral contrast.
langs_with_liquides = []
at_least_one_liquid = 0
at_least_two_liquid = 0
has_laterality_contrast = 0
for inventory in inventories:
    liquides = []
    laterality = []
    for segment in inventory['Segments']:
        try:
            if is_liquid(segment):
                laterality.append(features_d[segment]['lateral'])
                liquides.append(segment)
        except KeyError:
            # Best effort: segments without feature data are ignored.
            pass
    if len(liquides) >= 1:
        at_least_one_liquid += 1
    if len(liquides) >= 2:
        at_least_two_liquid += 1
    if laterality.count(True) > 0 and laterality.count(False) > 0:
        has_laterality_contrast += 1
    langs_with_liquides.append({inventory['LanguageName']: sorted(liquides)})

# One row per language: name, number of liquids, then one cell per liquid.
html_table = ['<font face="Doulos SIL" size=4em><table>']
html_table.append('<tr><td>Llengua</td><td>Numero de liquides</td><td colspan=0>Liquides</td></tr>')
for lang in langs_with_liquides:
    for inv in sorted(lang):
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        html_table.append('<td>{}</td>'.format(len(lang[inv])))
        for item in sorted(lang[inv]):
            html_table.append('<td>{}</td>'.format(item))
        html_table.append('</tr>')
html_table.append('</table></font>')
display(HTML(''.join(html_table)))

# '{:.2%}' gives two decimals; '{:2%}' would be a field width with the
# default six decimals.
print("Tenen almenys, una líquida: {:.2%} ({} / {})".format(at_least_one_liquid / len(inventories),
                                                           at_least_one_liquid,
                                                           len(inventories)))
# Report 0% instead of dividing by zero when no language has two liquids.
print('{} {} {:.2%} ({} / {})'.format('Les lengües amb dues o més líquides generalment',
                                      'tenen una lateral i lateralitat contrastiva',
                                      has_laterality_contrast / at_least_two_liquid if at_least_two_liquid else 0,
                                      has_laterality_contrast, at_least_two_liquid))
# Group the consonants of every inventory by place of articulation.
langs_hierarchy = []
for inventory in inventories:
    dentoalveolars = [segment for segment in inventory['Segments']
                      if features_d[segment]['coronal'] and not features_d[segment]['dorsal']]
    labials = [segment for segment in inventory['Segments']
               if features_d[segment]['labial']]
    velars = [segment for segment in inventory['Segments']
              if features_d[segment]['dorsal'] and features_d[segment]['high']
              and not features_d[segment]['continuant']]
    palatals = [segment for segment in inventory['Segments']
                if features_d[segment]['coronal'] is True and features_d[segment]['dorsal'] is True]
    # Same order as the table columns below.
    subsystems = [dentoalveolars, labials, velars, palatals]
    langs_hierarchy.append({inventory['LanguageName']: subsystems})

html_table = ['<table>']
# Build header
html_table.append('<tr>')
for header in ['Llengua', 'Alveolars', 'Labials', 'Velars',
               'Palatals', 'Jerarquia (a > l > v > p)']:
    html_table.append('<td>{}</td>'.format(header))
html_table.append('</tr>')
# Add contents: one complete row per language that has any of these consonants.
for lang in langs_hierarchy:
    for inv, groups in lang.items():
        if not any(groups):
            continue
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        for item in groups:
            html_table.append('<td>{} [ {} ]</td>'.format(len(item), ' '.join(item)))
        # Only the first three classes are compared.
        follows_hierarchy = len(groups[0]) >= len(groups[1]) >= len(groups[2])
        html_table.append('<td>{}</td>'.format("si" if follows_hierarchy else "no"))
        html_table.append('</tr>')
html_table.append('</table>')
display(HTML(''.join(html_table)))
# Group the plosives of every inventory by place of articulation.
langs_plosive_hierarchy = []
for inventory in inventories:
    dentoalveolars = [segment for segment in inventory['Segments']
                      if is_plosive(segment) and features_d[segment]['coronal']
                      and not features_d[segment]['dorsal']]
    labials = [segment for segment in inventory['Segments']
               if is_plosive(segment) and features_d[segment]['labial']]
    velars = [segment for segment in inventory['Segments']
              if is_plosive(segment) and features_d[segment]['dorsal']
              and features_d[segment]['high'] and not features_d[segment]['continuant']]
    palatals = [segment for segment in inventory['Segments']
                if is_plosive(segment) and features_d[segment]['coronal'] is True
                and features_d[segment]['dorsal'] is True]
    # Same order as the table columns below.
    subsystems = [dentoalveolars, labials, velars, palatals]
    langs_plosive_hierarchy.append({inventory['LanguageName']: subsystems})

html_table = ['<table>']
# Build header
html_table.append('<tr>')
for header in ['Llengua', 'Oclusives alveolars', 'Oclusives labials', 'Oclusives Velars',
               'Oclusives Palatals', 'Jerarquia oclusives (a > l > v > p)']:
    html_table.append('<td>{}</td>'.format(header))
html_table.append('</tr>')
# Add contents: one complete row per language that has any of these plosives.
followers = 0
for lang in langs_plosive_hierarchy:
    for inv, groups in lang.items():
        if not any(groups):
            continue
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        for item in groups:
            html_table.append('<td>{} [ {} ]</td>'.format(len(item), ' '.join(item)))
        # Only the first three classes are compared.
        follows_hierarchy = len(groups[0]) >= len(groups[1]) >= len(groups[2])
        if follows_hierarchy:
            followers += 1
        html_table.append('<td>{}</td>'.format("si" if follows_hierarchy else "no"))
        html_table.append('</tr>')
html_table.append('</table>')
display(HTML(''.join(html_table)))
print('Followers: {:.2%} ({} / {})'.format(followers / len(inventories), followers, len(inventories)))
# Group the affricates of every inventory by place of articulation.
langs_affricate_hierarchy = []
for inventory in inventories:
    palatals = [segment for segment in inventory['Segments']
                if is_affricate(segment) and not features_d[segment]['anterior']
                and features_d[segment]['coronal'] and not features_d[segment]['dorsal']]
    dentoalveolars = [segment for segment in inventory['Segments']
                      if is_affricate(segment) and features_d[segment]['anterior']
                      and features_d[segment]['coronal'] and not features_d[segment]['dorsal']]
    labials = [segment for segment in inventory['Segments']
               if is_affricate(segment) and features_d[segment]['labial']]
    velars = [segment for segment in inventory['Segments']
              if is_affricate(segment) and features_d[segment]['dorsal']
              and features_d[segment]['high'] and not features_d[segment]['continuant']]
    # The order has to match both the column headers and the hierarchy
    # p > d > v > l that is checked below: velars come before labials.
    subsystems = [palatals, dentoalveolars, velars, labials]
    langs_affricate_hierarchy.append({inventory['LanguageName']: subsystems})

html_table = ['<table>']
# Build header
html_table.append('<tr>')
for header in ['Llengua', 'Africades palatals', 'Africades dentoalveolars', 'Africades Velars',
               'Africades labials', 'Africades (p > d > v > l)']:
    html_table.append('<td>{}</td>'.format(header))
html_table.append('</tr>')
# Add contents: only languages with at least one affricate are candidates.
followers = 0
candidates = 0
for lang in langs_affricate_hierarchy:
    for inv, groups in lang.items():
        if not any(groups):
            continue
        candidates += 1
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        for item in groups:
            html_table.append('<td>{} [ {} ]</td>'.format(len(item), ' '.join(item)))
        # Only the first three classes are compared.
        follows_hierarchy = len(groups[0]) >= len(groups[1]) >= len(groups[2])
        if follows_hierarchy:
            followers += 1
        html_table.append('<td>{}</td>'.format("si" if follows_hierarchy else "no"))
        html_table.append('</tr>')
html_table.append('</table>')
display(HTML(''.join(html_table)))
# Report 0% instead of dividing by zero when no language has affricates.
print('Followers: {:.2%} ({} / {})'.format(followers / candidates if candidates else 0, followers, candidates))
# Group the fricatives of every inventory by place of articulation.
langs_fricative_hierarchy = []
for inventory in inventories:
    dentoalveolars = [segment for segment in inventory['Segments']
                      if is_fricative(segment) and features_d[segment]['coronal']
                      and not features_d[segment]['dorsal']]
    labials = [segment for segment in inventory['Segments']
               if is_fricative(segment) and features_d[segment]['labial']]
    palatals = [segment for segment in inventory['Segments']
                if is_fricative(segment) and features_d[segment]['coronal'] is True
                and features_d[segment]['dorsal'] is True]
    # Fricatives are continuant by definition (see is_fricative), so the
    # velar filter must not demand a non-continuant segment or the column
    # would always be empty. Non-coronal dorsals are used instead, which
    # keeps velars apart from the palatals selected above.
    velars = [segment for segment in inventory['Segments']
              if is_fricative(segment) and features_d[segment]['dorsal']
              and features_d[segment]['high'] and features_d[segment]['coronal'] is False]
    # Same order as the table columns below.
    subsystems = [dentoalveolars, labials, palatals, velars]
    langs_fricative_hierarchy.append({inventory['LanguageName']: subsystems})

html_table = ['<table>']
# Build header
html_table.append('<tr>')
for header in ['Llengua', 'fricatives alveolars', 'fricatives labials', 'fricatives palatals',
               'fricatives velars', 'Jerarquia fricatives (a > l > p > v)']:
    html_table.append('<td>{}</td>'.format(header))
html_table.append('</tr>')
# Add contents: one complete row per language that has any of these fricatives.
followers = 0
for lang in langs_fricative_hierarchy:
    for inv, groups in lang.items():
        if not any(groups):
            continue
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        for item in groups:
            html_table.append('<td>{} [ {} ]</td>'.format(len(item), ' '.join(item)))
        # Only the first three classes are compared.
        follows_hierarchy = len(groups[0]) >= len(groups[1]) >= len(groups[2])
        if follows_hierarchy:
            followers += 1
        html_table.append('<td>{}</td>'.format("si" if follows_hierarchy else "no"))
        html_table.append('</tr>')
html_table.append('</table>')
display(HTML(''.join(html_table)))
print('Followers: {:.2%} ({} / {})'.format(followers / len(inventories), followers, len(inventories)))
# Nasals: dento-alveolar > labial > velar > palatal > retroflex.
langs_nasal_hierarchy = []
for inventory in inventories:
    dentoalveolars = [segment for segment in inventory['Segments']
                      if is_nasal(segment) and features_d[segment]['coronal']
                      and not features_d[segment]['dorsal']]
    labials = [segment for segment in inventory['Segments']
               if is_nasal(segment) and features_d[segment]['labial']]
    velars = [segment for segment in inventory['Segments']
              if is_nasal(segment) and features_d[segment]['dorsal']
              and features_d[segment]['high'] and not features_d[segment]['continuant']
              and features_d[segment]['coronal'] is False]
    palatals = [segment for segment in inventory['Segments']
                if is_nasal(segment) and features_d[segment]['coronal'] is True
                and features_d[segment]['dorsal'] is True]
    # Same order as the table columns below.
    subsystems = [dentoalveolars, labials, velars, palatals]
    langs_nasal_hierarchy.append({inventory['LanguageName']: subsystems})

html_table = ['<table>']
# Build header; the hierarchy label follows the column order (a, l, v, p).
html_table.append('<tr>')
for header in ['Llengua', 'nasals alveolars', 'nasals labials', 'nasals velars',
               'nasals palatals', 'Jerarquia nasals (a > l > v > p)']:
    html_table.append('<td>{}</td>'.format(header))
html_table.append('</tr>')
candidates = 0
followers = 0
# Add contents. Every language is a candidate, but only a language with a
# row of its own can be a follower: the verdict is computed and counted
# inside the row, so it is never carried over from the previous language.
for lang in langs_nasal_hierarchy:
    candidates += 1
    for inv, groups in lang.items():
        if not any(groups):
            continue
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(inv))
        for item in groups:
            html_table.append('<td>{} [ {} ]</td>'.format(len(item), ' '.join(item)))
        # Only the first three classes are compared.
        follows_hierarchy = len(groups[0]) >= len(groups[1]) >= len(groups[2])
        if follows_hierarchy:
            followers += 1
        html_table.append('<td>{}</td>'.format("si" if follows_hierarchy else "no"))
        html_table.append('</tr>')
html_table.append('</table>')
display(HTML(''.join(html_table)))
print('Followers: {:.2%} ({} / {})'.format(followers / candidates, followers, candidates))
# Sort the languages by whether their inventory contains all of /p/, /t/ and /k/.
langs_with_ptk = []
langs_without_ptk = []
for inventory in inventories:
    inventory_segments = inventory['Segments']
    has_ptk = all(stop in inventory_segments for stop in ('p', 't', 'k'))
    target = langs_with_ptk if has_ptk else langs_without_ptk
    target.append(inventory['LanguageName'])
ptk_in_language = len(langs_with_ptk)

print('Llengües amb /p t k/: {:.2%} ({} de {})'.format(ptk_in_language / len(inventories),
                                                      ptk_in_language,
                                                      len(inventories)))
# Count, over all inventories, the segments that are non-continuant,
# non-sonorant and strident.
africades = []
for inventory in inventories:
    for segment in inventory['Segments']:
        try:
            if features_d[segment]['continuant'] is False \
                    and features_d[segment]['sonorant'] is False \
                    and features_d[segment]['strident']:
                africades.append(segment)
        except KeyError:
            # Best effort: segments without feature data are ignored.
            pass
africades = collections.Counter(africades)

# One complete row per segment; the rows depend on nothing but the counter.
html_table = ['<font face="Doulos SIL" size=4em><table>']
html_table.append('<tr><td>Fonema</td><td>Ocurrències</td></tr>')
for k, v in africades.items():
    html_table.append('<tr><td>{}</td><td>{}</td></tr>'.format(k, v))
html_table.append('</table></font>')
display(HTML(''.join(html_table)))
# Among the languages with exactly one affricate, count those where it is /t̠ʃ/.
followers = 0
possible_followers = 0
for inventory in inventories:
    affricates = [s for s in inventory['Segments'] if is_affricate(s)]
    if len(affricates) != 1:
        continue
    possible_followers += 1
    if affricates[0] == 't̠ʃ':
        followers += 1

share = followers / possible_followers
print('Segueixen l\'universal {:.2%} ({} de {})'.format(share, followers, possible_followers))
# Count the languages that have fewer affricates than plosives.
followers = 0
possible_followers = 0
for inventory in inventories:
    affricates = [s for s in inventory['Segments'] if is_affricate(s)]
    plosives = [s for s in inventory['Segments'] if is_plosive(s)]
    possible_followers += 1
    if len(plosives) > len(affricates):
        followers += 1

share = followers / possible_followers
print('Segueixen l\'universal {:.2%} ({} de {})'.format(share, followers, possible_followers))
# Among the languages with exactly one fricative, count those where it is
# /s/ (first part of the universal) or /f/ (second part).
followers_1st_part = 0
followers_2nd_part = 0
candidates = 0
for inventory in inventories:
    fricatives = [s for s in inventory['Segments'] if is_fricative(s)]
    if len(fricatives) == 1:
        candidates += 1
        if fricatives[0] == 's':
            followers_1st_part += 1
        elif fricatives[0] == 'f':
            followers_2nd_part += 1

print('Segueixen l\'universal primera part {:.2%} ({} de {})'.format(followers_1st_part / candidates,
                                                                    followers_1st_part,
                                                                    candidates))
# This line reports the second part ('segona part'), the /f/ languages.
print('Segueixen l\'universal segona part {:.2%} ({} de {})'.format(followers_2nd_part / candidates,
                                                                   followers_2nd_part,
                                                                   candidates))
print('Segueixen l\'universal (total) {:.2%} ({} de {})'.format((followers_1st_part + followers_2nd_part) / candidates,
                                                               (followers_1st_part + followers_2nd_part),
                                                               candidates))
# Among the languages that have fricatives, count those with more voiceless
# than voiced ones. Each entry of langs_with_fricatives maps a language
# name to [voiced fricatives, voiceless fricatives].
followers = 0
non_followers = []
candidates = 0
for lang in langs_with_fricatives:
    for v in lang.values():
        if len(v[0]) > 0 or len(v[1]) > 0:
            # Only languages with fricatives are candidates, so that
            # followers plus non-followers add up to the denominator.
            candidates += 1
            if len(v[1]) > len(v[0]):
                followers += 1
            else:
                non_followers.append(lang)

# Report 0% instead of dividing by zero when no language has fricatives.
print('Segueixen l\'universal {:.2%} ({} de {})'.format(followers / candidates if candidates else 0,
                                                       followers,
                                                       candidates))
print("No el segueixen:", non_followers)
# Among the languages with at least one fricative or plosive, list those
# that have more fricatives than plosives.
candidates = 0
followers = []
for inventory in inventories:
    fricatives = [s for s in inventory['Segments'] if is_fricative(s)]
    plosives = [s for s in inventory['Segments'] if is_plosive(s)]
    if not fricatives and not plosives:
        continue
    candidates += 1
    if len(fricatives) > len(plosives):
        followers.append(inventory['LanguageName'])

share = len(followers) / candidates
print('Segueixen l\'universal {:.2%} ({} de {})'.format(share, len(followers), candidates))
print('Llengües amb més fricatives que oclusives: {}'.format(', '.join(followers)))
# Among the languages with nasals, check for a dentoalveolar nasal
# (coronal and not dorsal) and compare the dentoalveolars with the rest.
candidates = 0
followers = []
at_least_one_dentoalveolar = 0
not_at_least_one_dentoalveolar = []
for inventory in inventories:
    nasals = [s for s in inventory['Segments'] if is_nasal(s)]
    if not nasals:
        continue
    candidates += 1

    # Partition the nasals in a single pass.
    dentoalveolar = []
    not_dentoalveolar = []
    for s in nasals:
        if features_d[s]['coronal'] and not features_d[s]['dorsal']:
            dentoalveolar.append(s)
        else:
            not_dentoalveolar.append(s)

    if dentoalveolar:
        at_least_one_dentoalveolar += 1
    else:
        not_at_least_one_dentoalveolar.append(inventory)
    if len(dentoalveolar) >= len(not_dentoalveolar):
        followers.append(inventory['LanguageName'])

print('Tenen, almenys, una nasal dentoalveolar: {:.2%} ({} de {})'.format(at_least_one_dentoalveolar / candidates,
                                                                       at_least_one_dentoalveolar,
                                                                       candidates))
print('Tenen tantes o més dentoalveolars que no dentoalveolars {:.2%} ({} de {})'.format(len(followers) / candidates,
                                                                                      len(followers),
                                                                                      candidates))
# Among the languages with nasals, list those with more voiced than voiceless nasals.
candidates = 0
followers = []
not_followers = []
for inventory in inventories:
    nasals = [s for s in inventory['Segments'] if is_nasal(s)]
    if not nasals:
        continue
    candidates += 1

    # Partition the nasals by the truthiness of their voicing feature.
    voiced = []
    voiceless = []
    for s in nasals:
        (voiced if features_d[s]['periodicGlottalSource'] else voiceless).append(s)

    bucket = followers if len(voiced) > len(voiceless) else not_followers
    bucket.append(inventory['LanguageName'])

print('Tenen més nasals sonores que sordes: {:.2%} ({} de {})'.format(len(followers) / candidates, len(followers), candidates))
# Among the languages with nasals: which have /n/, which have /n/ and /ŋ/,
# and which have /n/ and /m/ but were not already counted for /ŋ/.
candidates = 0
followers_1 = []
followers_2 = []
followers_3 = []
non_followers = []
for inventory in inventories:
    nasals = [s for s in inventory['Segments'] if is_nasal(s)]
    if not nasals:
        continue
    candidates += 1

    # 'nasals' is non-empty here, and containing two distinct nasals
    # implies a length of at least two, so membership tests are enough.
    has_n = 'n' in nasals
    (followers_1 if has_n else non_followers).append(inventory)

    if has_n and 'ŋ' in nasals:
        followers_2.append(inventory)
    elif has_n and 'm' in nasals:
        followers_3.append(inventory)
    else:
        non_followers.append(inventory)

print('Tenen almenys una nasal, /n/. {:.2%} ({} de {})'.format(len(followers_1) / candidates, len(followers_1), candidates))
print('Tenen almenys dues nasals: n i ŋ. {:.2%} ({} de {})'.format(len(followers_2) / candidates, len(followers_2), candidates))
print('Tenen almenys dues nasals: n i m. {:.2%} ({} de {})'.format(len(followers_3) / candidates, len(followers_3), candidates))
# Presence matrix: one row per language, one column per consonant (ordered
# as in all_consonants_uniq) and a final column with the number of hits.
html_table = ['<font face="Doulos SIL" size=4em><table>']
# Header row: empty corner cell, the consonants, and the count column.
html_table.append('<tr><td></td>')
for consonant in all_consonants_uniq:
    html_table.append('<td>{}</td>'.format(consonant))
html_table.append('<td>#</td></tr>')

for inventory in inventories:
    # Set membership avoids scanning the segment list for every column.
    inventory_segments = set(inventory['Segments'])
    html_table.append('<tr>')
    html_table.append('<td>{}</td>'.format(inventory['LanguageName']))
    count = 0
    for consonant in all_consonants_uniq:
        if consonant in inventory_segments:
            html_table.append('<td>X</td>')
            count += 1
        else:
            html_table.append('<td> </td>')
    html_table.append('<td>{}</td>'.format(count))
    html_table.append('</tr>')
    # Every segment of the inventory must have been matched by a column.
    assert count == len(inventory['Segments'])
html_table.append('</table></font>')
display(HTML(''.join(html_table)))
# Print one tab-separated "family, genus, language" line per inventory.
family_tree = []
for inventory in inventories:
    family, genus, name = (inventory[field] for field in
                           ('LanguageFamilyRoot', 'LanguageFamilyGenus', 'LanguageName'))
    print('{}\t{}\t{}'.format(family, genus, name))
def _to_decimal_degrees(value):
    """Convert a 'degrees:minutes[:seconds]' string to signed decimal degrees.

    A value without ':' is returned as a plain float. The sign is read from
    the text, so that e.g. '-00:30' stays negative.
    NOTE(review): assumes the ':' in the source data separates sexagesimal
    parts (degrees from minutes) - confirm against the data set.
    """
    parts = value.strip().split(':')
    magnitude = sum(abs(float(part)) / 60 ** power for power, part in enumerate(parts))
    return -magnitude if parts[0].startswith('-') else magnitude


# Print the inventories that have coordinates as a GeoJSON FeatureCollection
# of points. The features are joined with commas, so the last one is not
# followed by a trailing comma, which JSON does not allow.
geojson_features = []
for inventory in inventories:
    lon, lat = inventory['Longitude'], inventory['Latitude']
    if lon == 'NULL' or lat == 'NULL':
        continue
    coords = '[{:.3f}, {:.3f}]'.format(_to_decimal_degrees(lon), _to_decimal_degrees(lat))
    geojson_features.append('\t{"type": "Feature", "geometry": {"type": "Point",'
                            '"coordinates": ' + coords + '}, "properties": {}}')

print('{"type": "FeatureCollection", "features": [')
print(',\n'.join(geojson_features))
print('], "properties": {}}')
def get_upsid(aggregated, consonants):
    """Load the inventories whose source is UPSID, with their segments.

    Args:
        aggregated: Path to a tab-separated file with one row per inventory.
            The ``Source`` and ``InventoryID`` columns are read.
        consonants: Path to a tab-separated file with one row per segment.
            The ``InventoryID`` and ``Phoneme`` columns are read.

    Returns:
        A list with one dict per row of ``aggregated`` whose ``Source`` is
        ``'UPSID'``, in file order.  Each dict is the CSV row plus a
        ``'Segments'`` list holding the ``Phoneme`` values of the matching
        ``InventoryID`` (in file order, empty when none match), and minus
        the ``Phonemes``, ``Tones``, ``Population``, ``Trump``, ``Country``
        and ``Vowels`` columns.
    """
    dropped_columns = ('Phonemes', 'Tones', 'Population', 'Trump', 'Country', 'Vowels')

    # The files hold IPA symbols, so they are read as UTF-8 rather than with
    # the locale default; newline='' is what the csv module asks for.
    with open(aggregated, encoding='utf-8', newline='') as f:
        aggregated_data = list(csv.DictReader(f, delimiter='\t'))

    # Group the segments by inventory once instead of rescanning the whole
    # segment list for every inventory.
    segments_by_inventory = collections.defaultdict(list)
    with open(consonants, encoding='utf-8', newline='') as f:
        for segment in csv.DictReader(f, delimiter='\t'):
            segments_by_inventory[segment['InventoryID']].append(segment['Phoneme'])

    inventories = []
    for inventory in aggregated_data:
        if inventory['Source'] != 'UPSID':
            continue
        inventory['Segments'] = list(segments_by_inventory.get(inventory['InventoryID'], ()))
        for key in dropped_columns:
            # Tolerate files that lack one of the optional columns.
            inventory.pop(key, None)
        inventories.append(inventory)
    return inventories
upsid_inv = get_upsid('phoible-aggregated.tsv', 'phoible-consonants.tsv')

# Universal: an inventory has fewer voiced fricatives than voiceless ones.
candidates = 0
followers = 0
for inv in upsid_inv:
    try:
        voiced_fric = [i for i in inv['Segments'] if is_fricative(i) and features_d[i]['periodicGlottalSource']]
        voiceless_fric = [i for i in inv['Segments'] if is_fricative(i) and features_d[i]['periodicGlottalSource'] is False]
    except KeyError:
        # A segment is missing from the lookup tables, so this inventory
        # cannot be classified.  Leave it out entirely rather than judging
        # it with the lists left over from the previous inventory.
        continue
    candidates += 1
    if len(voiced_fric) < len(voiceless_fric):
        followers += 1
# Guard the ratio so an empty candidate set does not raise ZeroDivisionError.
ratio = followers / candidates if candidates else 0.0
print('UPSID: el nombre de fricatives sonores és menor que el de sordes: segueixen l\'universal {:.2%} ({} de {})'.format(ratio, followers, candidates))
# Universal: fricatives are unlikely to outnumber plosives and affricates
# taken together.
candidates = 0
followers = 0
for inv in upsid_inv:
    try:
        fric = [i for i in inv['Segments'] if is_fricative(i)]
        affric = [i for i in inv['Segments'] if is_affricate(i)]
        plosiv = [i for i in inv['Segments'] if is_plosive(i)]
    except KeyError:
        # A segment is missing from the lookup tables, so this inventory
        # cannot be classified.  Leave it out entirely rather than judging
        # it with the lists left over from the previous inventory.
        continue
    candidates += 1
    # NOTE(review): the comparison is strict, so an inventory with exactly as
    # many fricatives as plosives + affricates is counted as NOT following,
    # although the universal is only violated when fricatives are more
    # numerous -- confirm whether `<=` was intended.
    if len(fric) < (len(plosiv) + len(affric)):
        followers += 1
# Guard the ratio so an empty candidate set does not raise ZeroDivisionError.
ratio = followers / candidates if candidates else 0.0
print('UPSID: el nombre de fricatives improbablement es major que oclusives i africades junts: segueixen l\'universal {:.2%} ({} de {})'.format(ratio, followers, candidates))
# Universal: almost every language has at least one liquid.
candidates = 0
followers = 0
for inv in upsid_inv:
    try:
        # The original selected segments with is_fricative(), a copy-paste
        # slip from the fricative cells.  Liquids are taken here as laterals
        # plus rhotics, using the same `lateral` feature and is_rothic()
        # predicate that the lateral/rhotic cells of this notebook rely on.
        # NOTE(review): confirm is_rothic() is defined before this cell runs.
        liquid = [i for i in inv['Segments'] if features_d[i]['lateral'] or is_rothic(i)]
    except KeyError:
        # A segment is missing from the lookup tables, so this inventory
        # cannot be classified.  Leave it out entirely rather than judging
        # it with the list left over from the previous inventory.
        continue
    candidates += 1
    if liquid:
        followers += 1
# Guard the ratio so an empty candidate set does not raise ZeroDivisionError.
ratio = followers / candidates if candidates else 0.0
print('UPSID: Gairebé totes les llengües tenen almenys una líquida: segueixen l\'universal {:.2%} ({} de {})'.format(ratio, followers, candidates))
# Universal: a language with one or more laterals has a voiced lateral
# approximant.  Only inventories with at least one lateral are candidates.
candidates = 0
followers = 0
non_followers = []
for inv in upsid_inv:
    try:
        laterals = [i for i in inv['Segments'] if features_d[i]['lateral']]
    except KeyError:
        # A segment is missing from features_d, so this inventory cannot be
        # classified.  Skip it rather than judging it with the list left
        # over from the previous inventory.
        continue
    if not laterals:
        continue
    candidates += 1
    has_voiced_approximant = any(
        features_d[lateral]['periodicGlottalSource'] and features_d[lateral]['approximant']
        for lateral in laterals
    )
    if has_voiced_approximant:
        followers += 1
    elif inv['LanguageName'] not in non_followers:
        non_followers.append(inv['LanguageName'])
# Guard the ratio so an empty candidate set does not raise ZeroDivisionError.
ratio = followers / candidates if candidates else 0.0
print('UPSID: Una llengua amb una o més laterals té una aproximant lateral sonora.: segueixen l\'universal {:.2%} ({} de {})'.format(ratio, followers, candidates))
print(' no el segueixen', ', '.join(non_followers))
# Classify the laterals of every inventory into four place subsystems and
# show, per language, how many laterals fall in each one and whether the
# counts respect the hierarchy.
langs_lateral_hierarchy = []
for inventory in inventories:
    laterals = [segment for segment in inventory['Segments'] if features_d[segment]['lateral']]
    # NOTE(review): unlike the rhotic classification, the dento-alveolar
    # filter does not require `anterior`, so non-anterior coronals also land
    # here, and the palatal filter is a subset of the retroflex one --
    # confirm the feature definitions against the feature table.
    dentoalveolars = [s for s in laterals
                      if features_d[s]['coronal'] and not features_d[s]['dorsal']]
    retroflex = [s for s in laterals
                 if not features_d[s]['anterior'] and not features_d[s]['dorsal']]
    palatals = [s for s in laterals
                if not features_d[s]['anterior'] and features_d[s]['coronal']
                and not features_d[s]['dorsal']]
    velars = [s for s in laterals
              if features_d[s]['dorsal'] and features_d[s]['high']
              and not features_d[s]['continuant'] and features_d[s]['coronal'] is False]
    subsystems = [dentoalveolars, retroflex, palatals, velars]
    langs_lateral_hierarchy.append({inventory['LanguageName']: subsystems})

html_table = ['<table>']
# Build header
html_table.append('<tr>')
for header in ['Llengua', 'laterals dento', 'laterals retroflex', 'laterals palatal',
               'laterals velar', 'Jerarquia laterals (d > r > p > v)']:
    html_table.append('<td>{}</td>'.format(header))
# Close the header row (the original opened a second <tr> here instead).
html_table.append('</tr>')

candidates = 0
followers = 0
# Add contents
for lang in langs_lateral_hierarchy:
    for name, subsystems in lang.items():
        # Languages without any lateral get no row and are not candidates.
        # The original fell through to the follower count for them and read
        # a `follows_hierarchy` left over from an earlier language.
        if not any(subsystems):
            continue
        candidates += 1
        sizes = [len(item) for item in subsystems]
        # NOTE(review): only d >= r >= p is checked; the velar count is not
        # part of the test although the header lists it -- confirm.
        follows_hierarchy = sizes[0] >= sizes[1] >= sizes[2]
        if follows_hierarchy:
            followers += 1
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(name))
        for item in subsystems:
            html_table.append('<td>{} [ {} ]</td>'.format(len(item), ' '.join(item)))
        html_table.append('<td>{}</td>'.format("si" if follows_hierarchy else "no"))
        html_table.append('</tr>')
# Only <table> was opened, so only </table> is closed.
html_table.append('</table>')
display(HTML(''.join(html_table)))
# Classify the rhotics of every inventory into three place subsystems and
# show, per language, how many rhotics fall in each one and whether the
# counts respect the hierarchy.
langs_rothic_hierarchy = []
for inventory in inventories:
    rothics = [segment for segment in inventory['Segments'] if is_rothic(segment)]
    dentoalveolars = [s for s in rothics
                      if features_d[s]['coronal'] and features_d[s]['anterior']
                      and not features_d[s]['dorsal']]
    retroflex = [s for s in rothics
                 if features_d[s]['coronal'] and not features_d[s]['anterior']
                 and not features_d[s]['dorsal']]
    # NOTE(review): the original tested `not coronal` twice; the duplicate is
    # dropped here (same result).  Possibly a different second feature was
    # intended -- confirm.
    uvulars = [s for s in rothics
               if not features_d[s]['coronal'] and features_d[s]['dorsal']]
    subsystems = [dentoalveolars, retroflex, uvulars]
    langs_rothic_hierarchy.append({inventory['LanguageName']: subsystems})

html_table = ['<table>']
# Build header
html_table.append('<tr>')
for header in ['Llengua', 'rothics dento', 'rothics retroflex', 'rothics uvulars',
               'Jerarquia rothics (d > r > u)']:
    html_table.append('<td>{}</td>'.format(header))
# Close the header row (the original opened a second <tr> here instead).
html_table.append('</tr>')

candidates = 0
followers = 0
# Add contents
for lang in langs_rothic_hierarchy:
    for name, subsystems in lang.items():
        # Languages without any rhotic get no row and are not candidates.
        # The original fell through to the follower count for them and read
        # a `follows_hierarchy` left over from an earlier language.
        if not any(subsystems):
            continue
        candidates += 1
        sizes = [len(item) for item in subsystems]
        follows_hierarchy = sizes[0] >= sizes[1] >= sizes[2]
        if follows_hierarchy:
            followers += 1
        html_table.append('<tr>')
        html_table.append('<td>{}</td>'.format(name))
        for item in subsystems:
            html_table.append('<td>{} [ {} ]</td>'.format(len(item), ' '.join(item)))
        html_table.append('<td>{}</td>'.format("si" if follows_hierarchy else "no"))
        html_table.append('</tr>')
# Only <table> was opened, so only </table> is closed.
html_table.append('</table>')
display(HTML(''.join(html_table)))