Source code for sanskrit.sounds

# -*- coding: utf-8 -*-
"""
    sanskrit.sounds
    ~~~~~~~~~~~~~~~

    Code for checking and transforming Sanskrit sounds. This module also
    contains basic metrical functions (see :func:`sanskrit.sounds.meter`
    and :func:`sanskrit.sounds.num_syllables`).

    All functions assume SLP1.

    :license: MIT
"""

#: Every legal SLP1 sound: vowels, anusvara (`'M'`), visarga (`'H'`),
#: consonants (including Vedic `'L'`), avagraha (`"'"`), and `'~'`.
ALL_SOUNDS = frozenset("aAiIuUfFxXeEoOMHkKgGNcCjJYwWqQRtTdDnpPbBmyrlLvSzsh'~")

#: Every legal token: all sounds plus punctuation (`'|'`), space, and newline.
ALL_TOKENS = ALL_SOUNDS.union('|', ' ', '\n')

#: All vowels, short and long.
VOWELS = frozenset('aAiIuUfFxXeEoO')

#: The short vowels only.
SHORT_VOWELS = frozenset('aiufx')

#: Stop consonants (unaspirated and aspirated, unvoiced and voiced).
STOPS = frozenset('kKgGcCjJwWqQtTdDpPbB')

#: Nasal consonants.
NASALS = frozenset('NYRnm')

#: Semivowels, including Vedic `'L'`.
SEMIVOWELS = frozenset('yrlLv')

#: The three sibilants and `'h'`.
SAVARGA = frozenset('Szsh')

#: All consonants: stops, nasals, semivowels, and the sibilants with `'h'`.
CONSONANTS = STOPS | NASALS | SEMIVOWELS | SAVARGA

#: Sounds that may legally end a word.
VALID_FINALS = frozenset('aAiIuUfeEoOkwtpNnmsr')


# General functions
# -----------------

[docs]def clean(phrase, valid): """Remove all characters from `phrase` that are not in `valid`. :param phrase: the phrase to clean :param valid: the set of valid characters. A sensible default is `sounds.ALL_TOKENS`. """ return ''.join([L for L in phrase if L in valid])
def _build_sort_table():
    """Map each SLP1 sound to a string whose ASCII order is the sort order."""
    sa = "aAiIuUfFxXeEoOMHkKgGNcCjJYwWqQRtTdDnpPbBmyrlvSzsh '~"
    en = "123ABCDEFGHIJKLMNOPQRSTUVWabcdefghijklmnopqrstuvwxyz"
    table = dict(zip(sa, en))

    # The space only keeps the two alphabets aligned. It is not a sound, so
    # it never contributes to a sort key.
    del table[' ']

    # Vedic `L` has no slot of its own in `en`. Give it the key of `l`
    # followed by '~', which is greater than every character in `en`: this
    # places `L` after any word continuing with plain `l` and before `v`,
    # without changing the key of any other sound.
    table['L'] = table['l'] + '~'
    return table


# Built once at import time; `key_fn` is called once per item being sorted.
_SORT_TABLE = _build_sort_table()


def key_fn(s):
    """Sorting function for Sanskrit words in SLP1.

    Pass this as the `key` argument of `sorted` or `list.sort` to order
    SLP1 strings in Sanskrit alphabetical order. Characters that are not
    sounds (whitespace, punctuation, and so on) are ignored.

    :param s: the SLP1 string to build a sort key for
    """
    table = _SORT_TABLE
    return ''.join(table[sound] for sound in s if sound in table)


# Letter transformations
# ----------------------
def letter_transform(name, docstring=None):
    """Build the single-letter transformation function called `name`.

    The returned function takes one SLP1 letter and returns its transformed
    form, or the letter itself if the transformation has no entry for it.

    :param name: the transformation to build. It must be a key of the table
                 below and becomes the ``__name__`` of the result.
    :param docstring: the ``__doc__`` to attach to the result. If `None`, a
                      generic docstring is generated from `name`.
    """
    data = {
        'shorten': dict(zip('AIUFX', 'aiufx')),
        'lengthen': dict(zip('aiufx', 'AIUFX')),
        'semivowel': dict(zip('iIuUfFxXeEoO',
                              'y y v v r r l l ay Ay av Av'.split())),
        'aspirate': dict(zip('kgcjwqtdpb', 'KGCJWQTDPB')),
        'deaspirate': dict(zip('KGCJWQTDPB', 'kgcjwqtdpb')),
        'voice': dict(zip('kKcCwWtTpP', 'gGjJqQdDbB')),
        'devoice': dict(zip('gGjJqQdDbB', 'kKcCwWtTpP')),
        'nasalize': dict(zip('kKgGhcCjJwWqQtTdDpPbB',
                             'NNNNNYYYYRRRRnnnnmmmm')),
        'dentalize': dict(zip('wWqQRz', 'tTdDns')),
        'retroflex': dict(zip('tTdDns', 'wWqQRz')),
        # The two strings are split into matching groups so the pairing can
        # be checked by eye. `c` -> `k` is required by the documented
        # contract of `simplify` (the `vAc` example in its docstring).
        # NOTE(review): `N` -> `k` is kept from the original table even
        # though `N` is listed in VALID_FINALS -- confirm it is intended.
        'simplify': dict(zip('kgGNhc' 'j' 'tTdD' 'pPbB' 'nm' 'srH',
                             'kkkkkk' 'w' 'tttt' 'pppp' 'nm' 'HHH')),
        'guna': dict(zip('i I u U f F x X'.split(),
                         'e e o o ar ar al al'.split())),
        'vrddhi': dict(zip('a i I u U f F x X e o'.split(),
                           'A E E O O Ar Ar Al Al E O'.split())),
        'samprasarana': dict(zip('yrlv', 'ifxu')),
    }

    # Bind the lookup once so the generated function is a thin wrapper.
    get = data[name].get

    def func(L):
        return get(L, L)

    if docstring is None:
        docstring = """{0} `L`. If this is not possible, return `L` unchanged.

        :param L: the letter to {1}
        """.format(name.capitalize(), name)

    func.__name__ = name
    func.__doc__ = docstring
    return func


shorten = letter_transform('shorten')
lengthen = letter_transform('lengthen')
semivowel = letter_transform('semivowel')
aspirate = letter_transform('aspirate')
deaspirate = letter_transform('deaspirate')
voice = letter_transform('voice')
devoice = letter_transform('devoice')
nasalize = letter_transform('nasalize')
dentalize = letter_transform('dentalize')
retroflex = letter_transform('retroflex')
simplify = letter_transform('simplify', docstring="""
    Simplify the given letter, if possible.

    Here, to "simplify" a letter is to reduce it to a sound that is permitted
    to end a Sanskrit word. For instance, the `c` in `vAc` should be reduced
    to `k`::

        assert simplify('c') == 'k'

    :param L: the letter to simplify
    """
)
guna = letter_transform('guna', docstring="""
    Apply guna to the given letter, if possible.
    """
)
vrddhi = letter_transform('vrddhi', docstring="""
    Apply vrddhi to the given letter, if possible.
    """
)
samprasarana = letter_transform('samprasarana', docstring="""
    Apply samprasarana to the given letter, if possible.
    """
)

# The factory is an implementation detail; only its products are public.
del letter_transform


# Term transformations
# --------------------

class Term(str):
    """A string of SLP1 sounds with term-level transformations."""

    def simplify(self):
        """Simplify the given string using consonant reduction.

        Only the last letter is reduced, using the module-level
        :func:`simplify`. The result is a plain `str`; an empty term
        yields an empty string.
        """
        if not self:
            return ''
        # Inside a method body, `simplify` resolves to the module-level
        # function, not to this method.
        return self[:-1] + simplify(self[-1])


# Meter and metrical properties
# -----------------------------
def num_syllables(phrase):
    """Find the number of syllables in `phrase`.

    Each vowel counts as exactly one syllable, so the result is the number
    of characters in `phrase` that are in `VOWELS`.

    :param phrase: the phrase to test
    """
    return sum(1 for sound in phrase if sound in VOWELS)
def meter(phrase, heavy='_', light='.'):
    """Find the meter of the given phrase.

    Results are returned as a list, one element per syllable in phrase
    order, where each element is either `heavy` or `light`. By the
    traditional definition, a syllable is **heavy** if one of the following
    is true:

    - the vowel is long
    - the vowel is short and followed by multiple consonants
    - the vowel is followed by an anusvara or visarga

    All other syllables are **light**.

    :param phrase: the phrase to scan
    :param heavy: used to indicate heavy syllables. By default it's a
                  string, but you can pass in anything.
    :param light: used to indicate light syllables. By default it's a
                  string, but you can pass in anything.
    """
    scan = []

    # True iff the sound examined on the previous step was a consonant.
    # Because the phrase is walked backwards, that is the sound which
    # *follows* the current one in the phrase.
    had_consonant = False

    # True iff we've seen an anusvara, a visarga, or some conjunct
    # consonants since the last vowel.
    saw_cluster = False

    # Search for heavy syllables and call all other syllables light. Since
    # syllable weight can depend on later consonants, we have to look ahead
    # to determine the proper weight. An easy way to do that is to walk the
    # string in reverse:
    for sound in reversed(clean(phrase, ALL_SOUNDS)):
        if sound in VOWELS:
            if saw_cluster or sound not in SHORT_VOWELS:
                scan.append(heavy)
            else:
                scan.append(light)
            saw_cluster = False
        # NOTE(review): this branch is reached by every non-vowel sound, so
        # an avagraha or `~` that precedes a consonant also sets the flag --
        # confirm that is intended.
        elif sound in 'MH' or had_consonant:
            saw_cluster = True
        had_consonant = sound in CONSONANTS

    # Syllables were collected last-to-first; restore phrase order.
    scan.reverse()
    return scan

