# Source code for sanskrit.sounds

# -*- coding: utf-8 -*-
"""
    sanskrit.sounds
    ~~~~~~~~~~~~~~~

    Code for checking and transforming Sanskrit sounds. This module also
    contains basic metrical functions (see :func:`sanskrit.sounds.meter`
    and :func:`sanskrit.sounds.num_syllables`).

    All functions assume SLP1.

    :license: MIT
"""

#: The full inventory of legal sounds; anusvara, ardhachandra, and Vedic
#: `'L'` are all included.
ALL_SOUNDS = frozenset(
    "aAiIuUfFxXeEoO"              # vowels
    "MH"                          # anusvara and visarga
    "kKgGNcCjJYwWqQRtTdDnpPbBm"   # stops, each row closed by its nasal
    "yrlLv"                       # semivowels (with Vedic 'L')
    "Szsh"
    "'~"
)

#: Everything that may appear in running text: the legal sounds plus
#: punctuation (`'|'`) and whitespace.
ALL_TOKENS = ALL_SOUNDS.union(('|', ' ', '\n'))

#: Every vowel, short or long.
VOWELS = frozenset("aAiIuUfFxXeEoO")

#: The short vowels only.
SHORT_VOWELS = frozenset("aiufx")

#: The stop consonants.
STOPS = frozenset("kKgG" "cCjJ" "wWqQ" "tTdD" "pPbB")

#: The nasal consonants.
NASALS = frozenset("NYRnm")

#: The semivowels.
SEMIVOWELS = frozenset("yrlLv")

#: Savarga
SAVARGA = frozenset("Szsh")

#: Every consonant: stops, nasals, semivowels, and the `SAVARGA` sounds.
CONSONANTS = STOPS | NASALS | SEMIVOWELS | SAVARGA

#: The sounds that may legally end a word.
VALID_FINALS = frozenset("aAiIuUfeEoOkwtpNnmsr")


# General functions
# -----------------

def clean(phrase, valid):
    """Remove all characters from `phrase` that are not in `valid`.

    :param phrase: the phrase to clean
    :param valid: the collection of characters to keep. There is no
                  default; `sounds.ALL_TOKENS` is a sensible choice.
    :returns: a new string holding the surviving characters of `phrase`
              in their original order
    """
    return ''.join([char for char in phrase if char in valid])


def key_fn(s):
    """Sorting function for Sanskrit words in SLP1.

    Every sound of `s` is replaced by an ASCII character whose code point
    follows the collation order spelled out in `sa`, so that comparing the
    returned strings compares the words in that order. Characters that are
    not sounds (whitespace, punctuation, digits, ...) are dropped.

    :param s: the SLP1 string to build a sort key for
    :returns: the sort key, as a string
    """
    sa = "aAiIuUfFxXeEoOMHkKgGNcCjJYwWqQRtTdDnpPbBmyrlvSzsh '~"
    en = "123ABCDEFGHIJKLMNOPQRSTUVWabcdefghijklmnopqrstuvwxyz"
    mapper = dict(zip(sa, en))

    # The space only reserves a slot in the table: whitespace is not a sound
    # and has never contributed to the key.
    del mapper[' ']

    # Vedic 'L' is a legal sound but has no slot of its own in `sa`; looking
    # it up used to raise KeyError. Collate it together with plain 'l'.
    mapper['L'] = mapper['l']

    return ''.join([mapper[x] for x in s if x in mapper])


# Letter transformations
# ----------------------

def letter_transform(name, docstring=None):
    """Build a single-letter transformation function from a lookup table.

    The returned function takes one letter `L` and returns its transformed
    counterpart, or `L` itself when the table has no entry for it.

    :param name: the transformation to build; must be a key of the table
                 below. It also becomes the `__name__` of the result.
    :param docstring: the `__doc__` to give the result. If `None`, a
                      generic docstring is generated from `name`.
    :returns: the transformation function
    :raises KeyError: if `name` is not a known transformation
    """
    data = {
        'shorten': dict(zip('AIUFX', 'aiufx')),
        'lengthen': dict(zip('aiufx', 'AIUFX')),
        # Some vowels map to a two-letter result, hence the split() lists.
        'semivowel': dict(zip('iIuUfFxXeEoO',
                              'y y v v r r l l ay Ay av Av'.split())),
        'aspirate': dict(zip('kgcjwqtdpb',
                             'KGCJWQTDPB')),
        'deaspirate': dict(zip('KGCJWQTDPB',
                               'kgcjwqtdpb')),
        'voice': dict(zip('kKcCwWtTpP',
                          'gGjJqQdDbB')),
        'devoice': dict(zip('gGjJqQdDbB',
                            'kKcCwWtTpP')),
        'nasalize': dict(zip('kKgGhcCjJwWqQtTdDpPbB',
                             'NNNNNYYYYRRRRnnnnmmmm')),
        'dentalize': dict(zip('wWqQRz',
                              'tTdDns')),
        'retroflex': dict(zip('tTdDns',
                              'wWqQRz')),
        # 'c' -> 'k' is what the `simplify` docstring promises (vAc -> vAk);
        # it was missing from this table.
        # NOTE(review): 'N' -> 'k' looks odd given that 'N' is listed in
        # VALID_FINALS -- confirm this entry is intended.
        'simplify': dict(zip('kgGNhcjtTdDpPbBnmsrH',
                             'kkkkkkwttttppppnmHHH')),
        'guna': dict(zip('i I u U  f  F  x  X'.split(),
                         'e e o o ar ar al al'.split())),
        'vrddhi': dict(zip('a i I u U  f  F  x  X e o'.split(),
                           'A E E O O Ar Ar Al Al E O'.split())),
        'samprasarana': dict(zip('yrlv', 'ifxu'))
    }

    get = data[name].get

    def func(L):
        # Letters without an entry pass through unchanged.
        return get(L, L)

    if docstring is None:
        docstring = """{0} `L`. If this is not possible, return `L` unchanged.

        :param L: the letter to {1}
        """.format(name.capitalize(), name)

    func.__name__ = name
    func.__doc__ = docstring
    return func


# One public function per transformation. Each takes a single letter and
# returns the transformed letter, or the letter itself if no rule applies.
shorten = letter_transform('shorten')
lengthen = letter_transform('lengthen')
semivowel = letter_transform('semivowel')
aspirate = letter_transform('aspirate')
deaspirate = letter_transform('deaspirate')
voice = letter_transform('voice')
devoice = letter_transform('devoice')
nasalize = letter_transform('nasalize')
dentalize = letter_transform('dentalize')
retroflex = letter_transform('retroflex')
simplify = letter_transform('simplify',
                            docstring="""
    Simplify the given letter, if possible.

    Here, to "simplify" a letter is to reduce it to a sound that is permitted
    to end a Sanskrit word. For instance, the `c` in `vAc` should be reduced
    to `k`::

        assert simplify('c') == 'k'

    :param L: the letter to simplify
    """
                            )


guna = letter_transform('guna',
                        docstring="""
    Apply guna to the given letter, if possible.
    """
                        )


vrddhi = letter_transform('vrddhi',
                          docstring="""
    Apply vrddhi to the given letter, if possible.
    """
                          )


samprasarana = letter_transform('samprasarana',
                                docstring="""
    Apply samprasarana to the given letter, if possible.
    """
                                )

# The factory is an implementation detail; keep it out of the module API.
del letter_transform


# Term transformations
# --------------------

class Term(str):
    """A string with term-level sound transformations attached."""

    def simplify(self):
        """Simplify the given string using consonant reduction.

        Only the last letter is touched: it is passed through the
        module-level :func:`simplify`, and every letter before it is kept
        as is.

        :returns: the simplified string (a plain `str`). An empty term has
                  no final letter to reduce and yields the empty string.
        """
        if not self:
            # Nothing to reduce; indexing self[-1] would raise IndexError.
            return ''
        return self[:-1] + simplify(self[-1])


# Meter and metrical properties
# -----------------------------

def num_syllables(phrase):
    """Find the number of syllables in `phrase`.

    Every vowel counts as exactly one syllable, so this is the number of
    characters of `phrase` that belong to `VOWELS`.

    :param phrase: the phrase to test
    :returns: the syllable count, as an int
    """
    return sum(1 for sound in phrase if sound in VOWELS)


def meter(phrase, heavy='_', light='.'):
    """Find the meter of the given phrase. Results are returned as a list
    whose elements are either `heavy` or `light`.

    By the traditional definition, a syllable is **heavy** if one of the
    following is true:

    - the vowel is long
    - the vowel is short and followed by multiple consonants
    - the vowel is followed by an anusvara or visarga

    All other syllables are **light**.

    Characters outside `ALL_SOUNDS` (whitespace, punctuation, ...) are
    removed before scanning, so consonants on either side of a word break
    still form a cluster.

    :param phrase: the phrase to scan
    :param heavy: used to indicate heavy syllables. By default it's a string,
                  but you can pass in anything.
    :param light: used to indicate light syllables. By default it's a string,
                  but you can pass in anything.
    :returns: a list with one entry per vowel of `phrase`, in reading order
    """
    scan = []

    # True iff the sound visited just before this one (i.e. the sound that
    # FOLLOWS it in reading order) is a consonant.
    had_consonant = False

    # True iff we've seen an anusvara, a visarga, or some conjunct consonants
    # since the last vowel.
    saw_cluster = False

    # Search for heavy syllables and call all other syllables light. Since
    # syllable weight can depend on later consonants, we have to look ahead
    # to determine the proper weight. An easy way to do that is to reverse
    # the string:
    for L in clean(phrase, ALL_SOUNDS)[::-1]:
        if L in VOWELS:
            if saw_cluster or L not in SHORT_VOWELS:
                scan.append(heavy)
            else:
                scan.append(light)
            saw_cluster = False
            had_consonant = False

        elif L in CONSONANTS:
            # Two consonants in a row make a conjunct.
            if had_consonant:
                saw_cluster = True
            had_consonant = True

        else:
            # Anusvara and visarga add weight on their own. The remaining
            # marks (`'` and `~`) are not consonants, so they must not turn
            # a single following consonant into a "cluster".
            if L in 'MH':
                saw_cluster = True
            had_consonant = False

    return scan[::-1]