#!/usr/bin/env python3

# Copyright © 2012-2022 Jakub Wilk <jwilk@jwilk.net>
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the “Software”), to deal
# in the Software without restriction, including without limitation the rights
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
# copies of the Software, and to permit persons to whom the Software is
# furnished to do so, subject to the following conditions:
#
# The above copyright notice and this permission notice shall be included in
# all copies or substantial portions of the Software.
#
# THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.

import argparse
import collections
import functools
import itertools
import os
import sys

import polib

# Version guard: underscores in numeric literals were added in Python 3.6,
# so an older interpreter rejects this file with a SyntaxError at parse time.
int(0_0)  # Python >= 3.6 is required

def init_polib():
    """Set polib's module-level default encoding to ASCII."""
    # polib should not presume that input files are UTF-8
    fallback_encoding = 'ASCII'
    polib.default_encoding = fallback_encoding

def main():
    """Count the characters used in the translations of PO/MO files.

    File names come from the FILE command-line arguments and, with
    ``--stdin``, additionally from standard input (one name per line).
    Files ending in ``.po`` are loaded with ``polib.pofile``; files ending
    in ``.mo`` or ``.gmo`` with ``polib.mofile``.  Any other extension, and
    any exception raised while loading or scanning a file, is reported on
    stderr and the file is skipped.

    For every character found in ``msgstr`` (including plural forms) of
    translated entries, one line is printed to stdout: the number of
    occurrences, the fraction of processed files containing the character,
    and the ``repr`` of the character.  Lines are ordered by descending
    count.  With ``--skip-ascii``, characters below U+0080 are omitted.
    """
    init_polib()
    ap = argparse.ArgumentParser()
    # nargs='*' rather than '+': with --stdin the file list may come entirely
    # from standard input, so FILE must not be mandatory on the command line.
    ap.add_argument('files', metavar='FILE', nargs='*')
    ap.add_argument('--skip-ascii', action='store_true', help='skip ASCII characters')
    ap.add_argument('--stdin', action='store_true', help='read filenames from stdin')
    options = ap.parse_args()
    files = options.files
    if options.stdin:
        files = itertools.chain(
            files,
            (line.rstrip() for line in sys.stdin)
        )
    elif not files:
        # without --stdin, at least one FILE is still required
        ap.error('the following arguments are required: FILE')
    char_counter = collections.Counter()
    char_files = collections.defaultdict(set)
    n_files = 0
    # progress and errors go to stderr, flushed so they show up immediately
    eprint = functools.partial(print, file=sys.stderr, flush=True)
    for path in files:
        eprint(path, end=' ... ')
        try:
            extension = os.path.splitext(path)[-1]
            if extension == '.po':
                constructor = polib.pofile
            elif extension in {'.mo', '.gmo'}:
                constructor = polib.mofile
            else:
                raise NotImplementedError(repr(extension))
            file = constructor(path)
            for message in file.translated_entries():
                for text in itertools.chain([message.msgstr], message.msgstr_plural.values()):
                    for ch in text:
                        char_counter[ch] += 1
                        char_files[ch].add(path)
            # drop the reference to the parsed file before loading the next one
            file = None
        except Exception as exc:  # pylint: disable=broad-except
            # best effort: report the problem and carry on with the next file
            eprint('error:', exc)
        else:
            eprint('ok')
        # every attempted path counts, including those that failed to load
        n_files += 1
    # sort by (count, character), highest first
    for n, ch in sorted(((n, ch) for ch, n in char_counter.items()), reverse=True):
        if options.skip_ascii and ord(ch) < 0x80:
            continue
        # n_files is non-zero here: a counted character implies a processed file
        filecov = len(char_files[ch]) / n_files
        print(f'{n:6} [{filecov:6.1%}] {ch!r}')

if __name__ == '__main__':
    main()

# vim:ts=4 sts=4 sw=4 et
