# IronOS/Translations/make_translation.py

#!/usr/bin/env python3

import argparse
import functools
import json
import logging
import os
import re
import subprocess
import sys
from datetime import datetime
from itertools import chain
from pathlib import Path
from typing import Dict, List, TextIO, Tuple, Union

from bdflib import reader as bdfreader
from bdflib.model import Font, Glyph

import font_tables

# Send all log output (DEBUG and above) to stdout instead of logging's
# default stderr stream.
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

# Absolute directory containing this script. The bundled font file, the
# translation JSON files and version.h are all located relative to it.
HERE = Path(__file__).resolve().parent


@functools.lru_cache(maxsize=None)
def cjk_font() -> Font:
    """Return the parsed WenQuanYi 9pt BDF font shipped next to this script.

    The result is memoised by ``lru_cache``, so the font file is read and
    parsed at most once per process.
    """
    bdf_path = os.path.join(HERE, "wqy-bitmapsong/wenquanyi_9pt.bdf")
    with open(bdf_path, "rb") as bdf_file:
        parsed_font = bdfreader.read_bdf(bdf_file)
    return parsed_font


# Loading a single JSON file
def load_json(filename: str, skip_first_line: bool) -> dict:
    """Parse the JSON document stored in ``filename``.

    Args:
        filename: Path of the file to read.
        skip_first_line: When true, the first line of the file is discarded
            before parsing. This script uses it for ``translations_def.js``
            (presumably because that file starts with a non-JSON line).

    Returns:
        The decoded JSON value.

    Raises:
        json.JSONDecodeError: If the remaining content is not valid JSON.
        OSError: If the file cannot be opened.
    """
    # Translation files contain non-ASCII text. Decode explicitly as UTF-8 so
    # the result does not depend on the platform's locale default encoding.
    with open(filename, encoding="utf-8") as f:
        if skip_first_line:
            f.readline()
        return json.load(f)


def read_translation(json_root: Union[str, Path], lang_code: str) -> dict:
    """Load ``translation_<lang_code>.json`` from ``json_root`` and validate it.

    A file that is not valid JSON is logged and terminates the process with
    exit status 2. A successfully parsed file is checked so that its declared
    language code matches the file name before being returned.
    """
    filename = f"translation_{lang_code}.json"
    full_path = os.path.join(json_root, filename)

    try:
        translation = load_json(full_path, skip_first_line=False)
    except json.decoder.JSONDecodeError as decode_error:
        logging.error(f"Failed to decode {filename}")
        logging.exception(str(decode_error))
        sys.exit(2)
    else:
        validate_langcode_matches_content(filename, translation)

    return translation


def validate_langcode_matches_content(filename: str, content: dict) -> None:
    """Check that the language code in the file name matches the JSON content.

    The code is taken from ``filename`` by dropping the first 12 characters
    (the length of ``translation_``) and the last 5 (the length of ``.json``),
    then upper-casing the rest. It must equal ``content["languageCode"]``.

    Raises:
        ValueError: If the two codes differ, or the key is missing.
    """
    prefix_length = len("translation_")
    suffix_length = len(".json")
    expected_code = filename[prefix_length:-suffix_length].upper()

    try:
        declared_code = content["languageCode"]
    except KeyError:
        # Use a placeholder so the error below still names the problem.
        declared_code = "(missing)"

    if expected_code == declared_code:
        return
    raise ValueError(f"Invalid languageCode {declared_code} in file {filename}")


def write_start(f: TextIO):
    """Write the auto-generated-file banner and the Translation.h include."""
    header_lines = (
        "// WARNING: THIS FILE WAS AUTO GENERATED BY make_translation.py. PLEASE DO NOT EDIT.\n",
        "\n",
        '#include "Translation.h"\n',
    )
    for header_line in header_lines:
        f.write(header_line)


def get_constants() -> List[Tuple[str, str]]:
    """Return the firmware string constants shared across all languages.

    Returns:
        A list of ``(identifier, text)`` pairs. ``write_language`` emits each
        pair as ``const char* <identifier> = "<encoded text>";``.

    Note:
        ``SymbolVersionNumber`` reads the module-level ``buildVersion``, which
        is only assigned when this file runs as a script. Calling this
        function before that assignment raises ``NameError``.
    """
    return [
        ("SymbolPlus", "+"),
        ("SymbolMinus", "-"),
        ("SymbolSpace", " "),
        ("SymbolDot", "."),
        ("SymbolDegC", "C"),
        ("SymbolDegF", "F"),
        ("SymbolMinutes", "M"),
        ("SymbolSeconds", "S"),
        ("SymbolWatts", "W"),
        ("SymbolVolts", "V"),
        ("SymbolDC", "DC"),
        ("SymbolCellCount", "S"),
        ("SymbolVersionNumber", buildVersion),
    ]


def get_debug_menu() -> List[str]:
    """Return the debug-menu strings: today's date, then the fixed labels.

    The first entry is the current date formatted as ``DD-MM-YY``. The
    remaining twelve entries are constant five-character labels.
    """
    build_date = datetime.today().strftime("%d-%m-%y")
    fixed_labels = (
        "HW G ",
        "HW M ",
        "HW P ",
        "Time ",
        "Move ",
        "RTip ",
        "CTip ",
        "CHan ",
        "Vin  ",
        "PCB  ",
        "PWR  ",
        "Max  ",
    )
    return [build_date, *fixed_labels]


def get_letter_counts(defs: dict, lang: dict) -> List[str]:
    """Return every character used by a language, most frequent first.

    All strings that end up in the generated output are gathered: menu option
    descriptions and short names, messages, characters, menu group names and
    descriptions, the shared constants and the debug menu. Newline and
    carriage-return characters, and their two-character ``\\n`` / ``\\r``
    escaped spellings, are removed before counting.

    Args:
        defs: Translation definitions. The ``menuOptions``, ``messages``,
            ``characters`` and ``menuGroups`` entries are iterated and each
            item's ``id`` is looked up in ``lang``.
        lang: The translation for one language.

    Returns:
        The distinct characters ordered by descending occurrence count; ties
        are broken by descending character value.
    """
    text_list: List[str] = []

    menu_options = lang["menuOptions"]
    for mod in defs["menuOptions"]:
        text_list.append(menu_options[mod["id"]]["desc"])

    messages = lang["messages"]
    for mod in defs["messages"]:
        eid = mod["id"]
        if eid in messages:
            text_list.append(messages[eid])
        else:
            # Same fallback as write_language: a message with neither a
            # translation nor a default is treated as an empty string rather
            # than raising KeyError.
            text_list.append(mod.get("default", ""))

    characters = lang["characters"]
    for mod in defs["characters"]:
        text_list.append(characters[mod["id"]])

    for mod in defs["menuOptions"]:
        short_name = menu_options[mod["id"]]["text2"]
        text_list.append(short_name[0])
        text_list.append(short_name[1])

    menu_groups = lang["menuGroups"]
    for mod in defs["menuGroups"]:
        group_name = menu_groups[mod["id"]]["text2"]
        text_list.append(group_name[0])
        text_list.append(group_name[1])

    for mod in defs["menuGroups"]:
        text_list.append(menu_groups[mod["id"]]["desc"])

    text_list.extend(text for _, text in get_constants())
    text_list.extend(get_debug_menu())

    # Collapse all strings into per-character occurrence totals.
    symbol_counts: Dict[str, int] = {}
    for line in text_list:
        # Line breaks are not glyphs: strip both the real control characters
        # and their escaped spellings.
        for line_break in ("\n", "\r", "\\n", "\\r"):
            line = line.replace(line_break, "")
        for letter in line:
            symbol_counts[letter] = symbol_counts.get(letter, 0) + 1

    # Most frequent first. Keys are unique, so a reversed sort is exactly the
    # ascending sort read backwards.
    ranked = sorted(symbol_counts.items(), key=lambda kv: (kv[1], kv[0]), reverse=True)
    return [symbol for symbol, _ in ranked]


def get_cjk_glyph(sym: str) -> str:
    """Render ``sym`` from the CJK bitmap font as font-table byte literals.

    The glyph is drawn into a cell 12 pixels wide and 16 pixels high. The
    result is one string of 24 comma-terminated ``0xNN`` literals.
    """
    glyph: Glyph = cjk_font()[ord(sym)]

    data = glyph.data
    src_left, src_bottom, src_w, src_h = glyph.get_bounding_box()
    dst_w = 12
    dst_h = 16

    # Row of the destination cell where the glyph's top row lands. The extra
    # 3px lifts the baseline above the bottom edge so the glyph sits roughly
    # centred; per the original author this is tuned for WenQuanYi Bitmap
    # Song 9pt.
    top_offset = dst_h - src_h - src_bottom - 3

    # The source data is a per-row list of ints, bottom-most row first, with
    # the LSB being the right-most pixel. (col, row) below are destination
    # coordinates with the origin at the top-left.
    def pixel_is_set(col: int, row: int) -> bool:
        glyph_col = col - src_left
        glyph_row = row - top_offset
        inside_box = 0 <= glyph_col < src_w and 0 <= glyph_row < src_h
        if not inside_box:
            return False
        row_bits = data[src_h - glyph_row - 1]
        return bool(row_bits & (1 << (src_w - glyph_col - 1)))

    # A glyph in the font table is split into an upper and a lower half, each
    # 8px high. Each byte is half of a column with the LSB as the top-most
    # pixel. All columns of the top half come first (left to right), followed
    # by all columns of the bottom half.
    encoded_bytes = []
    for half in range(2):
        for col in range(dst_w):
            column_byte = 0
            for bit in range(8):
                if pixel_is_set(col, bit + 8 * half):
                    column_byte |= 0x01 << bit
            encoded_bytes.append(f"0x{column_byte:02X},")
    return "".join(encoded_bytes)


def get_chars_from_font_index(index: int) -> str:
    """
    Return the C string escape sequence(s) that encode a font table index.

    Indexes up to 240 map to a single ``\\xNN`` escape. Larger indexes map to
    two escapes: a lead byte in ``\\xF1``..``\\xFF`` followed by a value byte
    in ``\\x01``..``\\xFF``. ``ValueError`` is raised for a negative index or
    one beyond the largest encodable value (4065).
    """

    # More than 254 symbols must be addressable (``\x00`` is the terminator
    # and ``\x01`` the new-line) without making every char two bytes wide.
    # Lead bytes 0xF1..0xFF therefore introduce a double-byte char, and all
    # other values stay single-byte. The second byte is never 0x00, so a NUL
    # always means end of string.
    #
    # Examples:
    #   0x02      => 2
    #   0xF0      => 240
    #   0xF1 0x01 => 1 * 0xFF - 15 + 1   = 241
    #   0xF1 0xFF => 1 * 0xFF - 15 + 255 = 495
    #   0xF2 0x01 => 2 * 0xFF - 15 + 1   = 496
    #   0xFF 0xFF => 15 * 0xFF - 15 + 255 = 4065

    if index < 0:
        raise ValueError("index must be positive")

    page, offset = divmod(index + 0x0E, 0xFF)
    if page > 0x0F:
        raise ValueError("page value out of range")

    if page == 0:
        # Single-byte range.
        return f"\\x{index:02X}"

    # Extended range: the lead byte is 0xFz, where z is the page number, and
    # the value byte is the 1-based position within the page.
    leader = 0xF0 + page
    value = offset + 0x01
    if leader > 0xFF or value > 0xFF:
        raise ValueError("value is out of range")
    return f"\\x{leader:02X}\\x{value:02X}"


def get_font_map_and_table(text_list: List[str]) -> Tuple[str, Dict[str, str]]:
    """Build the C font tables and the symbol-to-escape map for a language.

    Args:
        text_list: The symbols used by the language, in the order they should
            be assigned font indexes (after the forced leading digits).

    Returns:
        A tuple ``(output_table, symbol_map)``. ``output_table`` is the C
        source text defining ``USER_FONT_12`` and ``USER_FONT_6x8``.
        ``symbol_map`` maps each symbol (plus the new-line character) to its
        escape sequence.

    Raises:
        ValueError: If a symbol would be assigned an index twice, or a symbol
            classified as CJK is present in the regular font table.

    The process exits with status 1 if there are too many symbols or a
    required font entry is missing.
    """
    # Index 0 is the NUL terminator and index 1 is new-line, so symbols are
    # allocated from index 2 upwards.
    symbol_map = {"\n": "\\x01"}
    first_symbol_index = 2
    forced_first_symbols = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"]

    # Get the font table, which does not include CJK chars
    font_table = font_tables.get_font_map()
    font_small_table = font_tables.get_small_font_map()

    # All CJK chars are placed after the non-CJK ones so that they do not need
    # entries in the small font table, which saves space. Any symbol missing
    # from the font table is assumed to be a CJK char. Digits always go first.
    ordered_normal_sym_list: List[str] = forced_first_symbols + [
        x for x in text_list if x not in forced_first_symbols and x in font_table
    ]
    ordered_cjk_sym_list: List[str] = [
        x for x in text_list if x not in forced_first_symbols and x not in font_table
    ]

    total_symbol_count = len(ordered_normal_sym_list) + len(ordered_cjk_sym_list)
    # \x00 is for NULL termination and \x01 is for newline, so the maximum
    # number of symbols allowed is as follows (see also the comments in
    # `get_chars_from_font_index`):
    if total_symbol_count > (0x10 * 0xFF - 15) - 2:  # 4063
        logging.error(f"Error, too many used symbols for this version (total {total_symbol_count})")
        sys.exit(1)

    logging.info(f"Generating fonts for {total_symbol_count} symbols")

    all_symbols = chain(ordered_normal_sym_list, ordered_cjk_sym_list)
    for index, sym in enumerate(all_symbols, start=first_symbol_index):
        # Each symbol may be allocated only once; a repeat (or a literal
        # new-line in the input) would silently remap an existing entry.
        if sym in symbol_map:
            raise ValueError(f"Symbol {sym!r} already exists in symbol map")
        symbol_map[sym] = get_chars_from_font_index(index)

    font_table_strings = []
    font_small_table_strings = []
    for sym in ordered_normal_sym_list:
        if sym not in font_table:
            logging.error(f"Missing Large font element for {sym}")
            sys.exit(1)
        large_line: str = font_table[sym]
        font_table_strings.append(f"{large_line}//{symbol_map[sym]} -> {sym}")
        if sym not in font_small_table:
            logging.error(f"Missing Small font element for {sym}")
            sys.exit(1)
        small_line: str = font_small_table[sym]
        font_small_table_strings.append(f"{small_line}//{symbol_map[sym]} -> {sym}")

    for sym in ordered_cjk_sym_list:
        if sym in font_table:
            raise ValueError("Symbol already exists in font_table")
        cjk_line = get_cjk_glyph(sym)
        if cjk_line is None:
            logging.error(f"Missing Large font element for {sym}")
            sys.exit(1)
        font_table_strings.append(f"{cjk_line}//{symbol_map[sym]} -> {sym}")
        # No data to add to the small font table; a comment-only line keeps
        # both tables listing the same symbols in the same order.
        font_small_table_strings.append(f"//                                   {symbol_map[sym]} -> {sym}")

    # Join both tables into one large string, one entry per line.
    output_lines = [
        "const uint8_t USER_FONT_12[] = {",
        *font_table_strings,
        "};",
        "const uint8_t USER_FONT_6x8[] = {",
        *font_small_table_strings,
        "};",
    ]
    output_table = "\n".join(output_lines) + "\n"
    return output_table, symbol_map


def convert_string(symbol_conversion_table: Dict[str, str], text: str) -> str:
    """Translate ``text`` into the concatenated escapes of its symbols.

    The two-character sequence ``\\r`` is dropped and ``\\n`` becomes a real
    new-line before each character is looked up in
    ``symbol_conversion_table``. A character without an entry is logged and
    terminates the process with exit status 1.
    """
    normalised = text.replace("\\r", "").replace("\\n", "\n")
    escapes = []
    for char in normalised:
        if char in symbol_conversion_table:
            escapes.append(symbol_conversion_table[char])
            continue
        logging.error(f"Missing font definition for {char}")
        sys.exit(1)
    return "".join(escapes)


def write_language(lang: dict, defs: dict, f: TextIO) -> None:
    """Write the generated C++ source for one language to ``f``.

    In order, the output is: the font tables, the settings descriptions, the
    message strings, the character strings, the shared constants, the debug
    menu, the settings short names, the menu group names, the menu group
    descriptions, the ``HasFahrenheit`` flag and a ``static_assert`` per menu
    option verifying its ``SettingsItemIndex`` value.

    Args:
        lang: The translation for one language.
        defs: Translation definitions. These fix the order of the generated
            entries; each definition's ``id`` is looked up in ``lang``.
        f: Text stream receiving the output.
    """
    language_code: str = lang["languageCode"]
    logging.info(f"Generating block for {language_code}")
    # Collect the symbols in use, then derive the font tables and the
    # symbol -> escape sequence translator from them.
    symbols = get_letter_counts(defs, lang)
    font_table_text, symbol_conversion_table = get_font_map_and_table(symbols)

    label_width = 25

    def encode(text: str) -> str:
        # Display text -> font-index escape sequences.
        return convert_string(symbol_conversion_table, text)

    def padded(entry_id: str) -> str:
        # Identifier padded/truncated to a fixed width for the /* */ labels.
        return entry_id.ljust(label_width)[:label_width]

    # Fall back to the language code when no local name is provided.
    lang_name = lang["languageLocalName"] if "languageLocalName" in lang else language_code

    banner = f"\n// ---- {lang_name} ----\n\n"
    f.write(banner)
    f.write(font_table_text)
    f.write(banner)

    # ----- Writing SettingsDescriptions
    menu_options = lang["menuOptions"]
    f.write("const char* SettingsDescriptions[] = {\n")
    for position, option in enumerate(defs["menuOptions"]):
        eid = option["id"]
        feature_guarded = "feature" in option
        if feature_guarded:
            # Entries tied to an optional feature are wrapped in #ifdef.
            f.write(f"#ifdef {option['feature']}\n")
        f.write(f"  /* [{position:02d}] {padded(eid)} */ ")
        description = menu_options[eid]["desc"]
        f.write(f'"{encode(description)}",//{description} \n')
        if feature_guarded:
            f.write("#endif\n")
    f.write("};\n\n")

    # ----- Writing Message strings
    messages = lang["messages"]
    for message in defs["messages"]:
        eid = message["id"]
        # The translation wins; otherwise use the default, else empty.
        if eid in messages:
            source_text = messages[eid]
        else:
            source_text = message.get("default", "")
        encoded_text = encode(source_text)
        # Keep the trailing // comment on a single line.
        printable_text = source_text.replace("\n", "_")
        f.write(f'const char* {eid} = "{encoded_text}";//{printable_text} \n')
    f.write("\n")

    # ----- Writing Characters
    characters = lang["characters"]
    for character in defs["characters"]:
        eid = character["id"]
        character_text = characters[eid]
        f.write(f'const char* {eid} = "{encode(character_text)}";//{character_text} \n')
    f.write("\n")

    # ----- Writing firmware constants shared by all languages
    for constant_name, constant_text in get_constants():
        f.write(f'const char* {constant_name} = "{encode(constant_text)}";//{constant_text} \n')
    f.write("\n")

    # ----- Writing Debug Menu
    f.write("const char* DebugMenu[] = {\n")
    for entry in get_debug_menu():
        f.write(f'\t "{encode(entry)}",//{entry} \n')
    f.write("};\n\n")

    # ----- Writing SettingsShortNames
    f.write("const char* SettingsShortNames[][2] = {\n")
    for position, option in enumerate(defs["menuOptions"]):
        eid = option["id"]
        feature_guarded = "feature" in option
        if feature_guarded:
            f.write(f"#ifdef {option['feature']}\n")
        f.write(f"  /* [{position:02d}] {padded(eid)} */ ")
        short_name = menu_options[eid]["text2"]
        first_line = encode(short_name[0])
        second_line = encode(short_name[1])
        f.write(f'{{ "{first_line}", "{second_line}" }},//{short_name} \n')
        if feature_guarded:
            f.write("#endif\n")
    f.write("};\n\n")

    # ----- Writing Menu Groups
    menu_groups = lang["menuGroups"]
    f.write(f"const char* SettingsMenuEntries[{len(menu_groups)}] = {{\n")
    for group in defs["menuGroups"]:
        eid = group["id"]
        f.write(f"  /* {padded(eid)} */ ")
        group_name = menu_groups[eid]["text2"]
        # Both halves are joined by an escaped new-line, which
        # convert_string turns into the real new-line symbol.
        two_line_name = f"{group_name[0]}\\n{group_name[1]}"
        f.write(f'"{encode(two_line_name)}",//{group_name} \n')
    f.write("};\n\n")

    # ----- Writing Menu Groups Descriptions
    f.write(f"const char* SettingsMenuEntriesDescriptions[{len(menu_groups)}] = {{\n")
    for group in defs["menuGroups"]:
        eid = group["id"]
        f.write(f"  /* {padded(eid)} */ ")
        group_description = menu_groups[eid]["desc"]
        f.write(f'"{encode(group_description)}",//{group_description} \n')
    f.write("};\n\n")

    # Fahrenheit support defaults to enabled unless the language opts out.
    has_fahrenheit = "true" if lang.get("tempUnitFahrenheit", True) else "false"
    f.write(f"const bool HasFahrenheit = {has_fahrenheit};\n")

    f.write("\n// Verify SettingsItemIndex values:\n")
    for position, option in enumerate(defs["menuOptions"]):
        f.write(f"static_assert(static_cast<uint8_t>(SettingsItemIndex::{option['id']}) == {position});\n")


def read_version() -> str:
    """Return the build version string for the firmware.

    The base version is the first double-quoted value on a ``#define`` line of
    ``../source/version.h`` that mentions ``BUILD_VERSION``; if several lines
    match, the last one wins. The upper-cased 7-character git hash of ``HEAD``
    is appended after a dot. When git cannot be run or fails (for example
    outside a repository), `` git`` is appended instead.

    Raises:
        FileNotFoundError: If ``version.h`` does not exist.
        ValueError: If ``version.h`` contains no usable ``BUILD_VERSION``
            define.
    """
    version = None
    with open(HERE.parent / 'source' / 'version.h') as version_file:
        for line in version_file:
            if not re.findall(r"^.*(?<=(#define)).*(?<=(BUILD_VERSION))", line):
                continue
            quoted_values = re.findall(r"\"(.+?)\"", line)
            if quoted_values:
                version = quoted_values[0]

    if version is None:
        # Fail with a clear message instead of an UnboundLocalError on return.
        raise ValueError("No BUILD_VERSION define with a quoted value found in version.h")

    try:
        # --short=7: the shortened hash with 7 digits. Increase/decrease if needed!
        git_hash = subprocess.check_output(['git', 'rev-parse', '--short=7', 'HEAD'])
        version += f".{git_hash.strip().decode('ascii').upper()}"
    except (OSError, subprocess.CalledProcessError):
        # OSError: git is not installed / not executable.
        # CalledProcessError: git ran but exited non-zero (e.g. no repository).
        version += " git"
    return version


def parse_args() -> argparse.Namespace:
    """Parse the command line.

    Accepts a required ``--output`` / ``-o`` option, opened for writing as a
    text file, and a positional ``languageCode``.
    """
    cli = argparse.ArgumentParser()
    output_options = {
        "help": "Target file",
        "type": argparse.FileType("w"),
        "required": True,
    }
    cli.add_argument("--output", "-o", **output_options)
    cli.add_argument("languageCode", help="Language to generate")
    return cli.parse_args()


if __name__ == "__main__":
    # Translation files live in the same directory as this script.
    json_dir = HERE
    args = parse_args()

    # buildVersion must remain a module-level name: get_constants() reads it.
    try:
        buildVersion = read_version()
    except FileNotFoundError:
        logging.error("error: Could not find version info ")
        sys.exit(1)

    logging.info(f"Build version: {buildVersion}")
    logging.info(f"Making {args.languageCode} from {json_dir}")

    translation = read_translation(json_dir, args.languageCode)
    # The definitions file is loaded with its first line skipped.
    definitions_path = os.path.join(json_dir, "translations_def.js")
    definitions = load_json(definitions_path, True)

    write_start(args.output)
    write_language(translation, definitions, args.output)

    logging.info("Done")