Move string compression to Python

... so that the script knows the uncompressed size of the strings and can calculate the appropriate buffer size.
2025-02-26 07:53:55 +00:00 · 2021-04-16 17:55:30 +08:00
parent 90426b2b22
commit 6f4f4d9733
4 changed files with 111 additions and 68 deletions
--- a/Translations/lzfx.py
+++ b/Translations/lzfx.py
@@ -0,0 +1,91 @@
+import ctypes
+import functools
+import os
+from pathlib import Path
+
+HERE = Path(__file__).resolve().parent
+
+
+@functools.lru_cache(maxsize=None)
+def _liblzfx():
+    """Loads liblzfx.so from a path relative to this file; the handle is cached."""
+    lib_path = os.path.join(HERE, "../source/Objects/host/lzfx/liblzfx.so")
+    return ctypes.cdll.LoadLibrary(lib_path)
+
+
+@functools.lru_cache(maxsize=None)
+def _fn_lzfx_compress():
+    """Returns the lzfx_compress C function with its ctypes signature declared.
+    ::
+
+        /*  Buffer-to buffer compression.
+
+            Supply pre-allocated input and output buffers via ibuf and obuf, and
+            their size in bytes via ilen and olen.  Buffers may not overlap.
+
+            On success, the function returns a non-negative value and the argument
+            olen contains the compressed size in bytes.  On failure, a negative
+            value is returned and olen is not modified.
+        */
+        int lzfx_compress(const void* ibuf, unsigned int ilen,
+                                void* obuf, unsigned int *olen);
+    """
+    # ctypes only reads "argtypes" (plural); any other attribute name is ignored.
+    fn = _liblzfx().lzfx_compress
+    fn.argtypes = [
+        ctypes.c_char_p,
+        ctypes.c_uint,
+        ctypes.c_char_p,
+        ctypes.POINTER(ctypes.c_uint),
+    ]
+    fn.restype = ctypes.c_int
+    return fn
+
+
+def compress(data: bytes) -> bytes:
+    """Returns a bytes object of the lzfx-compressed data.
+
+    Raises LzfxError if lzfx_compress returns a negative status, e.g. when the
+    result does not fit in the output buffer of len(data) + 8 bytes.
+    """
+
+    lzfx_compress = _fn_lzfx_compress()
+
+    capacity = len(data) + 8
+    out_buf = ctypes.create_string_buffer(capacity)
+    # In/out argument: holds the capacity on entry, the compressed size on return.
+    out_len = ctypes.c_uint(capacity)
+
+    status = lzfx_compress(data, len(data), out_buf, ctypes.byref(out_len))
+    if status < 0:
+        raise LzfxError(status)
+    return bytes(out_buf[: out_len.value])  # type: ignore
+
+
+class LzfxError(Exception):
+    """Exception raised for lzfx compression or decompression error.
+
+    Attributes:
+        error_code -- The source error code, which is a negative integer
+        error_name -- The constant name of the error
+        message -- explanation of the error
+    """
+
+    # Lookup table mirroring the error constants of the lzfx C header:
+    # define LZFX_ESIZE      -1      /* Output buffer too small */
+    # define LZFX_ECORRUPT   -2      /* Invalid data for decompression */
+    # define LZFX_EARGS      -3      /* Arguments invalid (NULL) */
+    _KNOWN_ERRORS = {
+        -1: ("LZFX_ESIZE", "Output buffer too small"),
+        -2: ("LZFX_ECORRUPT", "Invalid data for decompression"),
+        -3: ("LZFX_EARGS", "Arguments invalid (NULL)"),
+    }
+
+    def __init__(self, error_code):
+        super().__init__(error_code)  # keeps args == (error_code,)
+        self.error_code = error_code
+        unknown = ("UNKNOWN", "Unknown error")
+        self.error_name, self.message = self._KNOWN_ERRORS.get(error_code, unknown)
+
+    def __str__(self):
+        return f"{self.error_name} ({self.error_code}): {self.message}"
--- a/Translations/make_translation.py
+++ b/Translations/make_translation.py
@@ -19,6 +19,7 @@ from bdflib import reader as bdfreader
 from bdflib.model import Font, Glyph

 import font_tables
+import lzfx

 logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

@@ -505,7 +506,7 @@ def prepare_language(lang: dict, defs: dict, build_version: str) -> LanguageData


 def write_language(
-    data: LanguageData, f: TextIO, lzfx_strings: Optional[bytes] = None
+    data: LanguageData, f: TextIO, strings_bin: Optional[bytes] = None
 ) -> None:
    lang = data.lang
    defs = data.defs
@@ -523,7 +524,7 @@ def write_language(
    except KeyError:
        lang_name = language_code

-    if lzfx_strings:
+    if strings_bin:
        f.write('#include "lzfx.h"\n')

    f.write(f"\n// ---- {lang_name} ----\n\n")
@@ -540,7 +541,7 @@ def write_language(
        "extern const uint8_t *const Font_6x8 = USER_FONT_6x8;\n\n"
    )

-    if not lzfx_strings:
+    if not strings_bin:
        translation_strings_and_indices_text = get_translation_strings_and_indices_text(
            lang, defs, symbol_conversion_table
        )
@@ -551,9 +552,13 @@ def write_language(
            "void prepareTranslations() {}\n\n"
        )
    else:
-        write_bytes_as_c_array(f, "translation_data_lzfx", lzfx_strings)
+        compressed = lzfx.compress(strings_bin)
+        logging.info(
+            f"Strings compressed from {len(strings_bin)} to {len(compressed)} bytes (ratio {len(compressed) / len(strings_bin):.3})"
+        )
+        write_bytes_as_c_array(f, "translation_data_lzfx", compressed)
        f.write(
-            "static uint8_t translation_data_out_buffer[4096] __attribute__((__aligned__(2)));\n\n"
+            f"static uint8_t translation_data_out_buffer[{len(strings_bin)}] __attribute__((__aligned__(2)));\n\n"
            "const TranslationIndexTable *const Tr = reinterpret_cast<const TranslationIndexTable *>(translation_data_out_buffer);\n"
            "const char *const TranslationStrings = reinterpret_cast<const char *>(translation_data_out_buffer) + sizeof(TranslationIndexTable);\n\n"
            "void prepareTranslations() {\n"
@@ -847,11 +852,11 @@ def parse_args() -> argparse.Namespace:
        dest="input_pickled",
    )
    parser.add_argument(
-        "--lzfx-strings",
-        help="Use compressed TranslationIndices + TranslationStrings data",
+        "--strings-bin",
+        help="Use generated TranslationIndices + TranslationStrings data and compress them",
        type=argparse.FileType("rb"),
        required=False,
-        dest="lzfx_strings",
+        dest="strings_bin",
    )
    parser.add_argument(
        "--output", "-o", help="Target file", type=argparse.FileType("w"), required=True
@@ -895,8 +900,8 @@ def main() -> None:

    out_ = args.output
    write_start(out_)
-    if args.lzfx_strings:
-        write_language(language_data, out_, args.lzfx_strings.read())
+    if args.strings_bin:
+        write_language(language_data, out_, args.strings_bin.read())
    else:
        write_language(language_data, out_)