Move string compression to Python

... so that the script knows the uncompressed size of the strings and can
calculate the appropriate output buffer size.
This commit is contained in:
Alvin Wong
2021-04-16 17:55:30 +08:00
parent 90426b2b22
commit 6f4f4d9733
4 changed files with 111 additions and 68 deletions

91
Translations/lzfx.py Normal file
View File

@@ -0,0 +1,91 @@
import ctypes
import functools
import os
from pathlib import Path
# Directory containing this module; the liblzfx shared library path is built relative to it.
HERE = Path(__file__).resolve().parent
@functools.lru_cache(maxsize=None)
def _liblzfx():
    """Load the liblzfx shared library and return its ctypes handle.

    The path is resolved relative to this module's directory (``HERE``).
    The result is memoised by ``lru_cache``, so the library is loaded at
    most once per process.
    """
    return ctypes.cdll.LoadLibrary(
        os.path.join(HERE, "../source/Objects/host/lzfx/liblzfx.so")
    )
@functools.lru_cache(maxsize=None)
def _fn_lzfx_compress():
    """Returns the lzfx_compress C function.

    The returned ctypes function object has its prototype declared
    (``argtypes`` / ``restype``) so that arguments are checked and converted
    by ctypes on every call.

    ::

        /*  Buffer-to buffer compression.

            Supply pre-allocated input and output buffers via ibuf and obuf, and
            their size in bytes via ilen and olen.  Buffers may not overlap.

            On success, the function returns a non-negative value and the argument
            olen contains the compressed size in bytes.  On failure, a negative
            value is returned and olen is not modified.
        */
        int lzfx_compress(const void* ibuf, unsigned int ilen,
                          void* obuf, unsigned int *olen);
    """
    fn = _liblzfx().lzfx_compress
    # NOTE: the attribute must be spelled ``argtypes``; assigning to any other
    # name (e.g. ``argtype``) is silently ignored by ctypes.
    fn.argtypes = [
        ctypes.c_char_p,
        ctypes.c_uint,
        ctypes.c_char_p,
        ctypes.POINTER(ctypes.c_uint),
    ]
    fn.restype = ctypes.c_int
    return fn
def compress(data: bytes) -> bytes:
    """Returns a bytes object of the lzfx-compressed data.

    Args:
        data: The raw bytes to compress.

    Returns:
        The compressed bytes, truncated to the size reported by the library.

    Raises:
        LzfxError: If ``lzfx_compress`` returns a negative status code.
    """
    fn_compress = _fn_lzfx_compress()

    ilen = len(data)
    # Poorly compressible input can grow: LZF-style streams presumably spend
    # about one control byte per 32 literal bytes (cf. liblzf's
    # LZF_MAX_COMPRESSED_SIZE). Reserve that plus a few spare bytes so such
    # input does not fail with LZFX_ESIZE. Over-allocating is harmless because
    # the result is sliced to the reported length below.
    output_buffer_len = ilen + ilen // 32 + 8

    obuf = ctypes.create_string_buffer(output_buffer_len)
    # In/out parameter: capacity on entry, compressed size on success.
    olen = ctypes.c_uint(output_buffer_len)

    res = fn_compress(data, ilen, obuf, ctypes.byref(olen))
    if res < 0:
        raise LzfxError(res)
    return bytes(obuf[: olen.value])  # type: ignore
class LzfxError(Exception):
    """Exception raised for lzfx compression or decompression error.

    ``str()`` of the exception includes the decoded error name and message,
    so tracebacks and logs are readable without looking up the numeric code.

    Attributes:
        error_code -- The source error code, which is a negative integer
        error_name -- The constant name of the error
        message -- explanation of the error
    """

    # Error constants as defined by the C library:
    #   #define LZFX_ESIZE    -1  /* Output buffer too small */
    #   #define LZFX_ECORRUPT -2  /* Invalid data for decompression */
    #   #define LZFX_EARGS    -3  /* Arguments invalid (NULL) */
    _KNOWN_ERRORS = {
        -1: ("LZFX_ESIZE", "Output buffer too small"),
        -2: ("LZFX_ECORRUPT", "Invalid data for decompression"),
        -3: ("LZFX_EARGS", "Arguments invalid (NULL)"),
    }

    def __init__(self, error_code):
        # Keep ``args == (error_code,)`` so existing callers and pickling
        # continue to work unchanged.
        super().__init__(error_code)
        self.error_code = error_code
        self.error_name, self.message = self._KNOWN_ERRORS.get(
            error_code, ("UNKNOWN", "Unknown error")
        )

    def __str__(self):
        return f"{self.error_name} ({self.error_code}): {self.message}"

View File

@@ -19,6 +19,7 @@ from bdflib import reader as bdfreader
from bdflib.model import Font, Glyph
import font_tables
import lzfx
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
@@ -505,7 +506,7 @@ def prepare_language(lang: dict, defs: dict, build_version: str) -> LanguageData
def write_language(
data: LanguageData, f: TextIO, lzfx_strings: Optional[bytes] = None
data: LanguageData, f: TextIO, strings_bin: Optional[bytes] = None
) -> None:
lang = data.lang
defs = data.defs
@@ -523,7 +524,7 @@ def write_language(
except KeyError:
lang_name = language_code
if lzfx_strings:
if strings_bin:
f.write('#include "lzfx.h"\n')
f.write(f"\n// ---- {lang_name} ----\n\n")
@@ -540,7 +541,7 @@ def write_language(
"extern const uint8_t *const Font_6x8 = USER_FONT_6x8;\n\n"
)
if not lzfx_strings:
if not strings_bin:
translation_strings_and_indices_text = get_translation_strings_and_indices_text(
lang, defs, symbol_conversion_table
)
@@ -551,9 +552,13 @@ def write_language(
"void prepareTranslations() {}\n\n"
)
else:
write_bytes_as_c_array(f, "translation_data_lzfx", lzfx_strings)
compressed = lzfx.compress(strings_bin)
logging.info(
f"Strings compressed from {len(strings_bin)} to {len(compressed)} bytes (ratio {len(compressed) / len(strings_bin):.3})"
)
write_bytes_as_c_array(f, "translation_data_lzfx", compressed)
f.write(
"static uint8_t translation_data_out_buffer[4096] __attribute__((__aligned__(2)));\n\n"
f"static uint8_t translation_data_out_buffer[{len(strings_bin)}] __attribute__((__aligned__(2)));\n\n"
"const TranslationIndexTable *const Tr = reinterpret_cast<const TranslationIndexTable *>(translation_data_out_buffer);\n"
"const char *const TranslationStrings = reinterpret_cast<const char *>(translation_data_out_buffer) + sizeof(TranslationIndexTable);\n\n"
"void prepareTranslations() {\n"
@@ -847,11 +852,11 @@ def parse_args() -> argparse.Namespace:
dest="input_pickled",
)
parser.add_argument(
"--lzfx-strings",
help="Use compressed TranslationIndices + TranslationStrings data",
"--strings-bin",
help="Use generated TranslationIndices + TranslationStrings data and compress them",
type=argparse.FileType("rb"),
required=False,
dest="lzfx_strings",
dest="strings_bin",
)
parser.add_argument(
"--output", "-o", help="Target file", type=argparse.FileType("w"), required=True
@@ -895,8 +900,8 @@ def main() -> None:
out_ = args.output
write_start(out_)
if args.lzfx_strings:
write_language(language_data, out_, args.lzfx_strings.read())
if args.strings_bin:
write_language(language_data, out_, args.strings_bin.read())
else:
write_language(language_data, out_)