Move string compression to Python

... so that the script can know its uncompressed size and calculate the
appropriate buffer size.
This commit is contained in:
Alvin Wong
2021-04-16 17:55:30 +08:00
parent 90426b2b22
commit 6f4f4d9733
4 changed files with 111 additions and 68 deletions

91
Translations/lzfx.py Normal file
View File

@@ -0,0 +1,91 @@
import ctypes
import functools
import os
from pathlib import Path
HERE = Path(__file__).resolve().parent
@functools.lru_cache(maxsize=None)
def _liblzfx():
    """Load the host build of the lzfx shared library and return its handle.

    The library path is resolved relative to the directory containing this
    module. The result is memoised by ``lru_cache``, so the library is loaded
    at most once per process.
    """
    return ctypes.cdll.LoadLibrary(
        os.path.join(HERE, "../source/Objects/host/lzfx/liblzfx.so")
    )
@functools.lru_cache(maxsize=None)
def _fn_lzfx_compress():
    """Return the ``lzfx_compress`` C function with its prototype declared.

    The C declaration being wrapped is::

        /*  Buffer-to buffer compression.

            Supply pre-allocated input and output buffers via ibuf and obuf,
            and their size in bytes via ilen and olen.  Buffers may not
            overlap.

            On success, the function returns a non-negative value and the
            argument olen contains the compressed size in bytes.  On failure,
            a negative value is returned and olen is not modified.
        */
        int lzfx_compress(const void* ibuf, unsigned int ilen,
                          void* obuf, unsigned int *olen);

    The result is memoised by ``lru_cache``, so the prototype is only set up
    once per process.
    """
    fn = _liblzfx().lzfx_compress
    # The attribute ctypes honours is ``argtypes`` (plural). Assigning to
    # ``argtype`` just creates an unused attribute and leaves the call
    # without any argument checking or conversion.
    fn.argtypes = [
        ctypes.c_char_p,
        ctypes.c_uint,
        ctypes.c_char_p,
        ctypes.POINTER(ctypes.c_uint),
    ]
    fn.restype = ctypes.c_int
    return fn
def compress(data: bytes) -> bytes:
    """Compress ``data`` with lzfx and return the compressed bytes.

    The output buffer is allocated with 8 bytes more than the input length.

    Raises:
        LzfxError: if ``lzfx_compress`` returns a negative status code.
    """
    capacity = len(data) + 8
    out_buf = ctypes.create_string_buffer(capacity)
    # In/out parameter: holds the buffer capacity going in and the number of
    # compressed bytes coming out.
    out_len = ctypes.c_uint(capacity)

    status = _fn_lzfx_compress()(data, len(data), out_buf, ctypes.byref(out_len))
    if status < 0:
        raise LzfxError(status)
    return out_buf.raw[: out_len.value]
class LzfxError(Exception):
    """Exception raised for lzfx compression or decompression error.

    ``str(exc)`` includes the symbolic name, the numeric code and the
    explanation, so an uncaught error is readable in a traceback.
    ``exc.args`` stays ``(error_code,)``.

    Attributes:
        error_code -- The source error code, which is a negative integer
        error_name -- The constant name of the error
        message -- explanation of the error
    """

    # define LZFX_ESIZE      -1      /* Output buffer too small */
    # define LZFX_ECORRUPT   -2      /* Invalid data for decompression */
    # define LZFX_EARGS      -3      /* Arguments invalid (NULL) */
    _KNOWN_ERRORS = {
        -1: ("LZFX_ESIZE", "Output buffer too small"),
        -2: ("LZFX_ECORRUPT", "Invalid data for decompression"),
        -3: ("LZFX_EARGS", "Arguments invalid (NULL)"),
    }

    def __init__(self, error_code):
        # Keep ``args`` as ``(error_code,)`` so the exception can be rebuilt
        # from its args (e.g. when pickled or copied).
        super().__init__(error_code)
        self.error_code = error_code
        # Codes outside the table fall back to a generic description.
        self.error_name, self.message = self._KNOWN_ERRORS.get(
            error_code, ("UNKNOWN", "Unknown error")
        )

    def __str__(self):
        return f"{self.error_name} ({self.error_code}): {self.message}"

View File

@@ -19,6 +19,7 @@ from bdflib import reader as bdfreader
from bdflib.model import Font, Glyph
import font_tables
import lzfx
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)
@@ -505,7 +506,7 @@ def prepare_language(lang: dict, defs: dict, build_version: str) -> LanguageData
def write_language(
data: LanguageData, f: TextIO, lzfx_strings: Optional[bytes] = None
data: LanguageData, f: TextIO, strings_bin: Optional[bytes] = None
) -> None:
lang = data.lang
defs = data.defs
@@ -523,7 +524,7 @@ def write_language(
except KeyError:
lang_name = language_code
if lzfx_strings:
if strings_bin:
f.write('#include "lzfx.h"\n')
f.write(f"\n// ---- {lang_name} ----\n\n")
@@ -540,7 +541,7 @@ def write_language(
"extern const uint8_t *const Font_6x8 = USER_FONT_6x8;\n\n"
)
if not lzfx_strings:
if not strings_bin:
translation_strings_and_indices_text = get_translation_strings_and_indices_text(
lang, defs, symbol_conversion_table
)
@@ -551,9 +552,13 @@ def write_language(
"void prepareTranslations() {}\n\n"
)
else:
write_bytes_as_c_array(f, "translation_data_lzfx", lzfx_strings)
compressed = lzfx.compress(strings_bin)
logging.info(
f"Strings compressed from {len(strings_bin)} to {len(compressed)} bytes (ratio {len(compressed) / len(strings_bin):.3})"
)
write_bytes_as_c_array(f, "translation_data_lzfx", compressed)
f.write(
"static uint8_t translation_data_out_buffer[4096] __attribute__((__aligned__(2)));\n\n"
f"static uint8_t translation_data_out_buffer[{len(strings_bin)}] __attribute__((__aligned__(2)));\n\n"
"const TranslationIndexTable *const Tr = reinterpret_cast<const TranslationIndexTable *>(translation_data_out_buffer);\n"
"const char *const TranslationStrings = reinterpret_cast<const char *>(translation_data_out_buffer) + sizeof(TranslationIndexTable);\n\n"
"void prepareTranslations() {\n"
@@ -847,11 +852,11 @@ def parse_args() -> argparse.Namespace:
dest="input_pickled",
)
parser.add_argument(
"--lzfx-strings",
help="Use compressed TranslationIndices + TranslationStrings data",
"--strings-bin",
help="Use generated TranslationIndices + TranslationStrings data and compress them",
type=argparse.FileType("rb"),
required=False,
dest="lzfx_strings",
dest="strings_bin",
)
parser.add_argument(
"--output", "-o", help="Target file", type=argparse.FileType("w"), required=True
@@ -895,8 +900,8 @@ def main() -> None:
out_ = args.output
write_start(out_)
if args.lzfx_strings:
write_language(language_data, out_, args.lzfx_strings.read())
if args.strings_bin:
write_language(language_data, out_, args.strings_bin.read())
else:
write_language(language_data, out_)

View File

@@ -1,48 +0,0 @@
#include <stdio.h>
#include <stdlib.h>
#include "lzfx.h"
/* Program to demonstrate file compression. Don't use it in the real world! */
/* Compress the file named by argv[1] with lzfx and write the raw compressed
   bytes to the file named by argv[2].

   Returns 0 on success. Returns 1 on wrong usage, on any file or allocation
   failure, or when lzfx_compress reports an error (negative return value). */
int main(int argc, char **argv) {
  FILE        *f          = NULL;
  char        *input      = NULL;
  char        *output     = NULL;
  long         inputsize  = 0;
  unsigned int outputsize = 0;
  int          rc         = 0;
  int          status     = 1;

  if (argc != 3) {
    printf("Compresses a file.\n\nUsage: lzfx-raw input output\n");
    return 1;
  }

  /* open input */
  f = fopen(argv[1], "rb");
  if (!f) {
    printf("Error: %s\n", argv[1]);
    goto done;
  }

  /* get size; it must also fit the unsigned int lengths lzfx_compress takes,
     with room left for the +1 applied to the output buffer below */
  if (fseek(f, 0, SEEK_END) != 0 || (inputsize = ftell(f)) < 0 || fseek(f, 0, SEEK_SET) != 0 ||
      (unsigned long)inputsize >= (unsigned int)-1) {
    printf("Error: %s\n", argv[1]);
    goto done;
  }

  /* read; allocate at least one byte so an empty file still gets a real buffer */
  input = malloc(inputsize > 0 ? (size_t)inputsize : 1);
  if (!input || (inputsize > 0 && fread(input, (size_t)inputsize, 1, f) != 1)) {
    printf("Error: %s\n", argv[1]);
    goto done;
  }
  fclose(f);
  f = NULL;

  /* compress; the output buffer is only one byte larger than the input, so
     lzfx_compress can fail here, and that failure is reported instead of
     writing an uninitialised buffer */
  outputsize = (unsigned int)inputsize + 1;
  output     = malloc(outputsize);
  if (!output) {
    printf("Error: out of memory\n");
    goto done;
  }
  rc = lzfx_compress(input, (unsigned int)inputsize, output, &outputsize);
  if (rc < 0) {
    printf("Error: lzfx_compress failed (%d)\n", rc);
    goto done;
  }

  /* open output */
  f = fopen(argv[2], "wb");
  if (!f) {
    printf("Error: %s\n", argv[2]);
    goto done;
  }

  /* write */
  if (outputsize > 0 && fwrite(output, outputsize, 1, f) != 1) {
    printf("Error: %s\n", argv[2]);
    goto done;
  }
  if (fclose(f) != 0) {
    f = NULL;
    printf("Error: %s\n", argv[2]);
    goto done;
  }
  f = NULL;

  status = 0;

done:
  if (f) {
    fclose(f);
  }
  free(output);
  free(input);
  return status;
}

View File

@@ -364,10 +364,10 @@ $(OUTPUT_DIR)/Core/Gen/translation.files/%.o: Core/Gen/Translation.%.cpp
@echo Generating $@
@$(CPP) -c $(filter-out -flto -g3,$(CXXFLAGS)) $< -o $@
$(HOST_OUTPUT_DIR)/lzfx/lzfx-host-compress: Core/lzfx/lzfx-host-compress.c Core/lzfx/lzfx.c
$(HOST_OUTPUT_DIR)/lzfx/liblzfx.so: Core/lzfx/lzfx.c
@test -d $(@D) || mkdir -p $(@D)
@echo Building host lzfx tool $@
@$(HOST_CC) -Wno-unused-result -O $^ -o $@
@echo Building host lzfx shared library $@
@$(HOST_CC) -Wno-unused-result -fPIC -shared -O $^ -o $@
$(OUTPUT_DIR)/Core/Gen/translation.files/%.strings.bin: $(OUTPUT_DIR)/Core/Gen/translation.files/%.o
@echo Dumping translation strings data from $<
@@ -378,18 +378,13 @@ $(OUTPUT_DIR)/Core/Gen/translation.files/%.strings.bin: $(OUTPUT_DIR)/Core/Gen/t
@test -s $(@D)/$*.data.TranslationStrings.bin || (rm $(@D)/$*.data.TranslationStrings.bin; echo 'ERROR: Output for .rodata._ZL22TranslationStringsData is empty!' >&2; false)
@cat $(@D)/$*.data.TranslationIndices.bin $(@D)/$*.data.TranslationStrings.bin > $@
$(OUTPUT_DIR)/Core/Gen/translation.files/%.strings.lzfx: $(OUTPUT_DIR)/Core/Gen/translation.files/%.strings.bin $(HOST_OUTPUT_DIR)/lzfx/lzfx-host-compress
@echo Compressing translation strings data for $*
@$(HOST_OUTPUT_DIR)/lzfx/lzfx-host-compress $< $@
@echo Compressed from $$(stat --printf="%s" $<) to $$(stat --printf="%s" $@) bytes
Core/Gen/Translation_lzfx.%.cpp: $(OUTPUT_DIR)/Core/Gen/translation.files/%.strings.lzfx $(OUTPUT_DIR)/Core/Gen/translation.files/%.pickle
Core/Gen/Translation_lzfx.%.cpp: $(OUTPUT_DIR)/Core/Gen/translation.files/%.strings.bin $(OUTPUT_DIR)/Core/Gen/translation.files/%.pickle $(HOST_OUTPUT_DIR)/lzfx/liblzfx.so
@test -d $(@D) || mkdir -p $(@D)
@echo Generating lzfx compressed translation for $*
@python3 ../Translations/make_translation.py \
-o $(PWD)/Core/Gen/Translation_lzfx.$*.cpp \
--input-pickled $(OUTPUT_DIR)/Core/Gen/translation.files/$*.pickle \
--lzfx-strings $(OUTPUT_DIR)/Core/Gen/translation.files/$*.strings.lzfx \
--strings-bin $(OUTPUT_DIR)/Core/Gen/translation.files/$*.strings.bin \
$*