From 1a0b542ae6fd1094306ac26f10119ed96641d5f0 Mon Sep 17 00:00:00 2001 From: alvinhochun Date: Fri, 30 Apr 2021 16:51:13 +0800 Subject: [PATCH] [RFC] Multi-language firmware (second try) (#941) * Impl. sectioned font table in firmware * make_translation.py: Extract build_symbol_conversion_table function * Put translation indices and strings in a struct * Move translation objcopy step to Python * Impl. multi-language firmware demo * Impl. strings-compressed multi-lang firmware demo * Add font compression to multi-lang demo * Refactor Makefile a bit * Fix rules for make < 4.3 * Add more multi-lang groups * Add Pinecil multi-lang CI build * Add lzfx compression license text * Remote multi-language demo group * Fix build after merge * Import code from BriefLZ * Change brieflz for our use case * Change compression to use brieflz * Remove lzfx code * Update license file for brieflz * Exclude brieflz files from format check * Add BriefLZ test --- .github/workflows/push.yml | 53 ++ LICENSE_RELEASE.md | 32 + Translations/brieflz.py | 189 +++++ Translations/brieflz_test.py | 24 + Translations/lzfx.py | 91 --- Translations/make_translation.py | 512 +++++++++++--- Translations/objcopy.py | 25 + Translations/translation_BG.json | 7 + Translations/translation_CS.json | 7 + Translations/translation_DA.json | 7 + Translations/translation_DE.json | 7 + Translations/translation_EN.json | 7 + Translations/translation_ES.json | 7 + Translations/translation_FI.json | 7 + Translations/translation_FR.json | 7 + Translations/translation_HR.json | 7 + Translations/translation_HU.json | 7 + Translations/translation_IT.json | 7 + Translations/translation_JA_JP.json | 4 + Translations/translation_LT.json | 7 + Translations/translation_NL.json | 7 + Translations/translation_NL_BE.json | 7 + Translations/translation_NO.json | 7 + Translations/translation_PL.json | 7 + Translations/translation_PT.json | 7 + Translations/translation_RU.json | 7 + Translations/translation_SK.json | 7 + Translations/translation_SL.json | 7 + Translations/translation_SR_CYRL.json | 7 + Translations/translation_SR_LATN.json | 7 + Translations/translation_SV.json | 7 + Translations/translation_TR.json | 7 + Translations/translation_UK.json | 7 + Translations/translation_YUE_HK.json | 4 + Translations/translation_ZH_CN.json | 4 + Translations/translation_ZH_TW.json | 4 + Translations/translations_def.js | 5 + .../Source/GCC/gcc_gd32vf103_flashxip.ld | 2 +- source/Core/Drivers/OLED.cpp | 50 +- source/Core/Inc/Settings.h | 3 +- source/Core/Inc/Translation.h | 28 +- source/Core/Inc/Translation_multi.h | 42 ++ source/Core/LangSupport/lang_multi.cpp | 92 +++ source/Core/LangSupport/lang_single.cpp | 7 + source/Core/Src/gui.cpp | 1 + source/Core/brieflz/README.md | 41 ++ source/Core/brieflz/brieflz.c | 659 ++++++++++++++++++ source/Core/brieflz/brieflz.h | 183 +++++ source/Core/brieflz/brieflz_btparse.h | 332 +++++++++ source/Core/brieflz/brieflz_hashbucket.h | 262 +++++++ source/Core/brieflz/brieflz_lazy.h | 192 +++++ source/Core/brieflz/brieflz_leparse.h | 256 +++++++ source/Core/brieflz/depack.c | 271 +++++++ source/Core/lzfx/README.md | 79 --- source/Core/lzfx/lzfx.c | 357 ---------- source/Core/lzfx/lzfx.h | 95 --- source/Makefile | 140 +++- 57 files changed, 3447 insertions(+), 767 deletions(-) create mode 100644 Translations/brieflz.py create mode 100644 Translations/brieflz_test.py delete mode 100644 Translations/lzfx.py create mode 100644 Translations/objcopy.py create mode 100644 source/Core/Inc/Translation_multi.h create mode 100644 
source/Core/LangSupport/lang_multi.cpp create mode 100644 source/Core/LangSupport/lang_single.cpp create mode 100644 source/Core/brieflz/README.md create mode 100644 source/Core/brieflz/brieflz.c create mode 100644 source/Core/brieflz/brieflz.h create mode 100644 source/Core/brieflz/brieflz_btparse.h create mode 100644 source/Core/brieflz/brieflz_hashbucket.h create mode 100644 source/Core/brieflz/brieflz_lazy.h create mode 100644 source/Core/brieflz/brieflz_leparse.h create mode 100644 source/Core/brieflz/depack.c delete mode 100644 source/Core/lzfx/README.md delete mode 100644 source/Core/lzfx/lzfx.c delete mode 100644 source/Core/lzfx/lzfx.h diff --git a/.github/workflows/push.yml b/.github/workflows/push.yml index ba2af0f4..ca2e4abc 100644 --- a/.github/workflows/push.yml +++ b/.github/workflows/push.yml @@ -48,6 +48,51 @@ jobs: source/Hexfile/LICENSE_RELEASE.md if-no-files-found: error + build_multi-lang: + runs-on: ubuntu-20.04 + strategy: + matrix: + model: ["Pinecil"] + fail-fast: true + + steps: + - uses: actions/checkout@v2 + + - name: chmod + run: chmod +x setup.sh && chmod +x source/build.sh && sudo mkdir -p /build/cache && sudo chmod -R 777 /build + + - name: Cached compiler source files + uses: actions/cache@v2.1.5 + env: + cache-name: cache-compilers + with: + path: /build/cache + key: ${{ runner.os }}-build-${{ env.cache-name }} + restore-keys: | + ${{ runner.os }}- + + - name: setup + run: ./setup.sh + + - name: build ${{ matrix.model }} + run: cd source && make -j$(nproc) model="${{ matrix.model }}" firmware-multi_compressed_European firmware-multi_compressed_Bulgarian+Russian+Serbian+Ukrainian firmware-multi_Chinese+Japanese + + - name: copy license text + run: | + cp LICENSE source/Hexfile/LICENSE + cp LICENSE_RELEASE.md source/Hexfile/LICENSE_RELEASE.md + + - name: Archive ${{ matrix.model }} artifacts + uses: actions/upload-artifact@v2 + with: + name: ${{ matrix.model }}_multi-lang + path: | + source/Hexfile/${{ matrix.model }}_*.hex + source/Hexfile/${{ matrix.model }}_*.bin + source/Hexfile/LICENSE + source/Hexfile/LICENSE_RELEASE.md + if-no-files-found: error + tests: runs-on: ubuntu-20.04 steps: @@ -59,6 +104,14 @@ jobs: - name: Run python tests run: cd Translations && chmod +x make_translation_test.py && ./make_translation_test.py + - name: Run BriefLZ tests + run: | + cd source + make Objects/host/brieflz/libbrieflz.so + cd ../Translations + chmod +x brieflz_test.py + ./brieflz_test.py + check_formatting: runs-on: ubuntu-20.04 diff --git a/LICENSE_RELEASE.md b/LICENSE_RELEASE.md index 94e9eebb..b41fcceb 100644 --- a/LICENSE_RELEASE.md +++ b/LICENSE_RELEASE.md @@ -7,6 +7,7 @@ This document outlines the license of IronOS and its dependencies. - NMSIS: Apache-2.0 - GD32VF103 board files: BSD-3-Clause - WenQuanYi Bitmap Song font: GPL-2.0-or-later +- BriefLZ compression library: Zlib The source code of IronOS can be obtained on the [IronOS GitHub repo][gh]. @@ -178,3 +179,34 @@ You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA ``` + + +BriefLZ compression library +--- + +* Only applies to multi-language builds. + +``` +The zlib License (Zlib) + +Copyright (c) 2002-2020 Joergen Ibsen + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. 
+ +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. +``` diff --git a/Translations/brieflz.py b/Translations/brieflz.py new file mode 100644 index 00000000..5b3c08cf --- /dev/null +++ b/Translations/brieflz.py @@ -0,0 +1,189 @@ +import ctypes +import functools +import os +from pathlib import Path + +HERE = Path(__file__).resolve().parent + + +@functools.lru_cache(maxsize=None) +def _libbrieflz(): + so_path = os.path.join(HERE, "../source/Objects/host/brieflz/libbrieflz.so") + libbrieflz = ctypes.cdll.LoadLibrary(so_path) + return libbrieflz + + +@functools.lru_cache(maxsize=None) +def _fn_blz_max_packed_size(): + """Returns the blz_max_packed_size C function. + :: + + /** + * Get bound on compressed data size. + * + * @see blz_pack + * + * @param src_size number of bytes to compress + * @return maximum size of compressed data + */ + BLZ_API size_t + blz_max_packed_size(size_t src_size); + """ + + fn = _libbrieflz().blz_max_packed_size + fn.argtype = [ + ctypes.c_size_t, + ] + fn.restype = ctypes.c_size_t + return fn + + +def blz_max_packed_size(src_size: int) -> int: + """Get bound on compressed data size.""" + fn_blz_max_packed_size = _fn_blz_max_packed_size() + return int(fn_blz_max_packed_size(src_size)) + + +@functools.lru_cache(maxsize=None) +def _fn_blz_workmem_size_level(): + """Returns the blz_workmem_size_level C function. + :: + + /** + * Get required size of `workmem` buffer. + * + * @see blz_pack_level + * + * @param src_size number of bytes to compress + * @param level compression level + * @return required size in bytes of `workmem` buffer + */ + BLZ_API size_t + blz_workmem_size_level(size_t src_size, int level); + """ + + fn = _libbrieflz().blz_workmem_size_level + fn.argtype = [ + ctypes.c_size_t, + ctypes.c_int, + ] + fn.restype = ctypes.c_size_t + return fn + + +def blz_workmem_size_level(src_size: int, level: int) -> int: + """Get required size of `workmem` buffer.""" + fn_blz_workmem_size_level = _fn_blz_workmem_size_level() + return int(fn_blz_workmem_size_level(src_size, level)) + + +@functools.lru_cache(maxsize=None) +def _fn_blz_pack_level(): + """Returns the blz_pack_level C function. + :: + + /** + * Compress `src_size` bytes of data from `src` to `dst`. + * + * Compression levels between 1 and 9 offer a trade-off between + * time/space and ratio. Level 10 is optimal but very slow. 
+ * + * @param src pointer to data + * @param dst pointer to where to place compressed data + * @param src_size number of bytes to compress + * @param workmem pointer to memory for temporary use + * @param level compression level + * @return size of compressed data + */ + BLZ_API unsigned long + blz_pack_level(const void *src, void *dst, unsigned long src_size, + void *workmem, int level); + """ + + fn = _libbrieflz().blz_pack_level + fn.argtype = [ + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_ulong, + ctypes.c_char_p, + ctypes.c_int, + ] + fn.restype = ctypes.c_ulong + return fn + + +def compress(data: bytes) -> bytes: + """Returns a bytes object of the brieflz-compressed data.""" + + fn_blz_pack_level = _fn_blz_pack_level() + + output_buffer_len = blz_max_packed_size(len(data)) + + src = data + dst = ctypes.create_string_buffer(output_buffer_len) + src_size = len(src) + workmem = ctypes.create_string_buffer(blz_workmem_size_level(len(data), 10)) + level = 10 + + res = fn_blz_pack_level(src, dst, src_size, workmem, level) + + if res == 0: + raise BriefLZError() + else: + return bytes(dst[:res]) # type: ignore + + +@functools.lru_cache(maxsize=None) +def _fn_blz_depack_srcsize(): + """Returns the blz_depack_srcsize C function. + :: + + /** + * Decompress `src_size` bytes of data from `src` to `dst`. + * + * This function is unsafe. If the provided data is malformed, it may + * read more than `src_size` from the `src` buffer. + * + * @param src pointer to compressed data + * @param dst pointer to where to place decompressed data + * @param src_size size of the compressed data + * @return size of decompressed data + */ + BLZ_API unsigned long + blz_depack_srcsize(const void *src, void *dst, unsigned long src_size); + """ + + fn = _libbrieflz().blz_depack_srcsize + fn.argtype = [ + ctypes.c_char_p, + ctypes.c_char_p, + ctypes.c_ulong, + ] + fn.restype = ctypes.c_ulong + return fn + + +def depack_srcsize(data: bytes, expected_depack_size: int) -> bytes: + """Returns a bytes object of the uncompressed data.""" + + fn_blz_depack_srcsize = _fn_blz_depack_srcsize() + + output_buffer_len = expected_depack_size * 2 + + src = data + dst = ctypes.create_string_buffer(output_buffer_len) + src_size = len(src) + + res = fn_blz_depack_srcsize(src, dst, src_size) + + if res == 0: + raise BriefLZError() + else: + return bytes(dst[:res]) # type: ignore + + +class BriefLZError(Exception): + """Exception raised for brieflz compression or decompression error.""" + + def __init__(self): + pass diff --git a/Translations/brieflz_test.py b/Translations/brieflz_test.py new file mode 100644 index 00000000..ee711091 --- /dev/null +++ b/Translations/brieflz_test.py @@ -0,0 +1,24 @@ +#!/usr/bin/env python3 +import brieflz +import unittest + + +TEST_DATA = ( + b"Lorem ipsum dolor sit amet, consectetur adipiscing elit. " + b"Ut consequat mattis orci ac laoreet. Duis ac turpis tempus, varius lacus non, dignissim lectus. " + b"Curabitur quis metus luctus, sollicitudin ipsum at, dictum metus. " + b"Cras sed est nec ex tempor tincidunt in at ante. Vivamus laoreet urna eget lectus euismod feugiat. " + b"Duis a massa ac metus pellentesque interdum. Nunc congue, est faucibus convallis commodo, justo nibh sagittis augue, sed tristique urna neque vitae urna. " + b"Donec quis orci et purus imperdiet sollicitudin." 
+) + + +class TestBriefLZ(unittest.TestCase): + def test_roundtrip(self): + packed = brieflz.compress(TEST_DATA) + depacked = brieflz.depack_srcsize(packed, len(TEST_DATA)) + self.assertEqual(depacked, TEST_DATA) + + +if __name__ == "__main__": + unittest.main() diff --git a/Translations/lzfx.py b/Translations/lzfx.py deleted file mode 100644 index 9f389673..00000000 --- a/Translations/lzfx.py +++ /dev/null @@ -1,91 +0,0 @@ -import ctypes -import functools -import os -from pathlib import Path - -HERE = Path(__file__).resolve().parent - - -@functools.lru_cache(maxsize=None) -def _liblzfx(): - so_path = os.path.join(HERE, "../source/Objects/host/lzfx/liblzfx.so") - liblzfx = ctypes.cdll.LoadLibrary(so_path) - return liblzfx - - -@functools.lru_cache(maxsize=None) -def _fn_lzfx_compress(): - """Returns the lzfx_compress C function. - :: - - /* Buffer-to buffer compression. - - Supply pre-allocated input and output buffers via ibuf and obuf, and - their size in bytes via ilen and olen. Buffers may not overlap. - - On success, the function returns a non-negative value and the argument - olen contains the compressed size in bytes. On failure, a negative - value is returned and olen is not modified. - */ - int lzfx_compress(const void* ibuf, unsigned int ilen, - void* obuf, unsigned int *olen); - """ - - fn = _liblzfx().lzfx_compress - fn.argtype = [ - ctypes.c_char_p, - ctypes.c_uint, - ctypes.c_char_p, - ctypes.POINTER(ctypes.c_uint), - ] - fn.restype = ctypes.c_int - return fn - - -def compress(data: bytes) -> bytes: - """Returns a bytes object of the lzfx-compressed data.""" - - fn_compress = _fn_lzfx_compress() - - output_buffer_len = len(data) + 8 - - ibuf = data - ilen = len(ibuf) - obuf = ctypes.create_string_buffer(output_buffer_len) - olen = ctypes.c_uint(output_buffer_len) - - res = fn_compress(ibuf, ilen, obuf, ctypes.byref(olen)) - - if res < 0: - raise LzfxError(res) - else: - return bytes(obuf[: olen.value]) # type: ignore - - -class LzfxError(Exception): - """Exception raised for lzfx compression or decompression error. 
- - Attributes: - error_code -- The source error code, which is a negative integer - error_name -- The constant name of the error - message -- explanation of the error - """ - - # define LZFX_ESIZE -1 /* Output buffer too small */ - # define LZFX_ECORRUPT -2 /* Invalid data for decompression */ - # define LZFX_EARGS -3 /* Arguments invalid (NULL) */ - - def __init__(self, error_code): - self.error_code = error_code - if error_code == -1: - self.error_name = "LZFX_ESIZE" - self.message = "Output buffer too small" - elif error_code == -2: - self.error_name = "LZFX_ECORRUPT" - self.message = "Invalid data for decompression" - elif error_code == -3: - self.error_name = "LZFX_EARGS" - self.message = "Arguments invalid (NULL)" - else: - self.error_name = "UNKNOWN" - self.message = "Unknown error" diff --git a/Translations/make_translation.py b/Translations/make_translation.py index 4eb7a3b7..8c6274c2 100755 --- a/Translations/make_translation.py +++ b/Translations/make_translation.py @@ -19,7 +19,8 @@ from bdflib import reader as bdfreader from bdflib.model import Font, Glyph import font_tables -import lzfx +import brieflz +import objcopy logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) @@ -118,7 +119,9 @@ def get_debug_menu() -> List[str]: ] -def get_letter_counts(defs: dict, lang: dict, build_version: str) -> List[str]: +def get_letter_counts( + defs: dict, lang: dict, build_version: str +) -> Tuple[List[str], Dict[str, int]]: text_list = [] # iterate over all strings obj = lang["menuOptions"] @@ -187,10 +190,12 @@ def get_letter_counts(defs: dict, lang: dict, build_version: str) -> List[str]: symbol_counts[letter] = symbol_counts.get(letter, 0) + 1 # swap to Big -> little sort order symbols_by_occurrence = [ - x[0] for x in sorted(symbol_counts.items(), key=lambda kv: (kv[1], kv[0])) + x[0] + for x in sorted( + symbol_counts.items(), key=lambda kv: (kv[1], kv[0]), reverse=True + ) ] - symbols_by_occurrence.reverse() - return symbols_by_occurrence + return symbols_by_occurrence, symbol_counts def get_cjk_glyph(sym: str) -> bytes: @@ -383,20 +388,14 @@ def get_font_map_per_font(text_list: List[str], fonts: List[str]) -> FontMapsPer return FontMapsPerFont(font12_maps, font06_maps, sym_lists) -def get_font_map_and_table( - text_list: List[str], fonts: List[str] -) -> Tuple[List[str], FontMap, Dict[str, bytes]]: - # the text list is sorted - # allocate out these in their order as number codes - symbol_map: Dict[str, bytes] = {"\n": bytes([1])} - index = 2 # start at 2, as 0= null terminator,1 = new line +def get_forced_first_symbols() -> List[str]: forced_first_symbols = ["0", "1", "2", "3", "4", "5", "6", "7", "8", "9"] + return forced_first_symbols - # We enforce that numbers come first. 
- text_list = forced_first_symbols + [ - x for x in text_list if x not in forced_first_symbols - ] +def get_sym_list_and_font_map( + text_list: List[str], fonts: List[str] +) -> Tuple[List[str], Dict[str, List[str]], FontMap]: font_maps = get_font_map_per_font(text_list, fonts) font12_maps = font_maps.font12_maps font06_maps = font_maps.font06_maps @@ -420,29 +419,50 @@ def get_font_map_and_table( sym_list_both_fonts.append(sym) sym_list = sym_list_both_fonts + sym_list_large_only + return sym_list, font_maps.sym_lists, FontMap(font12_map, font06_map) + + +def build_symbol_conversion_map(sym_list: List[str]) -> Dict[str, bytes]: + forced_first_symbols = get_forced_first_symbols() + if sym_list[: len(forced_first_symbols)] != forced_first_symbols: + raise ValueError("Symbol list does not start with forced_first_symbols.") + + # the text list is sorted + # allocate out these in their order as number codes + symbol_map: Dict[str, bytes] = {"\n": bytes([1])} + index = 2 # start at 2, as 0= null terminator,1 = new line + # Assign symbol bytes by font index for index, sym in enumerate(sym_list, index): assert sym not in symbol_map symbol_map[sym] = get_bytes_from_font_index(index) - return sym_list, FontMap(font12_map, font06_map), symbol_map + return symbol_map def make_font_table_cpp( sym_list: List[str], font_map: FontMap, symbol_map: Dict[str, bytes] ) -> str: - output_table = make_font_table_12_cpp(sym_list, font_map, symbol_map) + output_table = make_font_table_named_cpp( + "USER_FONT_12", sym_list, font_map.font12, symbol_map + ) output_table += make_font_table_06_cpp(sym_list, font_map, symbol_map) return output_table -def make_font_table_12_cpp( - sym_list: List[str], font_map: FontMap, symbol_map: Dict[str, bytes] +def make_font_table_named_cpp( + name: Optional[str], + sym_list: List[str], + font_map: Dict[str, bytes], + symbol_map: Dict[str, bytes], ) -> str: - output_table = "const uint8_t USER_FONT_12[] = {\n" + output_table = "" + if name: + output_table = f"const uint8_t {name}[] = {{\n" for sym in sym_list: - output_table += f"{bytes_to_c_hex(font_map.font12[sym])}//{bytes_to_escaped(symbol_map[sym])} -> {sym}\n" - output_table += "};\n" + output_table += f"{bytes_to_c_hex(font_map[sym])}//{bytes_to_escaped(symbol_map[sym])} -> {sym}\n" + if name: + output_table += f"}}; // {name}\n" return output_table @@ -495,26 +515,102 @@ def write_bytes_as_c_array( @dataclass class LanguageData: - lang: dict + langs: List[dict] defs: dict build_version: str sym_list: List[str] + sym_lists_by_font: Dict[str, List[str]] font_map: FontMap - symbol_conversion_table: Dict[str, bytes] def prepare_language(lang: dict, defs: dict, build_version: str) -> LanguageData: language_code: str = lang["languageCode"] logging.info(f"Preparing language data for {language_code}") # Iterate over all of the text to build up the symbols & counts - text_list = get_letter_counts(defs, lang, build_version) + text_list, _ = get_letter_counts(defs, lang, build_version) # From the letter counts, need to make a symbol translator & write out the font fonts = lang["fonts"] - sym_list, font_map, symbol_conversion_table = get_font_map_and_table( - text_list, fonts - ) + + forced_first_symbols = get_forced_first_symbols() + + # We enforce that numbers come first. 
+ text_list = forced_first_symbols + [ + x for x in text_list if x not in forced_first_symbols + ] + + sym_list, sym_lists_by_font, font_map = get_sym_list_and_font_map(text_list, fonts) return LanguageData( - lang, defs, build_version, sym_list, font_map, symbol_conversion_table + [lang], defs, build_version, sym_list, sym_lists_by_font, font_map + ) + + +def prepare_languages( + langs: List[dict], defs: dict, build_version: str +) -> LanguageData: + language_codes: List[str] = [lang["languageCode"] for lang in langs] + logging.info(f"Preparing language data for {language_codes}") + + forced_first_symbols = get_forced_first_symbols() + + all_fonts = [ + font_tables.NAME_ASCII_BASIC, + font_tables.NAME_LATIN_EXTENDED, + font_tables.NAME_CYRILLIC, + font_tables.NAME_CJK, + ] + + # Build the full font maps + font12_map = {} + font06_map = {} + # Calculate total symbol counts per font: + total_sym_counts: Dict[str, Dict[str, int]] = {} + for lang in langs: + text_list, sym_counts = get_letter_counts(defs, lang, build_version) + fonts = lang["fonts"] + text_list = forced_first_symbols + [ + x for x in text_list if x not in forced_first_symbols + ] + font_maps = get_font_map_per_font(text_list, fonts) + for font in fonts: + font12_map.update(font_maps.font12_maps[font]) + font06_map.update(font_maps.font06_maps[font]) + for font, font_sym_list in font_maps.sym_lists.items(): + font_total_sym_counts = total_sym_counts.get(font, {}) + for sym in font_sym_list: + font_total_sym_counts[sym] = font_total_sym_counts.get( + sym, 0 + ) + sym_counts.get(sym, 0) + total_sym_counts[font] = font_total_sym_counts + + sym_lists_by_font: Dict[str, List[str]] = {} + combined_sym_list = [] + for font in all_fonts: + if font not in total_sym_counts: + continue + # swap to Big -> little sort order + current_sym_list = [ + x[0] + for x in sorted( + total_sym_counts[font].items(), + key=lambda kv: (kv[1], kv[0]), + reverse=True, + ) + ] + if font == font_tables.NAME_ASCII_BASIC: + # We enforce that numbers come first. 
+ current_sym_list = forced_first_symbols + [ + x for x in current_sym_list if x not in forced_first_symbols + ] + sym_lists_by_font[font] = current_sym_list + combined_sym_list.extend(current_sym_list) + + return LanguageData( + langs, + defs, + build_version, + combined_sym_list, + sym_lists_by_font, + FontMap(font12_map, font06_map), ) @@ -524,12 +620,15 @@ def write_language( strings_bin: Optional[bytes] = None, compress_font: bool = False, ) -> None: - lang = data.lang + if len(data.langs) > 1: + raise ValueError("More than 1 languages are provided") + lang = data.langs[0] defs = data.defs build_version = data.build_version sym_list = data.sym_list font_map = data.font_map - symbol_conversion_table = data.symbol_conversion_table + + symbol_conversion_table = build_symbol_conversion_map(sym_list) language_code: str = lang["languageCode"] logging.info(f"Generating block for {language_code}") @@ -540,7 +639,7 @@ def write_language( lang_name = language_code if strings_bin or compress_font: - f.write('#include "lzfx.h"\n') + f.write('#include "brieflz.h"\n') f.write(f"\n// ---- {lang_name} ----\n\n") @@ -549,19 +648,44 @@ def write_language( sym_list, font_map, symbol_conversion_table ) f.write(font_table_text) + f.write( + "const FontSection FontSectionsData[] = {\n" + " {\n" + " .symbol_start = 2,\n" + f" .symbol_end = {len(sym_list) + 2},\n" + " .font12_start_ptr = USER_FONT_12,\n" + " .font06_start_ptr = USER_FONT_6x8,\n" + " },\n" + "};\n" + "const FontSection *const FontSections = FontSectionsData;\n" + "const uint8_t FontSectionsCount = sizeof(FontSectionsData) / sizeof(FontSectionsData[0]);\n" + ) else: font12_uncompressed = bytearray() for sym in sym_list: font12_uncompressed.extend(font_map.font12[sym]) - font12_compressed = lzfx.compress(bytes(font12_uncompressed)) + font12_compressed = brieflz.compress(bytes(font12_uncompressed)) logging.info( f"Font table 12x16 compressed from {len(font12_uncompressed)} to {len(font12_compressed)} bytes (ratio {len(font12_compressed) / len(font12_uncompressed):.3})" ) - write_bytes_as_c_array(f, "font_12x16_lzfx", font12_compressed) + write_bytes_as_c_array(f, "font_12x16_brieflz", font12_compressed) font_table_text = make_font_table_06_cpp( sym_list, font_map, symbol_conversion_table ) f.write(font_table_text) + f.write( + f"static uint8_t font_out_buffer[{len(font12_uncompressed)}];\n" + "const FontSection FontSectionsData[] = {\n" + " {\n" + " .symbol_start = 2,\n" + f" .symbol_end = {len(sym_list) + 2},\n" + " .font12_start_ptr = font_out_buffer,\n" + " .font06_start_ptr = USER_FONT_6x8,\n" + " },\n" + "};\n" + "const FontSection *const FontSections = FontSectionsData;\n" + "const uint8_t FontSectionsCount = sizeof(FontSectionsData) / sizeof(FontSectionsData[0]);\n" + ) f.write(f"\n// ---- {lang_name} ----\n\n") @@ -573,49 +697,38 @@ def write_language( f"const bool HasFahrenheit = {('true' if lang.get('tempUnitFahrenheit', True) else 'false')};\n\n" ) - if not compress_font: - f.write("extern const uint8_t *const Font_12x16 = USER_FONT_12;\n") - else: - f.write( - f"static uint8_t font_out_buffer[{len(font12_uncompressed)}];\n\n" - "extern const uint8_t *const Font_12x16 = font_out_buffer;\n" - ) - f.write("extern const uint8_t *const Font_6x8 = USER_FONT_6x8;\n\n") - if not strings_bin: translation_strings_and_indices_text = get_translation_strings_and_indices_text( lang, defs, symbol_conversion_table ) f.write(translation_strings_and_indices_text) f.write( - "const TranslationIndexTable *const Tr = &TranslationIndices;\n" - "const char 
*const TranslationStrings = TranslationStringsData;\n\n" + "const TranslationIndexTable *Tr = &translation.indices;\n" + "const char *TranslationStrings = translation.strings;\n\n" ) else: - compressed = lzfx.compress(strings_bin) + compressed = brieflz.compress(strings_bin) logging.info( f"Strings compressed from {len(strings_bin)} to {len(compressed)} bytes (ratio {len(compressed) / len(strings_bin):.3})" ) - write_bytes_as_c_array(f, "translation_data_lzfx", compressed) + write_bytes_as_c_array(f, "translation_data_brieflz", compressed) f.write( f"static uint8_t translation_data_out_buffer[{len(strings_bin)}] __attribute__((__aligned__(2)));\n\n" - "const TranslationIndexTable *const Tr = reinterpret_cast(translation_data_out_buffer);\n" - "const char *const TranslationStrings = reinterpret_cast(translation_data_out_buffer) + sizeof(TranslationIndexTable);\n\n" + "const TranslationIndexTable *Tr = reinterpret_cast(translation_data_out_buffer);\n" + "const char *TranslationStrings = reinterpret_cast(translation_data_out_buffer) + sizeof(TranslationIndexTable);\n\n" ) if not strings_bin and not compress_font: f.write("void prepareTranslations() {}\n\n") else: - f.write("void prepareTranslations() {\n" " unsigned int outsize;\n") + f.write("void prepareTranslations() {\n") if compress_font: f.write( - " outsize = sizeof(font_out_buffer);\n" - " lzfx_decompress(font_12x16_lzfx, sizeof(font_12x16_lzfx), font_out_buffer, &outsize);\n" + " blz_depack_srcsize(font_12x16_brieflz, font_out_buffer, sizeof(font_12x16_brieflz));\n" ) if strings_bin: f.write( - " outsize = sizeof(translation_data_out_buffer);\n" - " lzfx_decompress(translation_data_lzfx, sizeof(translation_data_lzfx), translation_data_out_buffer, &outsize);\n" + " blz_depack_srcsize(translation_data_brieflz, translation_data_out_buffer, sizeof(translation_data_brieflz));\n" ) f.write("}\n\n") @@ -623,6 +736,188 @@ def write_language( f.write(sanity_checks_text) +def write_languages( + data: LanguageData, + f: TextIO, + strings_obj_path: Optional[str] = None, + compress_font: bool = False, +) -> None: + defs = data.defs + build_version = data.build_version + combined_sym_list = data.sym_list + sym_lists_by_font = data.sym_lists_by_font + font_map = data.font_map + + symbol_conversion_table = build_symbol_conversion_map(combined_sym_list) + + language_codes: List[str] = [lang["languageCode"] for lang in data.langs] + logging.info(f"Generating block for {language_codes}") + + lang_names = [ + lang.get("languageLocalName", lang["languageCode"]) for lang in data.langs + ] + + f.write('#include "Translation_multi.h"') + + f.write(f"\n// ---- {lang_names} ----\n\n") + + max_decompressed_font_size = 0 + if not compress_font: + font_table_text = "" + font_section_info_text = ( + "const FontSectionDataInfo FontSectionDataInfos[] = {\n" + ) + for font, current_sym_list in sym_lists_by_font.items(): + font_table_text += f"const uint8_t font_table_data_{font}[] = {{\n" + font_table_text += "// 12x16:\n" + font_table_text += make_font_table_named_cpp( + None, + current_sym_list, + font_map.font12, + symbol_conversion_table, + ) + if font != font_tables.NAME_CJK: + font_table_text += "// 6x8:\n" + font_table_text += make_font_table_named_cpp( + None, + current_sym_list, + font_map.font06, # type: ignore[arg-type] + symbol_conversion_table, + ) + font_table_text += f"}}; // font_table_data_{font}\n" + current_sym_start = combined_sym_list.index(current_sym_list[0]) + 2 + font_section_info_text += ( + " {\n" + f" .symbol_start = 
{current_sym_start},\n" + f" .symbol_count = {len(current_sym_list)},\n" + f" .data_size = sizeof(font_table_data_{font}),\n" + " .data_is_compressed = false,\n" + f" .data_ptr = font_table_data_{font},\n" + " },\n" + ) + + f.write(font_table_text) + font_section_info_text += ( + "};\n" + "const uint8_t FontSectionDataCount = sizeof(FontSectionDataInfos) / sizeof(FontSectionDataInfos[0]);\n\n" + ) + f.write(font_section_info_text) + f.write( + "FontSection DynamicFontSections[4] = {};\n" + "const FontSection *const FontSections = DynamicFontSections;\n" + "const uint8_t FontSectionsCount = sizeof(DynamicFontSections) / sizeof(DynamicFontSections[0]);\n" + ) + else: + font_section_info_text = ( + "const FontSectionDataInfo FontSectionDataInfos[] = {\n" + ) + for font, current_sym_list in sym_lists_by_font.items(): + current_sym_start = combined_sym_list.index(current_sym_list[0]) + 2 + font_uncompressed = bytearray() + for sym in current_sym_list: + font_uncompressed.extend(font_map.font12[sym]) + if font != font_tables.NAME_CJK: + for sym in current_sym_list: + font_uncompressed.extend(font_map.font06[sym]) # type: ignore[arg-type] + font_compressed = brieflz.compress(bytes(font_uncompressed)) + logging.info( + f"Font table for {font} compressed from {len(font_uncompressed)} to {len(font_compressed)} bytes (ratio {len(font_compressed) / len(font_uncompressed):.3})" + ) + max_decompressed_font_size += len(font_uncompressed) + write_bytes_as_c_array(f, f"font_data_brieflz_{font}", font_compressed) + font_section_info_text += ( + " {\n" + f" .symbol_start = {current_sym_start},\n" + f" .symbol_count = {len(current_sym_list)},\n" + f" .data_size = sizeof(font_data_brieflz_{font}),\n" + " .data_is_compressed = true,\n" + f" .data_ptr = font_data_brieflz_{font},\n" + " },\n" + ) + font_section_info_text += ( + "};\n" + "const uint8_t FontSectionDataCount = sizeof(FontSectionDataInfos) / sizeof(FontSectionDataInfos[0]);\n\n" + ) + f.write(font_section_info_text) + f.write( + "FontSection DynamicFontSections[4] = {};\n" + "const FontSection *const FontSections = DynamicFontSections;\n" + "const uint8_t FontSectionsCount = sizeof(DynamicFontSections) / sizeof(DynamicFontSections[0]);\n" + ) + + f.write(f"\n// ---- {lang_names} ----\n\n") + + translation_common_text = get_translation_common_text( + defs, symbol_conversion_table, build_version + ) + f.write(translation_common_text) + f.write( + f"const bool HasFahrenheit = {('true' if any([lang.get('tempUnitFahrenheit', True) for lang in data.langs]) else 'false')};\n\n" + ) + + max_decompressed_translation_size = 0 + if not strings_obj_path: + for lang in data.langs: + lang_code = lang["languageCode"] + translation_strings_and_indices_text = ( + get_translation_strings_and_indices_text( + lang, defs, symbol_conversion_table, suffix=f"_{lang_code}" + ) + ) + f.write(translation_strings_and_indices_text) + f.write("const LanguageMeta LanguageMetas[] = {\n") + for lang in data.langs: + lang_code = lang["languageCode"] + f.write( + " {\n" + # NOTE: Cannot specify C99 designator here due to GCC (g++) bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55227 + f' /* .code = */ "{lang_code}",\n' + f" .translation_data = reinterpret_cast(&translation_{lang_code}),\n" + f" .translation_size = sizeof(translation_{lang_code}),\n" + f" .translation_is_compressed = false,\n" + " },\n" + ) + f.write("};\n") + else: + for lang in data.langs: + lang_code = lang["languageCode"] + sym_name = objcopy.cpp_var_to_section_name(f"translation_{lang_code}") + strings_bin = 
objcopy.get_binary_from_obj(strings_obj_path, sym_name) + if len(strings_bin) == 0: + raise ValueError(f"Output for {sym_name} is empty") + max_decompressed_translation_size = max( + max_decompressed_translation_size, len(strings_bin) + ) + compressed = brieflz.compress(strings_bin) + logging.info( + f"Strings for {lang_code} compressed from {len(strings_bin)} to {len(compressed)} bytes (ratio {len(compressed) / len(strings_bin):.3})" + ) + write_bytes_as_c_array( + f, f"translation_data_brieflz_{lang_code}", compressed + ) + f.write("const LanguageMeta LanguageMetas[] = {\n") + for lang in data.langs: + lang_code = lang["languageCode"] + f.write( + " {\n" + # NOTE: Cannot specify C99 designator here due to GCC (g++) bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55227 + f' /* .code = */ "{lang_code}",\n' + f" .translation_data = translation_data_brieflz_{lang_code},\n" + f" .translation_size = sizeof(translation_data_brieflz_{lang_code}),\n" + f" .translation_is_compressed = true,\n" + " },\n" + ) + f.write("};\n") + f.write( + "const uint8_t LanguageCount = sizeof(LanguageMetas) / sizeof(LanguageMetas[0]);\n\n" + f"alignas(TranslationData) uint8_t translation_data_out_buffer[{max_decompressed_translation_size + max_decompressed_font_size}];\n" + "const uint16_t translation_data_out_buffer_size = sizeof(translation_data_out_buffer);\n\n" + ) + + sanity_checks_text = get_translation_sanity_checks_text(defs) + f.write(sanity_checks_text) + + def get_translation_common_text( defs: dict, symbol_conversion_table: Dict[str, bytes], build_version ) -> str: @@ -652,7 +947,7 @@ class TranslationItem: def get_translation_strings_and_indices_text( - lang: dict, defs: dict, symbol_conversion_table: Dict[str, bytes] + lang: dict, defs: dict, symbol_conversion_table: Dict[str, bytes], suffix: str = "" ) -> str: str_table: List[str] = [] str_group_messages: List[TranslationItem] = [] @@ -780,6 +1075,8 @@ def get_translation_strings_and_indices_text( j = i while backward_sorted_table[j + 1][2].startswith(converted): j += 1 + if j + 1 == len(backward_sorted_table): + break if j != i: str_remapping[str_index] = RemappedTranslationItem( str_index=backward_sorted_table[j][0], @@ -790,7 +1087,8 @@ def get_translation_strings_and_indices_text( str_offsets = [-1] * len(str_table) offset = 0 write_null = False - translation_strings_text = "const char TranslationStringsData[] = {\n" + # NOTE: Cannot specify C99 designator here due to GCC (g++) bug: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=55227 + translation_strings_text = " /* .strings = */ {\n" for i, source_str in enumerate(str_table): if str_remapping[i] is not None: continue @@ -814,33 +1112,37 @@ def get_translation_strings_and_indices_text( for item in group: if item.str_index == j: translation_strings_text += ( - f" // - {pre_info} {item.info}\n" + f" // - {pre_info} {item.info}\n" ) if j == i: - translation_strings_text += f" // {offset: >4}: {escape(source_str)}\n" + translation_strings_text += ( + f" // {offset: >4}: {escape(source_str)}\n" + ) str_offsets[j] = offset else: remapped = str_remapping[j] assert remapped is not None - translation_strings_text += f" // {offset + remapped.str_start_offset: >4}: {escape(str_table[j])}\n" + translation_strings_text += f" // {offset + remapped.str_start_offset: >4}: {escape(str_table[j])}\n" str_offsets[j] = offset + remapped.str_start_offset converted_bytes = convert_string_bytes(symbol_conversion_table, source_str) - translation_strings_text += f' "{bytes_to_escaped(converted_bytes)}"' + 
translation_strings_text += f' "{bytes_to_escaped(converted_bytes)}"' str_offsets[i] = offset # Add the length and the null terminator offset += len(converted_bytes) + 1 - translation_strings_text += "\n}; // TranslationStringsData\n\n" + translation_strings_text += "\n }, // .strings\n\n" + + str_total_bytes = offset def get_offset(idx: int) -> int: assert str_offsets[idx] >= 0 return str_offsets[idx] - translation_indices_text = "const TranslationIndexTable TranslationIndices = {\n" + translation_indices_text = " .indices = {\n" # ----- Write the messages string indices: for group in [str_group_messages, str_group_messageswarn, str_group_characters]: for item in group: - translation_indices_text += f" .{item.info} = {get_offset(item.str_index)}, // {escape(str_table[item.str_index])}\n" + translation_indices_text += f" .{item.info} = {get_offset(item.str_index)}, // {escape(str_table[item.str_index])}\n" translation_indices_text += "\n" # ----- Write the settings index tables: @@ -851,14 +1153,22 @@ def get_translation_strings_and_indices_text( (str_group_settingmenuentriesdesc, "SettingsMenuEntriesDescriptions"), ]: max_len = 30 - translation_indices_text += f" .{name} = {{\n" + translation_indices_text += f" .{name} = {{\n" for item in group: - translation_indices_text += f" /* {item.info.ljust(max_len)[:max_len]} */ {get_offset(item.str_index)}, // {escape(str_table[item.str_index])}\n" - translation_indices_text += f" }}, // {name}\n\n" + translation_indices_text += f" /* {item.info.ljust(max_len)[:max_len]} */ {get_offset(item.str_index)}, // {escape(str_table[item.str_index])}\n" + translation_indices_text += f" }}, // {name}\n\n" - translation_indices_text += "}; // TranslationIndices\n\n" + translation_indices_text += " }, // .indices\n\n" - return translation_strings_text + translation_indices_text + return ( + "struct {\n" + " TranslationIndexTable indices;\n" + f" char strings[{str_total_bytes}];\n" + f"}} const translation{suffix} = {{\n" + + translation_indices_text + + translation_strings_text + + f"}}; // translation{suffix}\n\n" + ) def get_translation_sanity_checks_text(defs: dict) -> str: @@ -904,11 +1214,11 @@ def parse_args() -> argparse.Namespace: dest="input_pickled", ) parser.add_argument( - "--strings-bin", - help="Use generated TranslationIndices + TranslationStrings data and compress them", + "--strings-obj", + help="Use generated TranslationData by extracting from object file", type=argparse.FileType("rb"), required=False, - dest="strings_bin", + dest="strings_obj", ) parser.add_argument( "--compress-font", @@ -920,7 +1230,12 @@ def parse_args() -> argparse.Namespace: parser.add_argument( "--output", "-o", help="Target file", type=argparse.FileType("w"), required=True ) - parser.add_argument("languageCode", help="Language to generate") + parser.add_argument( + "languageCodes", + metavar="languageCode", + nargs="+", + help="Language(s) to generate", + ) return parser.parse_args() @@ -936,12 +1251,13 @@ def main() -> None: if args.input_pickled: logging.info(f"Reading pickled language data from {args.input_pickled.name}...") language_data = pickle.load(args.input_pickled) - if language_data.lang["languageCode"] != args.languageCode: + language_codes = [lang["languageCode"] for lang in language_data.langs] + if language_codes != args.languageCodes: logging.error( - f"error: languageCode {args.languageCode} does not match language data {language_data.lang['languageCode']}" + f"error: languageCode {args.languageCode} does not match language data {language_codes}" ) 
sys.exit(1) - logging.info(f"Read language data for {language_data.lang['languageCode']}") + logging.info(f"Read language data for {language_codes}") logging.info(f"Build version: {language_data.build_version}") else: try: @@ -951,23 +1267,45 @@ def main() -> None: sys.exit(1) logging.info(f"Build version: {build_version}") - logging.info(f"Making {args.languageCode} from {json_dir}") + logging.info(f"Making {args.languageCodes} from {json_dir}") - lang_ = read_translation(json_dir, args.languageCode) defs_ = load_json(os.path.join(json_dir, "translations_def.js"), True) - language_data = prepare_language(lang_, defs_, build_version) + if len(args.languageCodes) == 1: + lang_ = read_translation(json_dir, args.languageCodes[0]) + language_data = prepare_language(lang_, defs_, build_version) + else: + langs_ = [ + read_translation(json_dir, lang_code) + for lang_code in args.languageCodes + ] + language_data = prepare_languages(langs_, defs_, build_version) out_ = args.output write_start(out_) - if args.strings_bin: - write_language( - language_data, - out_, - args.strings_bin.read(), - compress_font=args.compress_font, - ) + if len(language_data.langs) == 1: + if args.strings_obj: + sym_name = objcopy.cpp_var_to_section_name("translation") + strings_bin = objcopy.get_binary_from_obj(args.strings_obj.name, sym_name) + if len(strings_bin) == 0: + raise ValueError(f"Output for {sym_name} is empty") + write_language( + language_data, + out_, + strings_bin=strings_bin, + compress_font=args.compress_font, + ) + else: + write_language(language_data, out_, compress_font=args.compress_font) else: - write_language(language_data, out_, compress_font=args.compress_font) + if args.strings_obj: + write_languages( + language_data, + out_, + strings_obj_path=args.strings_obj.name, + compress_font=args.compress_font, + ) + else: + write_languages(language_data, out_, compress_font=args.compress_font) if args.output_pickled: logging.info(f"Writing pickled data to {args.output_pickled.name}") diff --git a/Translations/objcopy.py b/Translations/objcopy.py new file mode 100644 index 00000000..be0cc409 --- /dev/null +++ b/Translations/objcopy.py @@ -0,0 +1,25 @@ +import os +import subprocess +import tempfile + + +if "OBJCOPY" in os.environ: + OBJCOPY = os.environ["OBJCOPY"] +else: + OBJCOPY = "objcopy" + + +def get_binary_from_obj(objfile_path: str, section_name: str) -> bytes: + tmpfd, tmpfile = tempfile.mkstemp() + result = subprocess.run( + [OBJCOPY, "-O", "binary", "-j", section_name, objfile_path, tmpfile] + ) + result.check_returncode() + with open(tmpfd, "rb") as f: + bin: bytes = f.read() + os.remove(tmpfile) + return bin + + +def cpp_var_to_section_name(var_name: str) -> str: + return f".rodata._ZL{len(var_name)}{var_name}" diff --git a/Translations/translation_BG.json b/Translations/translation_BG.json index 5c285352..0d746668 100644 --- a/Translations/translation_BG.json +++ b/Translations/translation_BG.json @@ -308,6 +308,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " BG Български" + ], + "desc": "" } } } diff --git a/Translations/translation_CS.json b/Translations/translation_CS.json index 645994f8..56811025 100644 --- a/Translations/translation_CS.json +++ b/Translations/translation_CS.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " CS Český" + ], + "desc": "" } } } diff --git a/Translations/translation_DA.json 
b/Translations/translation_DA.json index 1541e74b..88c9ec3a 100644 --- a/Translations/translation_DA.json +++ b/Translations/translation_DA.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " DA Dansk" + ], + "desc": "" } } } diff --git a/Translations/translation_DE.json b/Translations/translation_DE.json index efd46499..703e2b16 100644 --- a/Translations/translation_DE.json +++ b/Translations/translation_DE.json @@ -306,6 +306,13 @@ "Dauer" ], "desc": "Dauer des Wachhalteimpulses (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " DE Deutsch" + ], + "desc": "" } } } diff --git a/Translations/translation_EN.json b/Translations/translation_EN.json index d5b67ae4..a17d886c 100644 --- a/Translations/translation_EN.json +++ b/Translations/translation_EN.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " EN English" + ], + "desc": "" } } } diff --git a/Translations/translation_ES.json b/Translations/translation_ES.json index 90afeaa0..69f65d2e 100644 --- a/Translations/translation_ES.json +++ b/Translations/translation_ES.json @@ -306,6 +306,13 @@ "duración" ], "desc": "Duración del impulso de mantenimiento de la vigilia (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " ES Castellano" + ], + "desc": "" } } } diff --git a/Translations/translation_FI.json b/Translations/translation_FI.json index f4ebef8f..5d67d61d 100644 --- a/Translations/translation_FI.json +++ b/Translations/translation_FI.json @@ -302,6 +302,13 @@ "kesto" ], "desc": "Herätyspulssin kesto (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " FI Suomi" + ], + "desc": "" } } } diff --git a/Translations/translation_FR.json b/Translations/translation_FR.json index 1a1cf1f7..fef83c9c 100644 --- a/Translations/translation_FR.json +++ b/Translations/translation_FR.json @@ -305,6 +305,13 @@ "impulsions" ], "desc": "Durée des impulsions pour empêcher la mise en veille (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " FR Français" + ], + "desc": "" } } } diff --git a/Translations/translation_HR.json b/Translations/translation_HR.json index a488a994..a6e1deaa 100644 --- a/Translations/translation_HR.json +++ b/Translations/translation_HR.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " HR Hrvatski" + ], + "desc": "" } } } diff --git a/Translations/translation_HU.json b/Translations/translation_HU.json index 13d2e0ed..427a5c54 100644 --- a/Translations/translation_HU.json +++ b/Translations/translation_HU.json @@ -308,6 +308,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " HU Magyar" + ], + "desc": "" } } } diff --git a/Translations/translation_IT.json b/Translations/translation_IT.json index dfcebc61..45d9cc65 100644 --- a/Translations/translation_IT.json +++ b/Translations/translation_IT.json @@ -305,6 +305,13 @@ "impulso" ], "desc": "Regola la durata dell'«impulso sveglia» [multipli di 250 ms]" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " IT Italiano" + ], + "desc": "" } } } diff --git a/Translations/translation_JA_JP.json b/Translations/translation_JA_JP.json index 20bde0dd..3dc612ce 100644 --- a/Translations/translation_JA_JP.json +++ b/Translations/translation_JA_JP.json @@ -201,6 +201,10 @@ 
"PowerPulseDuration": { "text2": "パルス時間長", "desc": "電源供給元をオンに保つために使用される、電力パルスの時間長 " + }, + "LanguageSwitch": { + "text2": "言語: 日本語", + "desc": "" } } } diff --git a/Translations/translation_LT.json b/Translations/translation_LT.json index aba3520d..8eda6656 100644 --- a/Translations/translation_LT.json +++ b/Translations/translation_LT.json @@ -308,6 +308,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " LT Lietuvių" + ], + "desc": "" } } } diff --git a/Translations/translation_NL.json b/Translations/translation_NL.json index 5d4d5f6a..85060eab 100644 --- a/Translations/translation_NL.json +++ b/Translations/translation_NL.json @@ -314,6 +314,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " NL Nederlands" + ], + "desc": "" } } } diff --git a/Translations/translation_NL_BE.json b/Translations/translation_NL_BE.json index 00ae61bf..9aafcead 100644 --- a/Translations/translation_NL_BE.json +++ b/Translations/translation_NL_BE.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " NL_BE Vlaams" + ], + "desc": "" } } } diff --git a/Translations/translation_NO.json b/Translations/translation_NO.json index 90401145..c126f69b 100644 --- a/Translations/translation_NO.json +++ b/Translations/translation_NO.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " NO Norsk" + ], + "desc": "" } } } diff --git a/Translations/translation_PL.json b/Translations/translation_PL.json index 4159e226..c9af631a 100644 --- a/Translations/translation_PL.json +++ b/Translations/translation_PL.json @@ -306,6 +306,13 @@ "impulsu mocy" ], "desc": "Długość impulsu mocy zapobiegającego usypianiu powerbanku (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " PL Polski" + ], + "desc": "" } } } diff --git a/Translations/translation_PT.json b/Translations/translation_PT.json index 06b51d37..7bbcc7ea 100644 --- a/Translations/translation_PT.json +++ b/Translations/translation_PT.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " PT Português" + ], + "desc": "" } } } diff --git a/Translations/translation_RU.json b/Translations/translation_RU.json index b3de0c81..1827e672 100644 --- a/Translations/translation_RU.json +++ b/Translations/translation_RU.json @@ -306,6 +306,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " RU Русский" + ], + "desc": "" } } } diff --git a/Translations/translation_SK.json b/Translations/translation_SK.json index 56e9e19d..05a1624c 100644 --- a/Translations/translation_SK.json +++ b/Translations/translation_SK.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " SK Slovenčina" + ], + "desc": "" } } } diff --git a/Translations/translation_SL.json b/Translations/translation_SL.json index 622fb026..c964b03a 100644 --- a/Translations/translation_SL.json +++ b/Translations/translation_SL.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " SL Slovenščina" + ], + "desc": "" } } } diff --git 
a/Translations/translation_SR_CYRL.json b/Translations/translation_SR_CYRL.json index 4c0eb951..84fe1352 100644 --- a/Translations/translation_SR_CYRL.json +++ b/Translations/translation_SR_CYRL.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " SR Српски" + ], + "desc": "" } } } diff --git a/Translations/translation_SR_LATN.json b/Translations/translation_SR_LATN.json index d5192fe7..07cf7856 100644 --- a/Translations/translation_SR_LATN.json +++ b/Translations/translation_SR_LATN.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " SR Srpski" + ], + "desc": "" } } } diff --git a/Translations/translation_SV.json b/Translations/translation_SV.json index b30449c2..cd66de4c 100644 --- a/Translations/translation_SV.json +++ b/Translations/translation_SV.json @@ -305,6 +305,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " SV Svenska" + ], + "desc": "" } } } diff --git a/Translations/translation_TR.json b/Translations/translation_TR.json index 50c3a01f..d95961d7 100644 --- a/Translations/translation_TR.json +++ b/Translations/translation_TR.json @@ -329,6 +329,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " TR Türkçe" + ], + "desc": "" } } } diff --git a/Translations/translation_UK.json b/Translations/translation_UK.json index fbd44a3a..5ec0021b 100644 --- a/Translations/translation_UK.json +++ b/Translations/translation_UK.json @@ -306,6 +306,13 @@ "duration" ], "desc": "Keep-awake-pulse duration (x 250ms)" + }, + "LanguageSwitch": { + "text2": [ + "Language:", + " UK Українська" + ], + "desc": "" } } } diff --git a/Translations/translation_YUE_HK.json b/Translations/translation_YUE_HK.json index 19af3103..0bfd24e0 100644 --- a/Translations/translation_YUE_HK.json +++ b/Translations/translation_YUE_HK.json @@ -201,6 +201,10 @@ "PowerPulseDuration": { "text2": "電源脈衝時長", "desc": "為保持電源喚醒,每次通電脈衝嘅時間長度 " + }, + "LanguageSwitch": { + "text2": "語言: 廣東話", + "desc": "" } } } diff --git a/Translations/translation_ZH_CN.json b/Translations/translation_ZH_CN.json index 5ef42239..7a024672 100644 --- a/Translations/translation_ZH_CN.json +++ b/Translations/translation_ZH_CN.json @@ -201,6 +201,10 @@ "PowerPulseDuration": { "text2": "电源脉冲时长", "desc": "为保持电源唤醒,每次通电脉冲的时间长度 " + }, + "LanguageSwitch": { + "text2": "语言:简体中文", + "desc": "" } } } diff --git a/Translations/translation_ZH_TW.json b/Translations/translation_ZH_TW.json index 5d9aed5f..84f81437 100644 --- a/Translations/translation_ZH_TW.json +++ b/Translations/translation_ZH_TW.json @@ -201,6 +201,10 @@ "PowerPulseDuration": { "text2": "電源脈衝時長", "desc": "為保持電源喚醒,每次通電脈衝的時間長度 " + }, + "LanguageSwitch": { + "text2": "語言:正體中文", + "desc": "" } } } diff --git a/Translations/translations_def.js b/Translations/translations_def.js index 2bbdb9d0..6d0f206e 100644 --- a/Translations/translations_def.js +++ b/Translations/translations_def.js @@ -339,6 +339,11 @@ var def = "id": "PowerPulseDuration", "maxLen": 6, "maxLen2": 13 + }, + { + "id": "LanguageSwitch", + "maxLen": 7, + "maxLen2": 15 } ] } diff --git a/source/Core/BSP/Pine64/Vendor/SoC/gd32vf103/Board/pinecil/Source/GCC/gcc_gd32vf103_flashxip.ld b/source/Core/BSP/Pine64/Vendor/SoC/gd32vf103/Board/pinecil/Source/GCC/gcc_gd32vf103_flashxip.ld index 819ae05f..710bbdd8 100644 
--- a/source/Core/BSP/Pine64/Vendor/SoC/gd32vf103/Board/pinecil/Source/GCC/gcc_gd32vf103_flashxip.ld +++ b/source/Core/BSP/Pine64/Vendor/SoC/gd32vf103/Board/pinecil/Source/GCC/gcc_gd32vf103_flashxip.ld @@ -50,7 +50,7 @@ __ILM_RAM_SIZE = 0x00010000; * */ __RAM_BASE = 0x20000000; -__RAM_SIZE = 0x00005000; +__RAM_SIZE = 0x00006800; /********************* Stack / Heap Configuration **************************** * Stack / Heap Configuration diff --git a/source/Core/Drivers/OLED.cpp b/source/Core/Drivers/OLED.cpp index f1e057bc..0555f838 100644 --- a/source/Core/Drivers/OLED.cpp +++ b/source/Core/Drivers/OLED.cpp @@ -113,35 +113,49 @@ void OLED::setFramebuffer(uint8_t *buffer) { * Precursor is the command char that is used to select the table. */ void OLED::drawChar(const uint16_t charCode, const FontStyle fontStyle) { + const uint8_t *currentFont; static uint8_t fontWidth, fontHeight; + uint16_t index; switch (fontStyle) { - case FontStyle::SMALL: - currentFont = Font_6x8; - fontHeight = 8; - fontWidth = 6; - break; case FontStyle::EXTRAS: currentFont = ExtraFontChars; + index = charCode; fontHeight = 16; fontWidth = 12; break; + case FontStyle::SMALL: case FontStyle::LARGE: default: - currentFont = Font_12x16; - fontHeight = 16; - fontWidth = 12; + if (charCode == '\x01' && cursor_y == 0) { // 0x01 is used as new line char + setCursor(0, 8); + return; + } else if (charCode <= 0x01) { + return; + } + currentFont = nullptr; + index = 0; + switch (fontStyle) { + case FontStyle::SMALL: + fontHeight = 8; + fontWidth = 6; + break; + case FontStyle::LARGE: + default: + fontHeight = 16; + fontWidth = 12; + break; + } + for (uint32_t i = 0; i < FontSectionsCount; i++) { + const auto &section = FontSections[i]; + if (charCode >= section.symbol_start && charCode < section.symbol_end) { + currentFont = fontStyle == FontStyle::SMALL ? 
section.font06_start_ptr : section.font12_start_ptr; + index = charCode - section.symbol_start; + break; + } + } break; } - - if (charCode == '\x01' && cursor_y == 0) { // 0x01 is used as new line char - setCursor(0, 8); - return; - } else if (charCode <= 0x01) { - return; - } - // First index is \x02 - const uint16_t index = charCode - 2; const uint8_t *charPointer = currentFont + ((fontWidth * (fontHeight / 8)) * index); drawArea(cursor_x, cursor_y, fontWidth, fontHeight, charPointer); cursor_x += fontWidth; @@ -348,7 +362,7 @@ void OLED::debugNumber(int32_t val, FontStyle fontStyle) { void OLED::drawSymbol(uint8_t symbolID) { // draw a symbol to the current cursor location - drawChar(symbolID + 2, FontStyle::EXTRAS); + drawChar(symbolID, FontStyle::EXTRAS); } // Draw an area, but y must be aligned on 0/8 offset diff --git a/source/Core/Inc/Settings.h b/source/Core/Inc/Settings.h index 1a854366..45e3bb20 100644 --- a/source/Core/Inc/Settings.h +++ b/source/Core/Inc/Settings.h @@ -10,7 +10,7 @@ #ifndef SETTINGS_H_ #define SETTINGS_H_ #include -#define SETTINGSVERSION (0x29) +#define SETTINGSVERSION (0x2A) /*Change this if you change the struct below to prevent people getting \ out of sync*/ @@ -59,6 +59,7 @@ typedef struct { uint8_t hallEffectSensitivity; // Operating mode of the hall effect sensor uint8_t accelMissingWarningCounter; // Counter of how many times we have warned we cannot detect the accelerometer uint8_t pdMissingWarningCounter; // Counter of how many times we have warned we cannot detect the pd interface + char uiLanguage[8]; // Selected UI Language code, null-terminated *only if* the length is less than 8 chars uint32_t padding; // This is here for in case we are not an even divisor so // that nothing gets cut off diff --git a/source/Core/Inc/Translation.h b/source/Core/Inc/Translation.h index 62327d7f..84e88844 100644 --- a/source/Core/Inc/Translation.h +++ b/source/Core/Inc/Translation.h @@ -57,6 +57,7 @@ enum class SettingsItemIndex : uint8_t { AnimSpeed, PowerPulseWait, PowerPulseDuration, + LanguageSwitch, NUM_ITEMS, }; @@ -110,11 +111,28 @@ struct TranslationIndexTable { uint16_t SettingsMenuEntriesDescriptions[5]; // unused }; -extern const TranslationIndexTable *const Tr; -extern const char *const TranslationStrings; +extern const TranslationIndexTable *Tr; +extern const char * TranslationStrings; -extern const uint8_t *const Font_12x16; -extern const uint8_t *const Font_6x8; +struct TranslationData { + TranslationIndexTable indices; + // Translation strings follows the translation index table. + // C++ does not support flexible array member as in C, so we use a 1-element + // array as a placeholder. + char strings[1]; +}; + +struct FontSection { + /// Start index of font section, inclusive + uint16_t symbol_start; + /// End index of font section, exclusive + uint16_t symbol_end; + const uint8_t *font12_start_ptr; + const uint8_t *font06_start_ptr; +}; + +extern const FontSection *const FontSections; +extern const uint8_t FontSectionsCount; constexpr uint8_t settings_item_index(const SettingsItemIndex i) { return static_cast(i); } // Use a constexpr function for type-checking. 
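
To make the sectioned font table concrete, the sketch below mirrors the lookup the reworked `drawChar` performs against `FontSections`. It is an illustration only, not part of the patch; `glyph12ForChar` is a hypothetical helper, and the 24-byte stride follows from the 12x16 font (12 * 16 / 8 bytes per glyph).

```
#include <cstdint>
#include "Translation.h" // FontSection, FontSections, FontSectionsCount

// Return a pointer to the 12x16 bitmap for charCode, or nullptr if no font
// section in this build covers that code point (the glyph was not emitted).
static const uint8_t *glyph12ForChar(uint16_t charCode) {
  for (uint8_t i = 0; i < FontSectionsCount; i++) {
    const FontSection &section = FontSections[i];
    if (charCode >= section.symbol_start && charCode < section.symbol_end) {
      const uint16_t index = charCode - section.symbol_start; // index inside the section
      return section.font12_start_ptr + index * (12 * 16 / 8); // 24 bytes per glyph
    }
  }
  return nullptr;
}
```

Compared with the old single `Font_12x16` table indexed by `charCode - 2`, the sectioned form lets a multi-language build ship only the glyph ranges its languages actually reference.
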
@@ -123,5 +141,7 @@ constexpr uint8_t settings_item_index(const SettingsItemIndex i) { return static const char *translatedString(uint16_t index); void prepareTranslations(); +bool settings_displayLanguageSwitch(void); +bool settings_setLanguageSwitch(void); #endif /* TRANSLATION_H_ */ diff --git a/source/Core/Inc/Translation_multi.h b/source/Core/Inc/Translation_multi.h new file mode 100644 index 00000000..bd6b2528 --- /dev/null +++ b/source/Core/Inc/Translation_multi.h @@ -0,0 +1,42 @@ +#ifndef TRANSLATION_MULTI_H_ +#define TRANSLATION_MULTI_H_ + +#include "Translation.h" + +// The compressed translation data will be decompressed to this buffer. These +// data may include: +// - TranslationData (translation index table and translation strings) +// - Font table(s) +// The translation index table consists of uint16_t (half words) which has a +// 2-byte alignment. Therefore, the declaration of this buffer must include +// the alignment specifier `alignas(TranslationData)` to satisfy its alignment. +// TranslationData must always be decompressed to the start of this buffer. +extern uint8_t translation_data_out_buffer[]; +extern const uint16_t translation_data_out_buffer_size; + +struct FontSectionDataInfo { + uint16_t symbol_start; + uint16_t symbol_count; + uint16_t data_size : 15; + bool data_is_compressed : 1; + + // Font12x16 data followed by font6x8 data + const uint8_t *data_ptr; +}; + +extern const FontSectionDataInfo FontSectionDataInfos[]; +extern const uint8_t FontSectionDataCount; + +extern FontSection DynamicFontSections[]; + +struct LanguageMeta { + char code[8]; + const uint8_t *translation_data; + uint16_t translation_size : 15; + bool translation_is_compressed : 1; +}; + +extern const LanguageMeta LanguageMetas[]; +extern const uint8_t LanguageCount; + +#endif /* TRANSLATION_MULTI_H_ */ diff --git a/source/Core/LangSupport/lang_multi.cpp b/source/Core/LangSupport/lang_multi.cpp new file mode 100644 index 00000000..13d7cd89 --- /dev/null +++ b/source/Core/LangSupport/lang_multi.cpp @@ -0,0 +1,92 @@ +#include "OLED.hpp" +#include "Translation.h" +#include "Translation_multi.h" +#include "brieflz.h" +#include "configuration.h" +#include "gui.hpp" + +const TranslationIndexTable *Tr = nullptr; +const char * TranslationStrings = nullptr; + +static uint8_t selectedLangIndex = 255; + +static void initSelectedLanguageIndex() { + if (selectedLangIndex == 255) { + const char *lang = const_cast(systemSettings.uiLanguage); + for (size_t i = 0; i < LanguageCount; i++) { + if (strncmp(lang, LanguageMetas[i].code, sizeof(systemSettings.uiLanguage)) == 0) { + selectedLangIndex = i; + return; + } + } + // No match, use the first language. + selectedLangIndex = 0; + } +} + +static void writeSelectedLanguageToSettings() { + char *lang = const_cast(systemSettings.uiLanguage); + strncpy(lang, LanguageMetas[selectedLangIndex].code, sizeof(systemSettings.uiLanguage)); +} + +void prepareTranslations() { + initSelectedLanguageIndex(); + if (selectedLangIndex >= LanguageCount) { + // This shouldn't happen. 
+ return; + } + const LanguageMeta &langMeta = LanguageMetas[selectedLangIndex]; + + const TranslationData *translationData; + uint16_t buffer_remaining_size = translation_data_out_buffer_size; + uint8_t * buffer_next_ptr = translation_data_out_buffer; + if (langMeta.translation_is_compressed) { + unsigned int outsize; + outsize = blz_depack_srcsize(langMeta.translation_data, buffer_next_ptr, langMeta.translation_size); + + translationData = reinterpret_cast(buffer_next_ptr); + buffer_remaining_size -= outsize; + buffer_next_ptr += outsize; + } else { + translationData = reinterpret_cast(langMeta.translation_data); + } + Tr = &translationData->indices; + TranslationStrings = translationData->strings; + + memset(DynamicFontSections, 0, FontSectionsCount * sizeof(DynamicFontSections[0])); + for (int i = 0; i < FontSectionDataCount; i++) { + const auto &fontSectionDataInfo = FontSectionDataInfos[i]; + auto & fontSection = DynamicFontSections[i]; + fontSection.symbol_start = fontSectionDataInfo.symbol_start; + fontSection.symbol_end = fontSection.symbol_start + fontSectionDataInfo.symbol_count; + const uint16_t font12_size = fontSectionDataInfo.symbol_count * (12 * 16 / 8); + uint16_t dataSize; + if (fontSectionDataInfo.data_is_compressed) { + unsigned int outsize; + outsize = blz_depack_srcsize(fontSectionDataInfo.data_ptr, buffer_next_ptr, fontSectionDataInfo.data_size); + + fontSection.font12_start_ptr = buffer_next_ptr; + dataSize = outsize; + buffer_remaining_size -= outsize; + buffer_next_ptr += outsize; + } else { + fontSection.font12_start_ptr = fontSectionDataInfo.data_ptr; + dataSize = fontSectionDataInfo.data_size; + } + if (dataSize > font12_size) { + fontSection.font06_start_ptr = fontSection.font12_start_ptr + font12_size; + } + } +} + +bool settings_setLanguageSwitch(void) { + selectedLangIndex = (selectedLangIndex + 1) % LanguageCount; + writeSelectedLanguageToSettings(); + prepareTranslations(); + return selectedLangIndex == (LanguageCount - 1); +} + +bool settings_displayLanguageSwitch(void) { + OLED::printWholeScreen(translatedString(Tr->SettingsShortNames[static_cast(SettingsItemIndex::LanguageSwitch)])); + return false; +} diff --git a/source/Core/LangSupport/lang_single.cpp b/source/Core/LangSupport/lang_single.cpp new file mode 100644 index 00000000..75258521 --- /dev/null +++ b/source/Core/LangSupport/lang_single.cpp @@ -0,0 +1,7 @@ +#include "Translation.h" + +bool settings_setLanguageSwitch(void) { return false; } + +bool settings_displayLanguageSwitch(void) { + return true; // skip +} diff --git a/source/Core/Src/gui.cpp b/source/Core/Src/gui.cpp index 2804f2bb..b5a3b331 100644 --- a/source/Core/Src/gui.cpp +++ b/source/Core/Src/gui.cpp @@ -144,6 +144,7 @@ const menuitem rootSettingsMenu[]{ {0, settings_enterPowerSavingMenu, settings_displayPowerSavingMenu}, /*Sleep Options Menu*/ {0, settings_enterUIMenu, settings_displayUIMenu}, /*UI Menu*/ {0, settings_enterAdvancedMenu, settings_displayAdvancedMenu}, /*Advanced Menu*/ + {0, settings_setLanguageSwitch, settings_displayLanguageSwitch}, /*Language Switch*/ {0, nullptr, nullptr} // end of menu marker. DO NOT REMOVE }; diff --git a/source/Core/brieflz/README.md b/source/Core/brieflz/README.md new file mode 100644 index 00000000..06f7f7da --- /dev/null +++ b/source/Core/brieflz/README.md @@ -0,0 +1,41 @@ +This directory contains file originally by other people. 
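
For orientation, the public API of the library vendored below is driven roughly as in the following host-side sketch. This is illustrative only and not part of the patch: the firmware itself only ever decompresses via `blz_depack_srcsize`, while compression happens at build time in the Python tooling.

```
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include "brieflz.h"

int main() {
  const char text[] = "hello hello hello hello hello";
  const unsigned long n = sizeof(text);

  // Compress at level 1 (blz_pack); levels 2..10 go through blz_pack_level.
  void *workmem = std::malloc(blz_workmem_size(n));
  void *packed  = std::malloc(blz_max_packed_size(n));
  const unsigned long packed_size = blz_pack(text, packed, n, workmem);

  // Decompress knowing only the compressed size, the same entry point
  // prepareTranslations() uses for language and font data.
  char out[sizeof(text)] = {0};
  const unsigned long out_size = blz_depack_srcsize(packed, out, packed_size);

  std::printf("%lu -> %lu -> %lu bytes, roundtrip ok: %d\n",
              n, packed_size, out_size, std::memcmp(text, out, n) == 0);
  std::free(packed);
  std::free(workmem);
  return 0;
}
```
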
+ + +## BriefLZ + +- `brieflz_btparse.h` +- `brieflz_hashbucket.h` +- `brieflz_lazy.h` +- `brieflz_leparse.h` +- `brieflz.c` +- `depack.c` + +The above files are originally obtained from https://github.com/jibsen/brieflz +(commit 0ab07a5). + +### License: + +``` +The zlib License (Zlib) + +Copyright (c) 2002-2020 Joergen Ibsen + +This software is provided 'as-is', without any express or implied +warranty. In no event will the authors be held liable for any damages +arising from the use of this software. + +Permission is granted to anyone to use this software for any purpose, +including commercial applications, and to alter it and redistribute it +freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must + not claim that you wrote the original software. If you use this + software in a product, an acknowledgment in the product + documentation would be appreciated but is not required. + + 2. Altered source versions must be plainly marked as such, and must + not be misrepresented as being the original software. + + 3. This notice may not be removed or altered from any source + distribution. +``` diff --git a/source/Core/brieflz/brieflz.c b/source/Core/brieflz/brieflz.c new file mode 100644 index 00000000..e197f573 --- /dev/null +++ b/source/Core/brieflz/brieflz.c @@ -0,0 +1,659 @@ +// +// BriefLZ - small fast Lempel-Ziv +// +// C packer +// +// Copyright (c) 2002-2020 Joergen Ibsen +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must +// not claim that you wrote the original software. If you use this +// software in a product, an acknowledgment in the product +// documentation would be appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must +// not be misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source +// distribution. +// + +#include "brieflz.h" + +#include +#include +#include + +#if _MSC_VER >= 1400 +# include +# define BLZ_BUILTIN_MSVC +#elif defined(__clang__) && defined(__has_builtin) +# if __has_builtin(__builtin_clz) +# define BLZ_BUILTIN_GCC +# endif +#elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4) +# define BLZ_BUILTIN_GCC +#endif + +// Type used to store values in workmem. +// +// This is used to store positions and lengths, so src_size has to be within +// the range of this type. +// +typedef uint32_t blz_word; + +#define BLZ_WORD_MAX UINT32_MAX + +// Number of bits of hash to use for lookup. +// +// The size of the lookup table (and thus workmem) depends on this. +// +// Values between 10 and 18 work well. Lower values generally make compression +// speed faster but ratio worse. The default value 17 (128k entries) is a +// compromise. 
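
To put the default in bytes (relevant only to the host-side packer; the firmware never allocates this table), a quick back-of-the-envelope check, purely illustrative:

```
#include <cstddef>
#include <cstdint>
#include <cstdio>

int main() {
  const unsigned hash_bits = 17;                                       // BLZ_HASH_BITS default
  const std::size_t lookup_entries = 1ul << hash_bits;                 // 131072 entries
  const std::size_t workmem = lookup_entries * sizeof(std::uint32_t);  // blz_word is 32-bit
  std::printf("level-1 workmem: %zu bytes\n", workmem);                // 524288 (512 KiB)
  return 0;
}
```
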
+// +#ifndef BLZ_HASH_BITS +# define BLZ_HASH_BITS 17 +#endif + +#define LOOKUP_SIZE (1UL << BLZ_HASH_BITS) + +#define NO_MATCH_POS ((blz_word) -1) + +// Internal data structure +struct blz_state { + unsigned char *next_out; + unsigned char *tag_out; + unsigned int tag; + int bits_left; +}; + +#if !defined(BLZ_NO_LUT) +static const unsigned short blz_gamma_lookup[512][2] = { + {0, 0}, + {0, 0}, + + {0x00, 2}, {0x02, 2}, + + {0x04, 4}, {0x06, 4}, {0x0C, 4}, {0x0E, 4}, + + {0x14, 6}, {0x16, 6}, {0x1C, 6}, {0x1E, 6}, + {0x34, 6}, {0x36, 6}, {0x3C, 6}, {0x3E, 6}, + + {0x54, 8}, {0x56, 8}, {0x5C, 8}, {0x5E, 8}, + {0x74, 8}, {0x76, 8}, {0x7C, 8}, {0x7E, 8}, + {0xD4, 8}, {0xD6, 8}, {0xDC, 8}, {0xDE, 8}, + {0xF4, 8}, {0xF6, 8}, {0xFC, 8}, {0xFE, 8}, + + {0x154, 10}, {0x156, 10}, {0x15C, 10}, {0x15E, 10}, + {0x174, 10}, {0x176, 10}, {0x17C, 10}, {0x17E, 10}, + {0x1D4, 10}, {0x1D6, 10}, {0x1DC, 10}, {0x1DE, 10}, + {0x1F4, 10}, {0x1F6, 10}, {0x1FC, 10}, {0x1FE, 10}, + {0x354, 10}, {0x356, 10}, {0x35C, 10}, {0x35E, 10}, + {0x374, 10}, {0x376, 10}, {0x37C, 10}, {0x37E, 10}, + {0x3D4, 10}, {0x3D6, 10}, {0x3DC, 10}, {0x3DE, 10}, + {0x3F4, 10}, {0x3F6, 10}, {0x3FC, 10}, {0x3FE, 10}, + + {0x554, 12}, {0x556, 12}, {0x55C, 12}, {0x55E, 12}, + {0x574, 12}, {0x576, 12}, {0x57C, 12}, {0x57E, 12}, + {0x5D4, 12}, {0x5D6, 12}, {0x5DC, 12}, {0x5DE, 12}, + {0x5F4, 12}, {0x5F6, 12}, {0x5FC, 12}, {0x5FE, 12}, + {0x754, 12}, {0x756, 12}, {0x75C, 12}, {0x75E, 12}, + {0x774, 12}, {0x776, 12}, {0x77C, 12}, {0x77E, 12}, + {0x7D4, 12}, {0x7D6, 12}, {0x7DC, 12}, {0x7DE, 12}, + {0x7F4, 12}, {0x7F6, 12}, {0x7FC, 12}, {0x7FE, 12}, + {0xD54, 12}, {0xD56, 12}, {0xD5C, 12}, {0xD5E, 12}, + {0xD74, 12}, {0xD76, 12}, {0xD7C, 12}, {0xD7E, 12}, + {0xDD4, 12}, {0xDD6, 12}, {0xDDC, 12}, {0xDDE, 12}, + {0xDF4, 12}, {0xDF6, 12}, {0xDFC, 12}, {0xDFE, 12}, + {0xF54, 12}, {0xF56, 12}, {0xF5C, 12}, {0xF5E, 12}, + {0xF74, 12}, {0xF76, 12}, {0xF7C, 12}, {0xF7E, 12}, + {0xFD4, 12}, {0xFD6, 12}, {0xFDC, 12}, {0xFDE, 12}, + {0xFF4, 12}, {0xFF6, 12}, {0xFFC, 12}, {0xFFE, 12}, + + {0x1554, 14}, {0x1556, 14}, {0x155C, 14}, {0x155E, 14}, + {0x1574, 14}, {0x1576, 14}, {0x157C, 14}, {0x157E, 14}, + {0x15D4, 14}, {0x15D6, 14}, {0x15DC, 14}, {0x15DE, 14}, + {0x15F4, 14}, {0x15F6, 14}, {0x15FC, 14}, {0x15FE, 14}, + {0x1754, 14}, {0x1756, 14}, {0x175C, 14}, {0x175E, 14}, + {0x1774, 14}, {0x1776, 14}, {0x177C, 14}, {0x177E, 14}, + {0x17D4, 14}, {0x17D6, 14}, {0x17DC, 14}, {0x17DE, 14}, + {0x17F4, 14}, {0x17F6, 14}, {0x17FC, 14}, {0x17FE, 14}, + {0x1D54, 14}, {0x1D56, 14}, {0x1D5C, 14}, {0x1D5E, 14}, + {0x1D74, 14}, {0x1D76, 14}, {0x1D7C, 14}, {0x1D7E, 14}, + {0x1DD4, 14}, {0x1DD6, 14}, {0x1DDC, 14}, {0x1DDE, 14}, + {0x1DF4, 14}, {0x1DF6, 14}, {0x1DFC, 14}, {0x1DFE, 14}, + {0x1F54, 14}, {0x1F56, 14}, {0x1F5C, 14}, {0x1F5E, 14}, + {0x1F74, 14}, {0x1F76, 14}, {0x1F7C, 14}, {0x1F7E, 14}, + {0x1FD4, 14}, {0x1FD6, 14}, {0x1FDC, 14}, {0x1FDE, 14}, + {0x1FF4, 14}, {0x1FF6, 14}, {0x1FFC, 14}, {0x1FFE, 14}, + {0x3554, 14}, {0x3556, 14}, {0x355C, 14}, {0x355E, 14}, + {0x3574, 14}, {0x3576, 14}, {0x357C, 14}, {0x357E, 14}, + {0x35D4, 14}, {0x35D6, 14}, {0x35DC, 14}, {0x35DE, 14}, + {0x35F4, 14}, {0x35F6, 14}, {0x35FC, 14}, {0x35FE, 14}, + {0x3754, 14}, {0x3756, 14}, {0x375C, 14}, {0x375E, 14}, + {0x3774, 14}, {0x3776, 14}, {0x377C, 14}, {0x377E, 14}, + {0x37D4, 14}, {0x37D6, 14}, {0x37DC, 14}, {0x37DE, 14}, + {0x37F4, 14}, {0x37F6, 14}, {0x37FC, 14}, {0x37FE, 14}, + {0x3D54, 14}, {0x3D56, 14}, {0x3D5C, 14}, {0x3D5E, 14}, + {0x3D74, 14}, {0x3D76, 14}, {0x3D7C, 14}, 
{0x3D7E, 14}, + {0x3DD4, 14}, {0x3DD6, 14}, {0x3DDC, 14}, {0x3DDE, 14}, + {0x3DF4, 14}, {0x3DF6, 14}, {0x3DFC, 14}, {0x3DFE, 14}, + {0x3F54, 14}, {0x3F56, 14}, {0x3F5C, 14}, {0x3F5E, 14}, + {0x3F74, 14}, {0x3F76, 14}, {0x3F7C, 14}, {0x3F7E, 14}, + {0x3FD4, 14}, {0x3FD6, 14}, {0x3FDC, 14}, {0x3FDE, 14}, + {0x3FF4, 14}, {0x3FF6, 14}, {0x3FFC, 14}, {0x3FFE, 14}, + + {0x5554, 16}, {0x5556, 16}, {0x555C, 16}, {0x555E, 16}, + {0x5574, 16}, {0x5576, 16}, {0x557C, 16}, {0x557E, 16}, + {0x55D4, 16}, {0x55D6, 16}, {0x55DC, 16}, {0x55DE, 16}, + {0x55F4, 16}, {0x55F6, 16}, {0x55FC, 16}, {0x55FE, 16}, + {0x5754, 16}, {0x5756, 16}, {0x575C, 16}, {0x575E, 16}, + {0x5774, 16}, {0x5776, 16}, {0x577C, 16}, {0x577E, 16}, + {0x57D4, 16}, {0x57D6, 16}, {0x57DC, 16}, {0x57DE, 16}, + {0x57F4, 16}, {0x57F6, 16}, {0x57FC, 16}, {0x57FE, 16}, + {0x5D54, 16}, {0x5D56, 16}, {0x5D5C, 16}, {0x5D5E, 16}, + {0x5D74, 16}, {0x5D76, 16}, {0x5D7C, 16}, {0x5D7E, 16}, + {0x5DD4, 16}, {0x5DD6, 16}, {0x5DDC, 16}, {0x5DDE, 16}, + {0x5DF4, 16}, {0x5DF6, 16}, {0x5DFC, 16}, {0x5DFE, 16}, + {0x5F54, 16}, {0x5F56, 16}, {0x5F5C, 16}, {0x5F5E, 16}, + {0x5F74, 16}, {0x5F76, 16}, {0x5F7C, 16}, {0x5F7E, 16}, + {0x5FD4, 16}, {0x5FD6, 16}, {0x5FDC, 16}, {0x5FDE, 16}, + {0x5FF4, 16}, {0x5FF6, 16}, {0x5FFC, 16}, {0x5FFE, 16}, + {0x7554, 16}, {0x7556, 16}, {0x755C, 16}, {0x755E, 16}, + {0x7574, 16}, {0x7576, 16}, {0x757C, 16}, {0x757E, 16}, + {0x75D4, 16}, {0x75D6, 16}, {0x75DC, 16}, {0x75DE, 16}, + {0x75F4, 16}, {0x75F6, 16}, {0x75FC, 16}, {0x75FE, 16}, + {0x7754, 16}, {0x7756, 16}, {0x775C, 16}, {0x775E, 16}, + {0x7774, 16}, {0x7776, 16}, {0x777C, 16}, {0x777E, 16}, + {0x77D4, 16}, {0x77D6, 16}, {0x77DC, 16}, {0x77DE, 16}, + {0x77F4, 16}, {0x77F6, 16}, {0x77FC, 16}, {0x77FE, 16}, + {0x7D54, 16}, {0x7D56, 16}, {0x7D5C, 16}, {0x7D5E, 16}, + {0x7D74, 16}, {0x7D76, 16}, {0x7D7C, 16}, {0x7D7E, 16}, + {0x7DD4, 16}, {0x7DD6, 16}, {0x7DDC, 16}, {0x7DDE, 16}, + {0x7DF4, 16}, {0x7DF6, 16}, {0x7DFC, 16}, {0x7DFE, 16}, + {0x7F54, 16}, {0x7F56, 16}, {0x7F5C, 16}, {0x7F5E, 16}, + {0x7F74, 16}, {0x7F76, 16}, {0x7F7C, 16}, {0x7F7E, 16}, + {0x7FD4, 16}, {0x7FD6, 16}, {0x7FDC, 16}, {0x7FDE, 16}, + {0x7FF4, 16}, {0x7FF6, 16}, {0x7FFC, 16}, {0x7FFE, 16}, + {0xD554, 16}, {0xD556, 16}, {0xD55C, 16}, {0xD55E, 16}, + {0xD574, 16}, {0xD576, 16}, {0xD57C, 16}, {0xD57E, 16}, + {0xD5D4, 16}, {0xD5D6, 16}, {0xD5DC, 16}, {0xD5DE, 16}, + {0xD5F4, 16}, {0xD5F6, 16}, {0xD5FC, 16}, {0xD5FE, 16}, + {0xD754, 16}, {0xD756, 16}, {0xD75C, 16}, {0xD75E, 16}, + {0xD774, 16}, {0xD776, 16}, {0xD77C, 16}, {0xD77E, 16}, + {0xD7D4, 16}, {0xD7D6, 16}, {0xD7DC, 16}, {0xD7DE, 16}, + {0xD7F4, 16}, {0xD7F6, 16}, {0xD7FC, 16}, {0xD7FE, 16}, + {0xDD54, 16}, {0xDD56, 16}, {0xDD5C, 16}, {0xDD5E, 16}, + {0xDD74, 16}, {0xDD76, 16}, {0xDD7C, 16}, {0xDD7E, 16}, + {0xDDD4, 16}, {0xDDD6, 16}, {0xDDDC, 16}, {0xDDDE, 16}, + {0xDDF4, 16}, {0xDDF6, 16}, {0xDDFC, 16}, {0xDDFE, 16}, + {0xDF54, 16}, {0xDF56, 16}, {0xDF5C, 16}, {0xDF5E, 16}, + {0xDF74, 16}, {0xDF76, 16}, {0xDF7C, 16}, {0xDF7E, 16}, + {0xDFD4, 16}, {0xDFD6, 16}, {0xDFDC, 16}, {0xDFDE, 16}, + {0xDFF4, 16}, {0xDFF6, 16}, {0xDFFC, 16}, {0xDFFE, 16}, + {0xF554, 16}, {0xF556, 16}, {0xF55C, 16}, {0xF55E, 16}, + {0xF574, 16}, {0xF576, 16}, {0xF57C, 16}, {0xF57E, 16}, + {0xF5D4, 16}, {0xF5D6, 16}, {0xF5DC, 16}, {0xF5DE, 16}, + {0xF5F4, 16}, {0xF5F6, 16}, {0xF5FC, 16}, {0xF5FE, 16}, + {0xF754, 16}, {0xF756, 16}, {0xF75C, 16}, {0xF75E, 16}, + {0xF774, 16}, {0xF776, 16}, {0xF77C, 16}, {0xF77E, 16}, + {0xF7D4, 16}, {0xF7D6, 16}, {0xF7DC, 16}, {0xF7DE, 16}, 
+ {0xF7F4, 16}, {0xF7F6, 16}, {0xF7FC, 16}, {0xF7FE, 16}, + {0xFD54, 16}, {0xFD56, 16}, {0xFD5C, 16}, {0xFD5E, 16}, + {0xFD74, 16}, {0xFD76, 16}, {0xFD7C, 16}, {0xFD7E, 16}, + {0xFDD4, 16}, {0xFDD6, 16}, {0xFDDC, 16}, {0xFDDE, 16}, + {0xFDF4, 16}, {0xFDF6, 16}, {0xFDFC, 16}, {0xFDFE, 16}, + {0xFF54, 16}, {0xFF56, 16}, {0xFF5C, 16}, {0xFF5E, 16}, + {0xFF74, 16}, {0xFF76, 16}, {0xFF7C, 16}, {0xFF7E, 16}, + {0xFFD4, 16}, {0xFFD6, 16}, {0xFFDC, 16}, {0xFFDE, 16}, + {0xFFF4, 16}, {0xFFF6, 16}, {0xFFFC, 16}, {0xFFFE, 16} +}; +#endif + +static int +blz_log2(unsigned long n) +{ + assert(n > 0); + +#if defined(BLZ_BUILTIN_MSVC) + unsigned long msb_pos; + _BitScanReverse(&msb_pos, n); + return (int) msb_pos; +#elif defined(BLZ_BUILTIN_GCC) + return (int) sizeof(n) * CHAR_BIT - 1 - __builtin_clzl(n); +#else + int bits = 0; + + while (n >>= 1) { + ++bits; + } + + return bits; +#endif +} + +static unsigned long +blz_gamma_cost(unsigned long n) +{ + assert(n >= 2); + + return 2 * (unsigned long) blz_log2(n); +} + +static unsigned long +blz_match_cost(unsigned long pos, unsigned long len) +{ + return 1 + blz_gamma_cost(len - 2) + blz_gamma_cost((pos >> 8) + 2) + 8; +} + +// Heuristic to compare matches +static int +blz_match_better(unsigned long cur, unsigned long new_pos, unsigned long new_len, + unsigned long pos, unsigned long len) +{ + const unsigned long offs = cur - pos - 1; + const unsigned long new_offs = cur - new_pos - 1; + + return (new_len > len + 1) + || (new_len >= len + 1 && new_offs / 8 <= offs); +} + +// Heuristic to compare match with match at next position +static int +blz_next_match_better(unsigned long cur, unsigned long new_pos, unsigned long new_len, + unsigned long pos, unsigned long len) +{ + const unsigned long offs = cur - pos - 1; + const unsigned long new_offs = cur + 1 - new_pos - 1; + + return (new_len > len + 1 && new_offs / 8 < offs) + || (new_len > len && new_offs < offs) + || (new_len >= len && new_offs < offs / 4); +} + +static void +blz_putbit(struct blz_state *bs, unsigned int bit) +{ + // Check if tag is full + if (!bs->bits_left--) { + // Store tag + bs->tag_out[0] = bs->tag & 0x00FF; + bs->tag_out[1] = (bs->tag >> 8) & 0x00FF; + + // Init next tag + bs->tag_out = bs->next_out; + bs->next_out += 2; + bs->bits_left = 15; + } + + // Shift bit into tag + bs->tag = (bs->tag << 1) + bit; +} + +static void +blz_putbits(struct blz_state *bs, unsigned long bits, int num) +{ + assert(num >= 0 && num <= 16); + assert((bits & (~0UL << num)) == 0); + + // Shift num bits into tag + unsigned long tag = ((unsigned long) bs->tag << num) | bits; + bs->tag = (unsigned int) tag; + + // Check if tag is full + if (bs->bits_left < num) { + const unsigned int top16 = (unsigned int) (tag >> (num - bs->bits_left)); + + // Store tag + bs->tag_out[0] = top16 & 0x00FF; + bs->tag_out[1] = (top16 >> 8) & 0x00FF; + + // Init next tag + bs->tag_out = bs->next_out; + bs->next_out += 2; + + bs->bits_left += 16; + } + + bs->bits_left -= num; +} + +// Encode val using a universal code based on Elias gamma. +// +// This outputs each bit of val (after the leading one bit) as a pair where +// the first bit is the value, and the second is zero if this was the last +// pair, and one otherwise. +// +// 2 = 10 -> 00 +// 3 = 11 -> 10 +// 4 = 100 -> 01 00 +// 5 = 101 -> 01 10 +// 6 = 110 -> 11 00 +// ... +// +// On modern hardware this variant is slower to decode because we cannot count +// the leading zeroes to get the number of value bits and then read them +// directly. 
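
As a quick check of the mappings listed just above, here is a tiny standalone encoder and decoder for this gamma2 code. It is a sketch for illustration only (bits as a readable string, not the packer's tag stream); the decoder is the one-variable loop the surrounding comment goes on to describe.

```
#include <cassert>
#include <string>

// Encode val >= 2 with the interleaved gamma2 code: one value bit (after the
// implicit leading 1) followed by a continuation bit, 1 meaning more pairs follow.
static std::string gamma2_encode(unsigned long val) {
  assert(val >= 2);
  unsigned long mask = val >> 1;
  while (mask & (mask - 1)) mask &= mask - 1; // keep only the highest set bit
  std::string out;
  while (mask) {
    out += (val & mask) ? '1' : '0';
    mask >>= 1;
    out += mask ? '1' : '0';
  }
  return out;
}

// Decode by reading pairs until a pair whose continuation bit is 0.
static unsigned long gamma2_decode(const std::string &bits) {
  unsigned long result = 1;
  for (std::string::size_type i = 0; i < bits.size(); i += 2) {
    result = (result << 1) + (bits[i] - '0');
    if (i + 1 >= bits.size() || bits[i + 1] == '0') break;
  }
  return result;
}

int main() {
  assert(gamma2_encode(2) == "00");
  assert(gamma2_encode(5) == "0110");
  assert(gamma2_encode(6) == "1100");
  assert(gamma2_decode(gamma2_encode(12345)) == 12345);
  return 0;
}
```
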
However on constrained hardware, it has the advantage of being +// decodable using only one variable (register) and a tiny loop: +// +// result = 1; +// do { result = (result << 1) + getbit(); } while (getbit()); +// +// Strictly speaking, this is order-1 exp-Golomb, where we interleave the +// value bits with the bits of the unary coding of the length, but I've always +// known it as the gamma2 code. I am not sure where it originated from, but I +// can see I used it in aPLib around 1998. +// +static void +blz_putgamma(struct blz_state *bs, unsigned long val) +{ + assert(val >= 2); + +#if !defined(BLZ_NO_LUT) + // Output small values using lookup + if (val < 512) { + const unsigned int bits = blz_gamma_lookup[val][0]; + const unsigned int shift = blz_gamma_lookup[val][1]; + + blz_putbits(bs, bits, (int) shift); + + return; + } +#endif + + // Create a mask for the second-highest bit of val +#if defined(BLZ_BUILTIN_MSVC) + unsigned long msb_pos; + _BitScanReverse(&msb_pos, val); + unsigned long mask = 1UL << (msb_pos - 1); +#elif defined(BLZ_BUILTIN_GCC) + unsigned long mask = 1UL << ((int) sizeof(val) * CHAR_BIT - 2 - __builtin_clzl(val)); +#else + unsigned long mask = val >> 1; + + // Clear bits except highest + while (mask & (mask - 1)) { + mask &= mask - 1; + } +#endif + + // Output gamma2-encoded bits + blz_putbit(bs, (val & mask) ? 1 : 0); + + while (mask >>= 1) { + blz_putbit(bs, 1); + blz_putbit(bs, (val & mask) ? 1 : 0); + } + + blz_putbit(bs, 0); +} + +static unsigned char* +blz_finalize(struct blz_state *bs) +{ + // Trailing one bit to delimit any literal tags + blz_putbit(bs, 1); + + // Shift last tag into position and store + bs->tag <<= bs->bits_left; + bs->tag_out[0] = bs->tag & 0x00FF; + bs->tag_out[1] = (bs->tag >> 8) & 0x00FF; + + // Return pointer one past end of output + return bs->next_out; +} + +// Hash four bytes starting a p. +// +// This is Fibonacci hashing, also known as Knuth's multiplicative hash. The +// constant is a prime close to 2^32/phi. +// +static unsigned long +blz_hash4_bits(const unsigned char *p, int bits) +{ + assert(bits > 0 && bits <= 32); + + uint32_t val = (uint32_t) p[0] + | ((uint32_t) p[1] << 8) + | ((uint32_t) p[2] << 16) + | ((uint32_t) p[3] << 24); + + return (val * UINT32_C(2654435761)) >> (32 - bits); +} + +static unsigned long +blz_hash4(const unsigned char *p) +{ + return blz_hash4_bits(p, BLZ_HASH_BITS); +} + +size_t +blz_max_packed_size(size_t src_size) +{ + return src_size + src_size / 8 + 64; +} + +size_t +blz_workmem_size(size_t src_size) +{ + (void) src_size; + + return LOOKUP_SIZE * sizeof(blz_word); +} + +// Simple LZSS using hashing. +// +// The lookup table stores the previous position in the input that had a given +// hash value, or NO_MATCH_POS if none. +// +unsigned long +blz_pack(const void *src, void *dst, unsigned long src_size, void *workmem) +{ + struct blz_state bs; + blz_word *const lookup = (blz_word *) workmem; + const unsigned char *const in = (const unsigned char *) src; + const unsigned long last_match_pos = src_size > 4 ? 
src_size - 4 : 0; + unsigned long hash_pos = 0; + unsigned long cur = 0; + + assert(src_size < BLZ_WORD_MAX); + + // Check for empty input + if (src_size == 0) { + return 0; + } + + bs.next_out = (unsigned char *) dst; + + // First byte verbatim + *bs.next_out++ = in[0]; + + // Check for 1 byte input + if (src_size == 1) { + return 1; + } + + // Initialize first tag + bs.tag_out = bs.next_out; + bs.next_out += 2; + bs.tag = 0; + bs.bits_left = 16; + + // Initialize lookup + for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) { + lookup[i] = NO_MATCH_POS; + } + + // Main compression loop + for (cur = 1; cur <= last_match_pos; ) { + // Update lookup up to current position + while (hash_pos < cur) { + lookup[blz_hash4(&in[hash_pos])] = hash_pos; + hash_pos++; + } + + // Look up match for current position + const unsigned long pos = lookup[blz_hash4(&in[cur])]; + unsigned long len = 0; + + // Check match + if (pos != NO_MATCH_POS) { + const unsigned long len_limit = src_size - cur; + + while (len < len_limit + && in[pos + len] == in[cur + len]) { + ++len; + } + } + + // Output match or literal + // + // When offs >= 0x1FFE00, encoding a match of length 4 + // (37 bits) is longer than encoding 4 literals (36 bits). + // + // The value 0x7E00 is a heuristic that sacrifices some + // length 4 matches in the hope that there will be a better + // match at the next position. + if (len > 4 || (len == 4 && cur - pos - 1 < 0x7E00UL)) { + const unsigned long offs = cur - pos - 1; + + // Output match tag + blz_putbit(&bs, 1); + + // Output match length + blz_putgamma(&bs, len - 2); + + // Output match offset + blz_putgamma(&bs, (offs >> 8) + 2); + *bs.next_out++ = offs & 0x00FF; + + cur += len; + } + else { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[cur++]; + } + } + + // Output any remaining literals + while (cur < src_size) { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[cur++]; + } + + // Trailing one bit to delimit any literal tags + blz_putbit(&bs, 1); + + // Shift last tag into position and store + bs.tag <<= bs.bits_left; + bs.tag_out[0] = bs.tag & 0x00FF; + bs.tag_out[1] = (bs.tag >> 8) & 0x00FF; + + // Return compressed size + return (unsigned long) (bs.next_out - (unsigned char *) dst); +} + +// Include compression algorithms used by blz_pack_level +#include "brieflz_btparse.h" +#include "brieflz_hashbucket.h" +#include "brieflz_lazy.h" +#include "brieflz_leparse.h" + +size_t +blz_workmem_size_level(size_t src_size, int level) +{ + switch (level) { + case 1: + return blz_workmem_size(src_size); + case 2: + return blz_lazy_workmem_size(src_size); + case 3: + return blz_hashbucket_workmem_size(src_size, 2); + case 4: + return blz_hashbucket_workmem_size(src_size, 4); + case 5: + case 6: + case 7: + return blz_leparse_workmem_size(src_size); + case 8: + case 9: + case 10: + return blz_btparse_workmem_size(src_size); + default: + return (size_t) -1; + } +} + +unsigned long +blz_pack_level(const void *src, void *dst, unsigned long src_size, + void *workmem, int level) +{ + switch (level) { + case 1: + return blz_pack(src, dst, src_size, workmem); + case 2: + return blz_pack_lazy(src, dst, src_size, workmem); + case 3: + return blz_pack_hashbucket(src, dst, src_size, workmem, 2, 16); + case 4: + return blz_pack_hashbucket(src, dst, src_size, workmem, 4, 16); + case 5: + return blz_pack_leparse(src, dst, src_size, workmem, 1, 16); + case 6: + return blz_pack_leparse(src, dst, src_size, workmem, 8, 32); + case 7: + 
return blz_pack_leparse(src, dst, src_size, workmem, 64, 64); + case 8: + return blz_pack_btparse(src, dst, src_size, workmem, 16, 96); + case 9: + return blz_pack_btparse(src, dst, src_size, workmem, 32, 224); + case 10: + return blz_pack_btparse(src, dst, src_size, workmem, ULONG_MAX, ULONG_MAX); + default: + return BLZ_ERROR; + } +} + +// clang -g -O1 -fsanitize=fuzzer,address -DBLZ_FUZZING brieflz.c depack.c +#if defined(BLZ_FUZZING) +#include +#include +#include +#include +#include + +#ifndef BLZ_FUZZ_LEVEL +# define BLZ_FUZZ_LEVEL 1 +#endif + +extern int +LLVMFuzzerTestOneInput(const uint8_t *data, size_t size) +{ + if (size > ULONG_MAX / 2) { return 0; } + void *workmem = malloc(blz_workmem_size_level(size, BLZ_FUZZ_LEVEL)); + void *packed = malloc(blz_max_packed_size(size)); + void *depacked = malloc(size); + if (!workmem || !packed || !depacked) { abort(); } + unsigned long packed_size = blz_pack_level(data, packed, size, workmem, BLZ_FUZZ_LEVEL); + blz_depack(packed, depacked, size); + if (memcmp(data, depacked, size)) { abort(); } + free(depacked); + free(packed); + free(workmem); + return 0; +} +#endif diff --git a/source/Core/brieflz/brieflz.h b/source/Core/brieflz/brieflz.h new file mode 100644 index 00000000..cb6162a4 --- /dev/null +++ b/source/Core/brieflz/brieflz.h @@ -0,0 +1,183 @@ +/* + * BriefLZ - small fast Lempel-Ziv + * + * C/C++ header file + * + * Copyright (c) 2002-2020 Joergen Ibsen + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must + * not claim that you wrote the original software. If you use this + * software in a product, an acknowledgment in the product + * documentation would be appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must + * not be misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source + * distribution. + */ + +#ifndef BRIEFLZ_H_INCLUDED +#define BRIEFLZ_H_INCLUDED + +#include + +#ifdef __cplusplus +extern "C" { +#endif + +#define BLZ_VER_MAJOR 1 /**< Major version number */ +#define BLZ_VER_MINOR 3 /**< Minor version number */ +#define BLZ_VER_PATCH 0 /**< Patch version number */ +#define BLZ_VER_STRING "1.3.0" /**< Version number as a string */ + +#define BLZ_NO_LUT + +#ifdef BLZ_DLL +# if defined(_WIN32) || defined(__CYGWIN__) +# ifdef BLZ_DLL_EXPORTS +# define BLZ_API __declspec(dllexport) +# else +# define BLZ_API __declspec(dllimport) +# endif +# define BLZ_LOCAL +# else +# if __GNUC__ >= 4 +# define BLZ_API __attribute__ ((visibility ("default"))) +# define BLZ_LOCAL __attribute__ ((visibility ("hidden"))) +# else +# define BLZ_API +# define BLZ_LOCAL +# endif +# endif +#else +# define BLZ_API +# define BLZ_LOCAL +#endif + +/** + * Return value on error. + * + * @see blz_depack_safe + */ +#ifndef BLZ_ERROR +# define BLZ_ERROR ((unsigned long) (-1)) +#endif + +/** + * Get bound on compressed data size. 
+ * + * @see blz_pack + * + * @param src_size number of bytes to compress + * @return maximum size of compressed data + */ +BLZ_API size_t +blz_max_packed_size(size_t src_size); + +/** + * Get required size of `workmem` buffer. + * + * @see blz_pack + * + * @param src_size number of bytes to compress + * @return required size in bytes of `workmem` buffer + */ +BLZ_API size_t +blz_workmem_size(size_t src_size); + +/** + * Compress `src_size` bytes of data from `src` to `dst`. + * + * @param src pointer to data + * @param dst pointer to where to place compressed data + * @param src_size number of bytes to compress + * @param workmem pointer to memory for temporary use + * @return size of compressed data + */ +BLZ_API unsigned long +blz_pack(const void *src, void *dst, unsigned long src_size, void *workmem); + +/** + * Get required size of `workmem` buffer. + * + * @see blz_pack_level + * + * @param src_size number of bytes to compress + * @param level compression level + * @return required size in bytes of `workmem` buffer + */ +BLZ_API size_t +blz_workmem_size_level(size_t src_size, int level); + +/** + * Compress `src_size` bytes of data from `src` to `dst`. + * + * Compression levels between 1 and 9 offer a trade-off between + * time/space and ratio. Level 10 is optimal but very slow. + * + * @param src pointer to data + * @param dst pointer to where to place compressed data + * @param src_size number of bytes to compress + * @param workmem pointer to memory for temporary use + * @param level compression level + * @return size of compressed data + */ +BLZ_API unsigned long +blz_pack_level(const void *src, void *dst, unsigned long src_size, + void *workmem, int level); + +/** + * Decompress `depacked_size` bytes of data from `src` to `dst`. + * + * @param src pointer to compressed data + * @param dst pointer to where to place decompressed data + * @param depacked_size size of decompressed data + * @return size of decompressed data + */ +BLZ_API unsigned long +blz_depack(const void *src, void *dst, unsigned long depacked_size); + +/** + * Decompress `src_size` bytes of data from `src` to `dst`. + * + * This function is unsafe. If the provided data is malformed, it may + * read more than `src_size` from the `src` buffer. + * + * @param src pointer to compressed data + * @param dst pointer to where to place decompressed data + * @param src_size size of the compressed data + * @return size of decompressed data + */ +BLZ_API unsigned long +blz_depack_srcsize(const void *src, void *dst, unsigned long src_size); + +/** + * Decompress `depacked_size` bytes of data from `src` to `dst`. + * + * Reads at most `src_size` bytes from `src`. + * Writes at most `depacked_size` bytes to `dst`. 
+ * + * @param src pointer to compressed data + * @param src_size size of compressed data + * @param dst pointer to where to place decompressed data + * @param depacked_size size of decompressed data + * @return size of decompressed data, `BLZ_ERROR` on error + */ +BLZ_API unsigned long +blz_depack_safe(const void *src, unsigned long src_size, + void *dst, unsigned long depacked_size); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* BRIEFLZ_H_INCLUDED */ diff --git a/source/Core/brieflz/brieflz_btparse.h b/source/Core/brieflz/brieflz_btparse.h new file mode 100644 index 00000000..61d7c175 --- /dev/null +++ b/source/Core/brieflz/brieflz_btparse.h @@ -0,0 +1,332 @@ +// +// BriefLZ - small fast Lempel-Ziv +// +// Forwards dynamic programming parse using binary trees +// +// Copyright (c) 2016-2020 Joergen Ibsen +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must +// not claim that you wrote the original software. If you use this +// software in a product, an acknowledgment in the product +// documentation would be appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must +// not be misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source +// distribution. +// + +#ifndef BRIEFLZ_BTPARSE_H_INCLUDED +#define BRIEFLZ_BTPARSE_H_INCLUDED + +static size_t +blz_btparse_workmem_size(size_t src_size) +{ + return (5 * src_size + 3 + LOOKUP_SIZE) * sizeof(blz_word); +} + +// Forwards dynamic programming parse using binary trees, checking all +// possible matches. +// +// The match search uses a binary tree for each hash entry, which is updated +// dynamically as it is searched by re-rooting the tree at the search string. +// +// This does not result in balanced trees on all inputs, but often works well +// in practice, and has the advantage that we get the matches in order from +// closest and back. +// +// A drawback is the memory requirement of 5 * src_size words, since we cannot +// overlap the arrays in a forwards parse. +// +// This match search method is found in LZMA by Igor Pavlov, libdeflate +// by Eric Biggers, and other libraries. +// +static unsigned long +blz_pack_btparse(const void *src, void *dst, unsigned long src_size, void *workmem, + const unsigned long max_depth, const unsigned long accept_len) +{ + struct blz_state bs; + const unsigned char *const in = (const unsigned char *) src; + const unsigned long last_match_pos = src_size > 4 ? 
src_size - 4 : 0; + + assert(src_size < BLZ_WORD_MAX); + + // Check for empty input + if (src_size == 0) { + return 0; + } + + bs.next_out = (unsigned char *) dst; + + // First byte verbatim + *bs.next_out++ = in[0]; + + // Check for 1 byte input + if (src_size == 1) { + return 1; + } + + // Initialize first tag + bs.tag_out = bs.next_out; + bs.next_out += 2; + bs.tag = 0; + bs.bits_left = 16; + + if (src_size < 4) { + for (unsigned long i = 1; i < src_size; ++i) { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[i]; + } + + // Return compressed size + return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst); + } + + blz_word *const cost = (blz_word *) workmem; + blz_word *const mpos = cost + src_size + 1; + blz_word *const mlen = mpos + src_size + 1; + blz_word *const nodes = mlen + src_size + 1; + blz_word *const lookup = nodes + 2 * src_size; + + // Initialize lookup + for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) { + lookup[i] = NO_MATCH_POS; + } + + // Since we are not processing the first literal, update tree for + // position 0 + lookup[blz_hash4(&in[0])] = 0; + nodes[0] = NO_MATCH_POS; + nodes[1] = NO_MATCH_POS; + + // Initialize to all literals with infinite cost + for (unsigned long i = 0; i <= src_size; ++i) { + cost[i] = BLZ_WORD_MAX; + mlen[i] = 1; + } + + cost[0] = 0; + cost[1] = 8; + + // Next position where we are going to check matches + // + // This is used to skip matching while still updating the trees when + // we find a match that is accept_len or longer. + // + unsigned long next_match_cur = 1; + + // Phase 1: Find lowest cost path arriving at each position + for (unsigned long cur = 1; cur <= last_match_pos; ++cur) { + // Adjust remaining costs to avoid overflow + if (cost[cur] > BLZ_WORD_MAX - 128) { + blz_word min_cost = BLZ_WORD_MAX; + + for (unsigned long i = cur; i <= src_size; ++i) { + min_cost = cost[i] < min_cost ? cost[i] : min_cost; + } + + for (unsigned long i = cur; i <= src_size; ++i) { + if (cost[i] != BLZ_WORD_MAX) { + cost[i] -= min_cost; + } + } + } + + // Check literal + if (cost[cur + 1] > cost[cur] + 9) { + cost[cur + 1] = cost[cur] + 9; + mlen[cur + 1] = 1; + } + + if (cur > next_match_cur) { + next_match_cur = cur; + } + + unsigned long max_len = 3; + + // Look up first match for current position + // + // pos is the current root of the tree of strings with this + // hash. We are going to re-root the tree so cur becomes the + // new root. + // + const unsigned long hash = blz_hash4(&in[cur]); + unsigned long pos = lookup[hash]; + lookup[hash] = cur; + + blz_word *lt_node = &nodes[2 * cur]; + blz_word *gt_node = &nodes[2 * cur + 1]; + unsigned long lt_len = 0; + unsigned long gt_len = 0; + + assert(pos == NO_MATCH_POS || pos < cur); + + // If we are checking matches, allow lengths up to end of + // input, otherwise compare only up to accept_len + const unsigned long len_limit = cur == next_match_cur ? src_size - cur + : accept_len < src_size - cur ? accept_len + : src_size - cur; + unsigned long num_chain = max_depth; + + // Check matches + for (;;) { + // If at bottom of tree, mark leaf nodes + // + // In case we reached max_depth, this also prunes the + // subtree we have not searched yet and do not know + // where belongs. 
+ // + if (pos == NO_MATCH_POS || num_chain-- == 0) { + *lt_node = NO_MATCH_POS; + *gt_node = NO_MATCH_POS; + + break; + } + + // The string at pos is lexicographically greater than + // a string that matched in the first lt_len positions, + // and less than a string that matched in the first + // gt_len positions, so it must match up to at least + // the minimum of these. + unsigned long len = lt_len < gt_len ? lt_len : gt_len; + + // Find match len + while (len < len_limit && in[pos + len] == in[cur + len]) { + ++len; + } + + // Extend current match if possible + // + // Note that we are checking matches in order from the + // closest and back. This means for a match further + // away, the encoding of all lengths up to the current + // max length will always be longer or equal, so we need + // only consider the extension. + // + if (cur == next_match_cur && len > max_len) { + for (unsigned long i = max_len + 1; i <= len; ++i) { + unsigned long match_cost = blz_match_cost(cur - pos - 1, i); + + assert(match_cost < BLZ_WORD_MAX - cost[cur]); + + unsigned long cost_there = cost[cur] + match_cost; + + if (cost_there < cost[cur + i]) { + cost[cur + i] = cost_there; + mpos[cur + i] = cur - pos - 1; + mlen[cur + i] = i; + } + } + + max_len = len; + + if (len >= accept_len) { + next_match_cur = cur + len; + } + } + + // If we reach maximum match length, the string at pos + // is equal to cur, so we can assign the left and right + // subtrees. + // + // This removes pos from the tree, but we added cur + // which is equal and closer for future matches. + // + if (len >= accept_len || len == len_limit) { + *lt_node = nodes[2 * pos]; + *gt_node = nodes[2 * pos + 1]; + + break; + } + + // Go to previous match and restructure tree + // + // lt_node points to a node that is going to contain + // elements lexicographically less than cur (the search + // string). + // + // If the string at pos is less than cur, we set that + // lt_node to pos. We know that all elements in the + // left subtree are less than pos, and thus less than + // cur, so we point lt_node at the right subtree of + // pos and continue our search there. + // + // The equivalent applies to gt_node when the string at + // pos is greater than cur. 
+ // + if (in[pos + len] < in[cur + len]) { + *lt_node = pos; + lt_node = &nodes[2 * pos + 1]; + assert(*lt_node == NO_MATCH_POS || *lt_node < pos); + pos = *lt_node; + lt_len = len; + } + else { + *gt_node = pos; + gt_node = &nodes[2 * pos]; + assert(*gt_node == NO_MATCH_POS || *gt_node < pos); + pos = *gt_node; + gt_len = len; + } + } + } + + for (unsigned long cur = last_match_pos + 1; cur < src_size; ++cur) { + // Check literal + if (cost[cur + 1] > cost[cur] + 9) { + cost[cur + 1] = cost[cur] + 9; + mlen[cur + 1] = 1; + } + } + + // Phase 2: Follow lowest cost path backwards gathering tokens + unsigned long next_token = src_size; + + for (unsigned long cur = src_size; cur > 1; cur -= mlen[cur], --next_token) { + mlen[next_token] = mlen[cur]; + mpos[next_token] = mpos[cur]; + } + + // Phase 3: Output tokens + unsigned long cur = 1; + + for (unsigned long i = next_token + 1; i <= src_size; cur += mlen[i++]) { + if (mlen[i] == 1) { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[cur]; + } + else { + const unsigned long offs = mpos[i]; + + // Output match tag + blz_putbit(&bs, 1); + + // Output match length + blz_putgamma(&bs, mlen[i] - 2); + + // Output match offset + blz_putgamma(&bs, (offs >> 8) + 2); + *bs.next_out++ = offs & 0x00FF; + } + } + + // Return compressed size + return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst); +} + +#endif /* BRIEFLZ_BTPARSE_H_INCLUDED */ diff --git a/source/Core/brieflz/brieflz_hashbucket.h b/source/Core/brieflz/brieflz_hashbucket.h new file mode 100644 index 00000000..99441423 --- /dev/null +++ b/source/Core/brieflz/brieflz_hashbucket.h @@ -0,0 +1,262 @@ +// +// BriefLZ - small fast Lempel-Ziv +// +// Lazy parsing with multiple previous positions per hash +// +// Copyright (c) 2016-2020 Joergen Ibsen +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must +// not claim that you wrote the original software. If you use this +// software in a product, an acknowledgment in the product +// documentation would be appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must +// not be misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source +// distribution. +// + +#ifndef BRIEFLZ_HASHBUCKET_H_INCLUDED +#define BRIEFLZ_HASHBUCKET_H_INCLUDED + +static size_t +blz_hashbucket_workmem_size(size_t src_size, unsigned int bucket_size) +{ + (void) src_size; + + assert(bucket_size > 0); + assert(sizeof(bucket_size) < sizeof(size_t) + || bucket_size < SIZE_MAX / (LOOKUP_SIZE * sizeof(blz_word))); + + return (LOOKUP_SIZE * bucket_size) * sizeof(blz_word); +} + +// Lazy parsing with multiple previous positions per hash. +// +// Instead of storing only the previous position a given hash occured at, +// this stores the last bucket_size such positions in lookup. This means we +// can check each of these and choose the "best". +// +// There are multiple options for maintaining the entries of the buckets, we +// simply insert at the front to maintain the order of matches and avoid extra +// variables. 
This gives some overhead for moving elements, but as long as +// bucket_size is small and everything fits in a cache line it is pretty fast. +// +// If we find a match that is accept_len or longer, we stop searching. +// +static unsigned long +blz_pack_hashbucket(const void *src, void *dst, unsigned long src_size, void *workmem, + const unsigned int bucket_size, const unsigned long accept_len) +{ + struct blz_state bs; + blz_word *const lookup = (blz_word *) workmem; + const unsigned char *const in = (const unsigned char *) src; + const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0; + unsigned long hash_pos = 0; + unsigned long cur = 0; + + assert(src_size < BLZ_WORD_MAX); + + // Check for empty input + if (src_size == 0) { + return 0; + } + + bs.next_out = (unsigned char *) dst; + + // First byte verbatim + *bs.next_out++ = in[0]; + + // Check for 1 byte input + if (src_size == 1) { + return 1; + } + + // Initialize first tag + bs.tag_out = bs.next_out; + bs.next_out += 2; + bs.tag = 0; + bs.bits_left = 16; + + assert(bucket_size > 0); + assert(sizeof(bucket_size) < sizeof(unsigned long) + || bucket_size < ULONG_MAX / LOOKUP_SIZE); + + // Initialize lookup + for (unsigned long i = 0; i < LOOKUP_SIZE * bucket_size; ++i) { + lookup[i] = NO_MATCH_POS; + } + + // Main compression loop + for (cur = 1; cur <= last_match_pos; ) { + // Update lookup up to current position + while (hash_pos < cur) { + blz_word *const bucket = &lookup[blz_hash4(&in[hash_pos]) * bucket_size]; + unsigned long next = hash_pos; + + // Insert hash_pos at start of bucket + for (unsigned int i = 0; i < bucket_size; ++i) { + unsigned long tmp = bucket[i]; + bucket[i] = next; + next = tmp; + } + + hash_pos++; + } + + unsigned long best_pos = NO_MATCH_POS; + unsigned long best_len = 0; + + // Look up first match for current position + const blz_word *const bucket = &lookup[blz_hash4(&in[cur]) * bucket_size]; + unsigned long pos = bucket[0]; + unsigned int bucket_idx = 0; + + const unsigned long len_limit = src_size - cur; + + // Check matches + while (pos != NO_MATCH_POS) { + unsigned long len = 0; + + // Check match + if (best_len < len_limit + && in[pos + best_len] == in[cur + best_len]) { + while (len < len_limit && in[pos + len] == in[cur + len]) { + ++len; + } + } + + // Update best match + if (blz_match_better(cur, pos, len, best_pos, best_len)) { + best_pos = pos; + best_len = len; + if (best_len >= accept_len) { + break; + } + } + + // Go to previous match + if (++bucket_idx == bucket_size) { + break; + } + pos = bucket[bucket_idx]; + } + + // Check if match at next position is better + if (best_len > 3 && best_len < accept_len && cur < last_match_pos) { + // Update lookup up to next position + { + blz_word *const next_bucket = &lookup[blz_hash4(&in[hash_pos]) * bucket_size]; + unsigned long next = hash_pos; + + // Insert hash_pos at start of bucket + for (unsigned int i = 0; i < bucket_size; ++i) { + unsigned long tmp = next_bucket[i]; + next_bucket[i] = next; + next = tmp; + } + + hash_pos++; + } + + // Look up first match for next position + const blz_word *const next_bucket = &lookup[blz_hash4(&in[cur + 1]) * bucket_size]; + unsigned long next_pos = next_bucket[0]; + unsigned int next_bucket_idx = 0; + + const unsigned long next_len_limit = src_size - (cur + 1); + + // Check matches + while (next_pos != NO_MATCH_POS) { + unsigned long next_len = 0; + + // Check match + if (best_len - 1 < next_len_limit + && in[next_pos + best_len - 1] == in[cur + 1 + best_len - 1]) { + while (next_len < 
next_len_limit + && in[next_pos + next_len] == in[cur + 1 + next_len]) { + ++next_len; + } + } + + if (next_len >= best_len) { + // Replace with next match if it extends backwards + if (next_pos > 0 && in[next_pos - 1] == in[cur]) { + if (blz_match_better(cur, next_pos - 1, next_len + 1, best_pos, best_len)) { + best_pos = next_pos - 1; + best_len = next_len + 1; + } + } + else { + // Drop current match if next match is better + if (blz_next_match_better(cur, next_pos, next_len, best_pos, best_len)) { + best_len = 0; + break; + } + } + } + + // Go to previous match + if (++next_bucket_idx == bucket_size) { + break; + } + next_pos = next_bucket[next_bucket_idx]; + } + } + + // Output match or literal + if (best_len > 4 || (best_len == 4 && cur - best_pos - 1 < 0x3FE00UL)) { + const unsigned long offs = cur - best_pos - 1; + + // Output match tag + blz_putbit(&bs, 1); + + // Output match length + blz_putgamma(&bs, best_len - 2); + + // Output match offset + blz_putgamma(&bs, (offs >> 8) + 2); + *bs.next_out++ = offs & 0x00FF; + + cur += best_len; + } + else { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[cur++]; + } + } + + // Output any remaining literals + while (cur < src_size) { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[cur++]; + } + + // Trailing one bit to delimit any literal tags + blz_putbit(&bs, 1); + + // Shift last tag into position and store + bs.tag <<= bs.bits_left; + bs.tag_out[0] = bs.tag & 0x00FF; + bs.tag_out[1] = (bs.tag >> 8) & 0x00FF; + + // Return compressed size + return (unsigned long) (bs.next_out - (unsigned char *) dst); +} + +#endif /* BRIEFLZ_HASHBUCKET_H_INCLUDED */ diff --git a/source/Core/brieflz/brieflz_lazy.h b/source/Core/brieflz/brieflz_lazy.h new file mode 100644 index 00000000..63a278eb --- /dev/null +++ b/source/Core/brieflz/brieflz_lazy.h @@ -0,0 +1,192 @@ +// +// BriefLZ - small fast Lempel-Ziv +// +// Lazy (non-greedy) parsing with one-byte-lookahead +// +// Copyright (c) 2016-2020 Joergen Ibsen +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must +// not claim that you wrote the original software. If you use this +// software in a product, an acknowledgment in the product +// documentation would be appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must +// not be misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source +// distribution. +// + +#ifndef BRIEFLZ_LAZY_H_INCLUDED +#define BRIEFLZ_LAZY_H_INCLUDED + +static size_t +blz_lazy_workmem_size(size_t src_size) +{ + (void) src_size; + + return LOOKUP_SIZE * sizeof(blz_word); +} + +// Lazy (non-greedy) parsing with one-byte-lookahead. +// +// Each time we find a match, we check if there is a better match at the next +// position, and if so encode a literal instead. 
+// +static unsigned long +blz_pack_lazy(const void *src, void *dst, unsigned long src_size, void *workmem) +{ + struct blz_state bs; + blz_word *const lookup = (blz_word *) workmem; + const unsigned char *const in = (const unsigned char *) src; + const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0; + unsigned long hash_pos = 0; + unsigned long cur = 0; + + assert(src_size < BLZ_WORD_MAX); + + // Check for empty input + if (src_size == 0) { + return 0; + } + + bs.next_out = (unsigned char *) dst; + + // First byte verbatim + *bs.next_out++ = in[0]; + + // Check for 1 byte input + if (src_size == 1) { + return 1; + } + + // Initialize first tag + bs.tag_out = bs.next_out; + bs.next_out += 2; + bs.tag = 0; + bs.bits_left = 16; + + // Initialize lookup + for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) { + lookup[i] = NO_MATCH_POS; + } + + // Main compression loop + for (cur = 1; cur <= last_match_pos; ) { + // Update lookup up to current position + while (hash_pos < cur) { + lookup[blz_hash4(&in[hash_pos])] = hash_pos; + hash_pos++; + } + + // Look up match for current position + unsigned long pos = lookup[blz_hash4(&in[cur])]; + unsigned long len = 0; + + // Check match + if (pos != NO_MATCH_POS) { + const unsigned long len_limit = src_size - cur; + + while (len < len_limit + && in[pos + len] == in[cur + len]) { + ++len; + } + } + + // Check if match at next position is better + if (len > 3 && cur < last_match_pos) { + // Update lookup up to next position + lookup[blz_hash4(&in[hash_pos])] = hash_pos; + hash_pos++; + + // Look up match for next position + const unsigned long next_pos = lookup[blz_hash4(&in[cur + 1])]; + unsigned long next_len = 0; + + // Check match + if (next_pos != NO_MATCH_POS && next_pos != pos + 1) { + const unsigned long next_len_limit = src_size - (cur + 1); + + // If last byte matches, so this has a chance to be a better match + if (len - 1 < next_len_limit + && in[next_pos + len - 1] == in[cur + 1 + len - 1]) { + while (next_len < next_len_limit + && in[next_pos + next_len] == in[cur + 1 + next_len]) { + ++next_len; + } + } + } + + if (next_len >= len) { + // Replace with next match if it extends backwards + if (next_pos > 0 && in[next_pos - 1] == in[cur]) { + if (blz_match_better(cur, next_pos - 1, next_len + 1, pos, len)) { + pos = next_pos - 1; + len = next_len + 1; + } + } + else { + // Drop current match if next match is better + if (blz_next_match_better(cur, next_pos, next_len, pos, len)) { + len = 0; + } + } + + } + } + + // Output match or literal + if (len > 4 || (len == 4 && cur - pos - 1 < 0x3FE00UL)) { + const unsigned long offs = cur - pos - 1; + + // Output match tag + blz_putbit(&bs, 1); + + // Output match length + blz_putgamma(&bs, len - 2); + + // Output match offset + blz_putgamma(&bs, (offs >> 8) + 2); + *bs.next_out++ = offs & 0x00FF; + + cur += len; + } + else { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[cur++]; + } + } + + // Output any remaining literals + while (cur < src_size) { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[cur++]; + } + + // Trailing one bit to delimit any literal tags + blz_putbit(&bs, 1); + + // Shift last tag into position and store + bs.tag <<= bs.bits_left; + bs.tag_out[0] = bs.tag & 0x00FF; + bs.tag_out[1] = (bs.tag >> 8) & 0x00FF; + + // Return compressed size + return (unsigned long) (bs.next_out - (unsigned char *) dst); +} + +#endif /* BRIEFLZ_LAZY_H_INCLUDED */ diff --git 
a/source/Core/brieflz/brieflz_leparse.h b/source/Core/brieflz/brieflz_leparse.h new file mode 100644 index 00000000..bb1ecd57 --- /dev/null +++ b/source/Core/brieflz/brieflz_leparse.h @@ -0,0 +1,256 @@ +// +// BriefLZ - small fast Lempel-Ziv +// +// Backwards dynamic programming parse with left-extension of matches +// +// Copyright (c) 2016-2020 Joergen Ibsen +// +// This software is provided 'as-is', without any express or implied +// warranty. In no event will the authors be held liable for any damages +// arising from the use of this software. +// +// Permission is granted to anyone to use this software for any purpose, +// including commercial applications, and to alter it and redistribute it +// freely, subject to the following restrictions: +// +// 1. The origin of this software must not be misrepresented; you must +// not claim that you wrote the original software. If you use this +// software in a product, an acknowledgment in the product +// documentation would be appreciated but is not required. +// +// 2. Altered source versions must be plainly marked as such, and must +// not be misrepresented as being the original software. +// +// 3. This notice may not be removed or altered from any source +// distribution. +// + +#ifndef BRIEFLZ_LEPARSE_H_INCLUDED +#define BRIEFLZ_LEPARSE_H_INCLUDED + +static size_t +blz_leparse_workmem_size(size_t src_size) +{ + return (LOOKUP_SIZE < 2 * src_size ? 3 * src_size : src_size + LOOKUP_SIZE) + * sizeof(blz_word); +} + +// Backwards dynamic programming parse with left-extension of matches. +// +// Whenever we find a match that improves the cost at the current position, +// we try to extend this match to the left, and if possible we use that +// left-extension for each position to the left. Since we are processing +// the input from right to left, this matches repeated patterns without +// searching at each position. +// +// Essentially, this improves the worst case for the parsing at a small cost +// in ratio. The match finding is still O(n^2) in number of matches though, +// so may have to limit max_depth on larger block sizes. +// +// This is usually within a few percent of the "optimal" parse with the same +// parameters. +// +static unsigned long +blz_pack_leparse(const void *src, void *dst, unsigned long src_size, void *workmem, + const unsigned long max_depth, const unsigned long accept_len) +{ + struct blz_state bs; + const unsigned char *const in = (const unsigned char *) src; + const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0; + + assert(src_size < BLZ_WORD_MAX); + + // Check for empty input + if (src_size == 0) { + return 0; + } + + bs.next_out = (unsigned char *) dst; + + // First byte verbatim + *bs.next_out++ = in[0]; + + // Check for 1 byte input + if (src_size == 1) { + return 1; + } + + // Initialize first tag + bs.tag_out = bs.next_out; + bs.next_out += 2; + bs.tag = 0; + bs.bits_left = 16; + + if (src_size < 4) { + for (unsigned long i = 1; i < src_size; ++i) { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[i]; + } + + // Return compressed size + return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst); + } + + // With a bit of careful ordering we can fit in 3 * src_size words. + // + // The idea is that the lookup is only used in the first phase to + // build the hash chains, so we overlap it with mpos and mlen. + // Also, since we are using prev from right to left in phase two, + // and that is the order we fill in cost, we can overlap these. 
+ // + // One detail is that we actually use src_size + 1 elements of cost, + // but we put mpos after it, where we do not need the first element. + // + blz_word *const prev = (blz_word *) workmem; + blz_word *const mpos = prev + src_size; + blz_word *const mlen = mpos + src_size; + blz_word *const cost = prev; + blz_word *const lookup = mpos; + + // Phase 1: Build hash chains + const int bits = 2 * src_size < LOOKUP_SIZE ? BLZ_HASH_BITS : blz_log2(src_size); + + // Initialize lookup + for (unsigned long i = 0; i < (1UL << bits); ++i) { + lookup[i] = NO_MATCH_POS; + } + + // Build hash chains in prev + if (last_match_pos > 0) { + for (unsigned long i = 0; i <= last_match_pos; ++i) { + const unsigned long hash = blz_hash4_bits(&in[i], bits); + prev[i] = lookup[hash]; + lookup[hash] = i; + } + } + + // Initialize last three positions as literals + mlen[src_size - 3] = 1; + mlen[src_size - 2] = 1; + mlen[src_size - 1] = 1; + + cost[src_size - 3] = 27; + cost[src_size - 2] = 18; + cost[src_size - 1] = 9; + cost[src_size] = 0; + + // Phase 2: Find lowest cost path from each position to end + for (unsigned long cur = last_match_pos; cur > 0; --cur) { + // Since we updated prev to the end in the first phase, we + // do not need to hash, but can simply look up the previous + // position directly. + unsigned long pos = prev[cur]; + + assert(pos == NO_MATCH_POS || pos < cur); + + // Start with a literal + cost[cur] = cost[cur + 1] + 9; + mlen[cur] = 1; + + unsigned long max_len = 3; + + const unsigned long len_limit = src_size - cur; + unsigned long num_chain = max_depth; + + // Go through the chain of prev matches + for (; pos != NO_MATCH_POS && num_chain--; pos = prev[pos]) { + unsigned long len = 0; + + // If next byte matches, so this has a chance to be a longer match + if (max_len < len_limit && in[pos + max_len] == in[cur + max_len]) { + // Find match len + while (len < len_limit && in[pos + len] == in[cur + len]) { + ++len; + } + } + + // Extend current match if possible + // + // Note that we are checking matches in order from the + // closest and back. This means for a match further + // away, the encoding of all lengths up to the current + // max length will always be longer or equal, so we need + // only consider the extension. 
+ if (len > max_len) { + unsigned long min_cost = ULONG_MAX; + unsigned long min_cost_len = 3; + + // Find lowest cost match length + for (unsigned long i = max_len + 1; i <= len; ++i) { + unsigned long match_cost = blz_match_cost(cur - pos - 1, i); + assert(match_cost < BLZ_WORD_MAX - cost[cur + i]); + unsigned long cost_here = match_cost + cost[cur + i]; + + if (cost_here < min_cost) { + min_cost = cost_here; + min_cost_len = i; + } + } + + max_len = len; + + // Update cost if cheaper + if (min_cost < cost[cur]) { + cost[cur] = min_cost; + mpos[cur] = pos; + mlen[cur] = min_cost_len; + + // Left-extend current match if possible + if (pos > 0 && in[pos - 1] == in[cur - 1]) { + do { + --cur; + --pos; + ++min_cost_len; + unsigned long match_cost = blz_match_cost(cur - pos - 1, min_cost_len); + assert(match_cost < BLZ_WORD_MAX - cost[cur + min_cost_len]); + unsigned long cost_here = match_cost + cost[cur + min_cost_len]; + cost[cur] = cost_here; + mpos[cur] = pos; + mlen[cur] = min_cost_len; + } while (pos > 0 && in[pos - 1] == in[cur - 1]); + break; + } + } + } + + if (len >= accept_len || len == len_limit) { + break; + } + } + } + + mpos[0] = 0; + mlen[0] = 1; + + // Phase 3: Output compressed data, following lowest cost path + for (unsigned long i = 1; i < src_size; i += mlen[i]) { + if (mlen[i] == 1) { + // Output literal tag + blz_putbit(&bs, 0); + + // Copy literal + *bs.next_out++ = in[i]; + } + else { + const unsigned long offs = i - mpos[i] - 1; + + // Output match tag + blz_putbit(&bs, 1); + + // Output match length + blz_putgamma(&bs, mlen[i] - 2); + + // Output match offset + blz_putgamma(&bs, (offs >> 8) + 2); + *bs.next_out++ = offs & 0x00FF; + } + } + + // Return compressed size + return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst); +} + +#endif /* BRIEFLZ_LEPARSE_H_INCLUDED */ diff --git a/source/Core/brieflz/depack.c b/source/Core/brieflz/depack.c new file mode 100644 index 00000000..b2324cfd --- /dev/null +++ b/source/Core/brieflz/depack.c @@ -0,0 +1,271 @@ +/* + * BriefLZ - small fast Lempel-Ziv + * + * C depacker + * + * Copyright (c) 2002-2018 Joergen Ibsen + * + * This software is provided 'as-is', without any express or implied + * warranty. In no event will the authors be held liable for any damages + * arising from the use of this software. + * + * Permission is granted to anyone to use this software for any purpose, + * including commercial applications, and to alter it and redistribute it + * freely, subject to the following restrictions: + * + * 1. The origin of this software must not be misrepresented; you must + * not claim that you wrote the original software. If you use this + * software in a product, an acknowledgment in the product + * documentation would be appreciated but is not required. + * + * 2. Altered source versions must be plainly marked as such, and must + * not be misrepresented as being the original software. + * + * 3. This notice may not be removed or altered from any source + * distribution. 
+ */ + +#include "brieflz.h" + +/* Internal data structure */ +struct blz_state { + const unsigned char *src; + unsigned char *dst; + unsigned int tag; + int bits_left; +}; + +#if !defined(BLZ_NO_LUT) +static const unsigned char blz_gamma_lookup[256][2] = { + /* 00xxxxxx = 2 */ + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, + + /* 0100xxxx = 4 */ + {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, + {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, + + /* 010100xx = 8 */ + {8, 6}, {8, 6}, {8, 6}, {8, 6}, + + /* 01010100 = 16 01010101 = 16+ 01010110 = 17 01010111 = 17+ */ + {16, 8}, {16, 0}, {17, 8}, {17, 0}, + + /* 010110xx = 9 */ + {9, 6}, {9, 6}, {9, 6}, {9, 6}, + + /* 01011100 = 18 01011101 = 18+ 01011110 = 19 01011111 = 19+ */ + {18, 8}, {18, 0}, {19, 8}, {19, 0}, + + /* 0110xxxx = 5 */ + {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, + {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, + + /* 011100xx = 10 */ + {10, 6}, {10, 6}, {10, 6}, {10, 6}, + + /* 01110100 = 20 01110101 = 20+ 01110110 = 21 01110111 = 21+ */ + {20, 8}, {20, 0}, {21, 8}, {21, 0}, + + /* 011110xx = 11 */ + {11, 6}, {11, 6}, {11, 6}, {11, 6}, + + /* 01111100 = 22 01111101 = 22+ 01111110 = 23 01111111 = 23+ */ + {22, 8}, {22, 0}, {23, 8}, {23, 0}, + + /* 10xxxxxx = 3 */ + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, + + /* 1100xxxx = 6 */ + {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, + {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, + + /* 110100xx = 12 */ + {12, 6}, {12, 6}, {12, 6}, {12, 6}, + + /* 11010100 = 24 11010101 = 24+ 11010110 = 25 11010111 = 25+ */ + {24, 8}, {24, 0}, {25, 8}, {25, 0}, + + /* 110110xx = 13 */ + {13, 6}, {13, 6}, {13, 6}, {13, 6}, + + /* 11011100 = 26 11011101 = 26+ 11011110 = 27 11011111 = 27+ */ + {26, 8}, {26, 0}, {27, 8}, {27, 0}, + + /* 1110xxxx = 7 */ + {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, + {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, + + /* 111100xx = 14 */ + {14, 6}, {14, 6}, {14, 6}, {14, 6}, + + /* 11110100 = 28 11110101 = 28+ 11110110 = 29 11110111 = 29+ */ + {28, 8}, {28, 0}, {29, 8}, {29, 0}, + + /* 111110xx = 15 */ + {15, 6}, {15, 6}, {15, 6}, {15, 6}, + + /* 11111100 = 30 11111101 = 30+ 11111110 = 31 11111111 = 31+ */ + {30, 8}, {30, 0}, {31, 8}, {31, 0} +}; +#endif + +static unsigned int +blz_getbit(struct blz_state *bs) +{ + unsigned int bit; + + /* Check if tag is empty */ + if (!bs->bits_left--) { + /* Load next tag */ + bs->tag = (unsigned int) bs->src[0] + | ((unsigned int) bs->src[1] << 
8); + bs->src += 2; + bs->bits_left = 15; + } + + /* Shift bit out of tag */ + bit = (bs->tag & 0x8000) ? 1 : 0; + bs->tag <<= 1; + + return bit; +} + +static unsigned long +blz_getgamma(struct blz_state *bs) +{ + unsigned long result = 1; + +#if !defined(BLZ_NO_LUT) + /* Decode up to 8 bits of gamma2 code using lookup if possible */ + if (bs->bits_left >= 8) { + unsigned int top8 = (bs->tag >> 8) & 0x00FF; + int shift; + + result = blz_gamma_lookup[top8][0]; + shift = (int) blz_gamma_lookup[top8][1]; + + if (shift) { + bs->tag <<= shift; + bs->bits_left -= shift; + return result; + } + + bs->tag <<= 8; + bs->bits_left -= 8; + } +#endif + + /* Input gamma2-encoded bits */ + do { + result = (result << 1) + blz_getbit(bs); + } while (blz_getbit(bs)); + + return result; +} + +unsigned long +blz_depack(const void *src, void *dst, unsigned long depacked_size) +{ + struct blz_state bs; + unsigned long dst_size = 0; + + bs.src = (const unsigned char *) src; + bs.dst = (unsigned char *) dst; + + /* Initialise to one bit left in tag; that bit is zero (a literal) */ + bs.bits_left = 1; + bs.tag = 0x4000; + + /* Main decompression loop */ + while (dst_size < depacked_size) { + if (blz_getbit(&bs)) { + /* Input match length and offset */ + unsigned long len = blz_getgamma(&bs) + 2; + unsigned long off = blz_getgamma(&bs) - 2; + + off = (off << 8) + (unsigned long) *bs.src++ + 1; + + /* Copy match */ + { + const unsigned char *p = bs.dst - off; + unsigned long i; + + for (i = len; i > 0; --i) { + *bs.dst++ = *p++; + } + } + + dst_size += len; + } + else { + /* Copy literal */ + *bs.dst++ = *bs.src++; + + dst_size++; + } + } + + /* Return decompressed size */ + return dst_size; +} + +unsigned long +blz_depack_srcsize(const void *src, void *dst, unsigned long src_size) +{ + struct blz_state bs; + unsigned long dst_size = 0; + const unsigned char *src_end = src + src_size; + + bs.src = (const unsigned char *) src; + bs.dst = (unsigned char *) dst; + + /* Initialise to one bit left in tag; that bit is zero (a literal) */ + bs.bits_left = 1; + bs.tag = 0x4000; + + /* Main decompression loop */ + while (bs.src < src_end) { + if (blz_getbit(&bs)) { + /* Input match length and offset */ + unsigned long len = blz_getgamma(&bs) + 2; + unsigned long off = blz_getgamma(&bs) - 2; + + off = (off << 8) + (unsigned long) *bs.src++ + 1; + + /* Copy match */ + { + const unsigned char *p = bs.dst - off; + unsigned long i; + + for (i = len; i > 0; --i) { + *bs.dst++ = *p++; + } + } + + dst_size += len; + } + else { + /* Copy literal */ + *bs.dst++ = *bs.src++; + + dst_size++; + } + } + + /* Return decompressed size */ + return dst_size; +} diff --git a/source/Core/lzfx/README.md b/source/Core/lzfx/README.md deleted file mode 100644 index 82420cf0..00000000 --- a/source/Core/lzfx/README.md +++ /dev/null @@ -1,79 +0,0 @@ -This directory contains file originally by other people. - - -## Simplified LZFX-based compression library - -- `lzfx.c` -- `lzfx.h` - -The above files are obtained from https://github.com/janding/lzfx (commit -448017f). It is a fork by Jan Ding (GitHub user "janding") based on the LZFX -compression library by Andrew Collette. - -### License: - -``` -LZFX is copyright (c) 2009 Andrew Collette and subject to the BSD license -(below). Original LZF copyright statement follows. - -Copyright (c) 2000-2007 Marc Alexander Lehmann - -Redistribution and use in source and binary forms, with or without modifica- -tion, are permitted provided that the following conditions are met: - - 1. 
Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - - 2. Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED -WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- -CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO -EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- -CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; -OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, -WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- -ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED -OF THE POSSIBILITY OF SUCH DAMAGE. -``` - - -## lzfx-boot - -- `lzfx-host-compress.c` (original: `lzfx-raw.c`) - -The above file is obtained from https://github.com/janding/lzfx-boot (commit -88b1596). - -### License: - -``` -BSD 2-Clause License - -Copyright (c) 2017, Jan Ding -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - -* Redistributions of source code must retain the above copyright notice, this - list of conditions and the following disclaimer. - -* Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" -AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE -FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL -DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR -SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER -CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, -OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -``` diff --git a/source/Core/lzfx/lzfx.c b/source/Core/lzfx/lzfx.c deleted file mode 100644 index 81d1560a..00000000 --- a/source/Core/lzfx/lzfx.c +++ /dev/null @@ -1,357 +0,0 @@ -/* - * Copyright (c) 2009 Andrew Collette - * http://lzfx.googlecode.com - * - * Implements an LZF-compatible compressor/decompressor based on the liblzf - * codebase written by Marc Lehmann. This code is released under the BSD - * license. License and original copyright statement follow. - * - * - * Copyright (c) 2000-2008 Marc Alexander Lehmann - * - * Redistribution and use in source and binary forms, with or without modifica- - * tion, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. 
Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- - * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- - * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- - * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#include "lzfx.h" - -#define LZFX_HSIZE (1 << (LZFX_HLOG)) - -/* We need this for memset */ -#ifdef __cplusplus -#include -#else -#include -#endif - -#if __GNUC__ >= 3 -#define fx_expect_false(expr) __builtin_expect((expr) != 0, 0) -#define fx_expect_true(expr) __builtin_expect((expr) != 0, 1) -#else -#define fx_expect_false(expr) (expr) -#define fx_expect_true(expr) (expr) -#endif - -typedef unsigned char u8; -typedef const u8 * LZSTATE[LZFX_HSIZE]; - -/* Define the hash function */ -#define LZFX_FRST(p) (((p[0]) << 8) | p[1]) -#define LZFX_NEXT(v, p) (((v) << 8) | p[2]) -#define LZFX_IDX(h) (((h >> (3 * 8 - LZFX_HLOG)) - h) & (LZFX_HSIZE - 1)) - -/* These cannot be changed, as they are related to the compressed format. */ -#define LZFX_MAX_LIT (1 << 5) - 1 -#define LZFX_MAX_OFF (1 << 13) -#define LZFX_MAX_REF ((1 << 8) + (1 << 3) - 2) - -static int lzfx_getsize(const void *ibuf, unsigned int ilen, unsigned int *olen); - -/* Compressed format - - There are two kinds of structures in LZF/LZFX: literal runs and back - references. Literals are encoded as follows: - - LLLLL000 - - Back references are encoded as follows. The smallest possible encoded - length value is 1, as otherwise the control byte would be recognized as - a literal run. At least three bytes must match for a back reference - to be inserted. The offset (distance to the desired data in the output - buffer) is encoded as o - 1, as all offsets are at least 1. The binary - format is: - - oooooLLL oooooooo for backrefs of real length < 7 (1 <= L < 7) - ooooo111 LLLLLLLL oooooooo for backrefs of real length >= 7 (L >= 7) -*/ -int lzfx_compress(const void *const ibuf, const unsigned int ilen, void *obuf, unsigned int *const olen) { - - /* Hash table; an array of u8*'s which point - to various locations in the input buffer */ - const u8 *htab[LZFX_HSIZE]; - - const u8 ** hslot; /* Pointer to entry in hash table */ - unsigned int hval; /* Hash value generated by macros above */ - const u8 * ref; /* Pointer to candidate match location in input */ - - const u8 * ip = (const u8 *)ibuf; - const u8 *const in_end = ip + ilen; - - u8 * op = (u8 *)obuf; - const u8 *const out_end = (olen == NULL ? 
NULL : op + *olen); - - int lit; /* # of bytes in current literal run */ - -#if defined(WIN32) && defined(_M_X64) - unsigned _int64 off; /* workaround for missing POSIX compliance */ -#else - unsigned long off; -#endif - - if (olen == NULL) - return LZFX_EARGS; - if (ibuf == NULL) { - if (ilen != 0) - return LZFX_EARGS; - *olen = 0; - return 0; - } - if (obuf == NULL) - return LZFX_EARGS; - - memset(htab, 0, sizeof(htab)); - - /* Start a literal run. Whenever we do this the output pointer is - advanced because the current byte will hold the encoded length. */ - lit = 0; - op++; - - hval = LZFX_FRST(ip); - - while (ip + 2 < in_end) { /* The NEXT macro reads 2 bytes ahead */ - - hval = LZFX_NEXT(hval, ip); - hslot = htab + LZFX_IDX(hval); - - ref = *hslot; - *hslot = ip; - - if (ref < ip && (off = ip - ref - 1) < LZFX_MAX_OFF && ip + 4 < in_end /* Backref takes up to 3 bytes, so don't bother */ - && ref > (u8 *)ibuf && ref[0] == ip[0] && ref[1] == ip[1] && ref[2] == ip[2]) { - - unsigned int len = 3; /* We already know 3 bytes match */ - const unsigned int maxlen = in_end - ip - 2 > LZFX_MAX_REF ? LZFX_MAX_REF : in_end - ip - 2; - - /* lit == 0: op + 3 must be < out_end (because we undo the run) - lit != 0: op + 3 + 1 must be < out_end */ - if (fx_expect_false(op - !lit + 3 + 1 >= out_end)) - return LZFX_ESIZE; - - op[-lit - 1] = lit << 3; /* Terminate literal run */ - op -= !lit; /* Undo run if length is zero */ - - /* Start checking at the fourth byte */ - while (len < maxlen && ref[len] == ip[len]) - len++; - - /* Format 1: [oooooLLL oooooooo] */ - if (len < 7) { - *op++ = ((off >> 8) << 3) + len; - *op++ = off; - - /* Format 2: [ooooo111 LLLLLLLL oooooooo] */ - } else { - *op++ = ((off >> 8) << 3) + 7; - *op++ = len - 7; - *op++ = off; - } - - lit = 0; - op++; - - ip += len - 1; /* ip = initial ip + #octets - 1 */ - - if (fx_expect_false(ip + 3 >= in_end)) { - ip++; /* Code following expects exit at bottom of loop */ - break; - } - - hval = LZFX_FRST(ip); - hval = LZFX_NEXT(hval, ip); - htab[LZFX_IDX(hval)] = ip; - - ip++; /* ip = initial ip + #octets */ - - } else { - /* Keep copying literal bytes */ - - if (fx_expect_false(op >= out_end)) - return LZFX_ESIZE; - - lit++; - *op++ = *ip++; - - if (fx_expect_false(lit == LZFX_MAX_LIT)) { - op[-lit - 1] = lit << 3; /* stop run */ - lit = 0; - op++; /* start run */ - } - - } /* if() found match in htab */ - - } /* while(ip < ilen -2) */ - - /* At most 3 bytes remain in input. We therefore need 4 bytes available - in the output buffer to store them (3 data + ctrl byte).*/ - if (op + 3 > out_end) - return LZFX_ESIZE; - - while (ip < in_end) { - - lit++; - *op++ = *ip++; - - if (fx_expect_false(lit == LZFX_MAX_LIT)) { - op[-lit - 1] = lit << 3; - lit = 0; - op++; - } - } - - op[-lit - 1] = lit << 3; - op -= !lit; - - *olen = op - (u8 *)obuf; - return 0; -} - -/* Decompressor */ -int lzfx_decompress(const void *ibuf, unsigned int ilen, void *obuf, unsigned int *olen) { - - u8 const * ip = (const u8 *)ibuf; - u8 const *const in_end = ip + ilen; - u8 * op = (u8 *)obuf; - u8 const *const out_end = (olen == NULL ? 
NULL : op + *olen); - - unsigned int remain_len = 0; - int rc; - - if (olen == NULL) - return LZFX_EARGS; - if (ibuf == NULL) { - if (ilen != 0) - return LZFX_EARGS; - *olen = 0; - return 0; - } - if (obuf == NULL) { - if (olen != 0) - return LZFX_EARGS; - return lzfx_getsize(ibuf, ilen, olen); - } - - do { - unsigned int ctrl = *ip++; - - /* Format LLLLL000: a literal byte string follows, of length L */ - if ((ctrl & 0x7) == 0) { - unsigned int len = ctrl >> 3; - - if (fx_expect_false(op + len > out_end)) { - --ip; /* Rewind to control byte */ - goto guess; - } - if (fx_expect_false(ip + len > in_end)) - return LZFX_ECORRUPT; - - do - *op++ = *ip++; - while (--len); - - /* Format #1 [oooooLLL oooooooo]: backref of length L+1 - ^^^^^ ^^^^^^^^ - A B - #2 [ooooo111 LLLLLLLL oooooooo] backref of length L+7 - ^^^^^ ^^^^^^^^ - A B - In both cases the location of the backref is computed from the - remaining part of the data as follows: - - location = op - A*256 - B - 1 - */ - } else { - - unsigned int len = ctrl & 0x7; - u8 * ref = op - ((ctrl >> 3) << 8) - 1; - - if (len == 7) - len += *ip++; /* i.e. format #2 */ - - if (fx_expect_false(op + len > out_end)) { - ip -= (len >= 7) ? 2 : 1; /* Rewind to control byte */ - goto guess; - } - if (fx_expect_false(ip >= in_end)) - return LZFX_ECORRUPT; - - ref -= *ip++; - - if (fx_expect_false(ref < (u8 *)obuf)) - return LZFX_ECORRUPT; - - do - *op++ = *ref++; - while (--len); - } - - } while (ip < in_end); - - *olen = op - (u8 *)obuf; - - return 0; - -guess: - rc = lzfx_getsize(ip, ilen - (ip - (u8 *)ibuf), &remain_len); - if (rc >= 0) - *olen = remain_len + (op - (u8 *)obuf); - return rc; -} - -/* Guess len. No parameters may be NULL; this is not checked. */ -static int lzfx_getsize(const void *ibuf, unsigned int ilen, unsigned int *olen) { - - u8 const * ip = (const u8 *)ibuf; - u8 const *const in_end = ip + ilen; - int tot_len = 0; - - while (ip < in_end) { - - unsigned int ctrl = *ip++; - - if ((ctrl & 0x7) == 0) { - - if (ip + (ctrl >> 3) > in_end) - return LZFX_ECORRUPT; - - tot_len += (ctrl >> 3); - ip += (ctrl >> 3); - - } else { - - unsigned int len = ctrl & 0x7; - - if (len == 7) { /* i.e. format #2 */ - len += *ip++; - } - - if (ip >= in_end) - return LZFX_ECORRUPT; - - ip++; /* skip the ref byte */ - - tot_len += len; - } - } - - *olen = tot_len; - - return 0; -} diff --git a/source/Core/lzfx/lzfx.h b/source/Core/lzfx/lzfx.h deleted file mode 100644 index 7b6e472e..00000000 --- a/source/Core/lzfx/lzfx.h +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2009 Andrew Collette - * http://lzfx.googlecode.com - * - * Implements an LZF-compatible compressor/decompressor based on the liblzf - * codebase written by Marc Lehmann. This code is released under the BSD - * license. License and original copyright statement follow. - * - * - * Copyright (c) 2000-2008 Marc Alexander Lehmann - * - * Redistribution and use in source and binary forms, with or without modifica- - * tion, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. 
- * - * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED - * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER- - * CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO - * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE- - * CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- - * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED - * OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef LZFX_H -#define LZFX_H - -#ifdef __cplusplus -extern "C" { -#endif - -/* Documented behavior, including function signatures and error codes, - is guaranteed to remain unchanged for releases with the same major - version number. Releases of the same major version are also able - to read each other's output, although the output itself is not - guaranteed to be byte-for-byte identical. -*/ -#define LZFX_VERSION_MAJOR 0 -#define LZFX_VERSION_MINOR 1 -#define LZFX_VERSION_STRING "0.1" - -/* Hashtable size (2**LZFX_HLOG entries) */ -#ifndef LZFX_HLOG -#define LZFX_HLOG 16 -#endif - -/* Predefined errors. */ -#define LZFX_ESIZE -1 /* Output buffer too small */ -#define LZFX_ECORRUPT -2 /* Invalid data for decompression */ -#define LZFX_EARGS -3 /* Arguments invalid (NULL) */ - -/* Buffer-to buffer compression. - - Supply pre-allocated input and output buffers via ibuf and obuf, and - their size in bytes via ilen and olen. Buffers may not overlap. - - On success, the function returns a non-negative value and the argument - olen contains the compressed size in bytes. On failure, a negative - value is returned and olen is not modified. -*/ -int lzfx_compress(const void *ibuf, unsigned int ilen, void *obuf, unsigned int *olen); - -/* Buffer-to-buffer decompression. - - Supply pre-allocated input and output buffers via ibuf and obuf, and - their size in bytes via ilen and olen. Buffers may not overlap. - - On success, the function returns a non-negative value and the argument - olen contains the uncompressed size in bytes. On failure, a negative - value is returned. - - If the failure code is LZFX_ESIZE, olen contains the minimum buffer size - required to hold the decompressed data. Otherwise, olen is not modified. - - Supplying a zero *olen is a valid and supported strategy to determine the - required buffer size. This does not require decompression of the entire - stream and is consequently very fast. Argument obuf may be NULL in - this case only. 
-*/ -int lzfx_decompress(const void *ibuf, unsigned int ilen, void *obuf, unsigned int *olen); - -#ifdef __cplusplus -} /* extern "C" */ -#endif - -#endif diff --git a/source/Makefile b/source/Makefile index b86c3c88..c6a4ebad 100644 --- a/source/Makefile +++ b/source/Makefile @@ -12,6 +12,17 @@ endif ALL_LANGUAGES=BG CS DA DE EN ES FI FR HR HU IT JA_JP LT NL NL_BE NO PL PT RU SK SL SR_CYRL SR_LATN SV TR UK YUE_HK ZH_CN ZH_TW +LANGUAGE_GROUP_CJK_LANGS=EN JA_JP YUE_HK ZH_TW ZH_CN +LANGUAGE_GROUP_CJK_NAME=Chinese+Japanese + +LANGUAGE_GROUP_CYRILLIC_LANGS=EN BG RU SR_CYRL SR_LATN UK +LANGUAGE_GROUP_CYRILLIC_NAME=Bulgarian+Russian+Serbian+Ukrainian + +LANGUAGE_GROUP_EUR_LANGS=EN $(filter-out $(LANGUAGE_GROUP_CJK_LANGS) $(LANGUAGE_GROUP_CYRILLIC_LANGS),$(ALL_LANGUAGES)) +LANGUAGE_GROUP_EUR_NAME=European + +LANGUAGE_GROUPS=CJK CYRILLIC EUR + # Defines for host tools ifeq ($(HOST_CC),) @@ -22,7 +33,7 @@ HOST_OUTPUT_DIR=Objects/host # Enumerate all of the include directories APP_INC_DIR = ./Core/Inc -LZFX_INC_DIR = ./Core/lzfx +BRIEFLZ_INC_DIR = ./Core/brieflz MINIWARE_INC_CMSIS_DEVICE = ./Core/BSP/Miniware/Vendor/CMSIS/Device/ST/STM32F1xx/Include MINIWARE_CMSIS_CORE_INC_DIR = ./Core/BSP/Miniware/Vendor/CMSIS/Include MINIWARE_HAL_INC_DIR = ./Core/BSP/Miniware/Vendor/STM32F1xx_HAL_Driver/Inc @@ -42,16 +53,15 @@ PINE_NMSIS_INC_DIR = ./Core/BSP/Pine64/Vendor/NMSIS/Core/Include PINE_FREERTOS_PORT_INC_DIR = ./Core/BSP/Pine64/Vendor/OS/FreeRTOS/Source/portable/GCC SOURCE_THREADS_DIR = ./Core/Threads SOURCE_CORE_DIR = ./Core/Src -SOURCE_LZFX_DIR = ./Core/lzfx +SOURCE_BRIEFLZ_DIR = ./Core/brieflz SOURCE_DRIVERS_DIR = ./Core/Drivers INC_PD_DRIVERS_DIR = ./Core/Drivers/FUSB302 SOURCE_MIDDLEWARES_DIR = ./Middlewares # Find-all's used for formatting -ALL_INCLUDES = $(shell find ./Core -type f -name '*.h') \ - $(shell find ./Core -type f -name '*.hpp') +ALL_INCLUDES = $(shell find ./Core -path $(BRIEFLZ_INC_DIR) -prune -false -o \( -type f \( -name '*.h' -o -name '*.hpp' \) \) ) + +ALL_SOURCE = $(shell find ./Core -path $(SOURCE_BRIEFLZ_DIR) -prune -false -o \( -type f \( -name '*.c' -o -name '*.cpp' \) \) ) -ALL_SOURCE = $(shell find ./Core -type f -name '*.c') \ - $(shell find ./Core -type f -name '*.cpp') # Device dependent settings ifeq ($(model),$(filter $(model),$(ALL_MINIWARE_MODELS))) $(info Building for Miniware ) @@ -110,7 +120,7 @@ DEV_CXXFLAGS= -MMD -MP -MF "$(@:%.o=%.d)" -MT "$@" endif INCLUDES = -I$(APP_INC_DIR) \ - -I$(LZFX_INC_DIR) \ + -I$(BRIEFLZ_INC_DIR) \ -I$(FRTOS_CMIS_INC_DIR) \ -I$(FRTOS_INC_DIR) \ -I$(DRIVER_INC_DIR) \ @@ -118,14 +128,13 @@ INCLUDES = -I$(APP_INC_DIR) \ -I$(THREADS_INC_DIR) \ -I$(INC_PD_DRIVERS_DIR) \ $(DEVICE_INCLUDES) - -TRANSLATION_FILES=$(wildcard ../../Translations/translation_*.json) + SOURCE := $(shell find $(SOURCE_THREADS_DIR) -type f -name '*.c') \ $(shell find $(SOURCE_CORE_DIR) -type f -name '*.c') \ $(shell find $(SOURCE_DRIVERS_DIR) -type f -name '*.c') \ $(shell find $(DEVICE_BSP_DIR) -type f -name '*.c') \ $(shell find $(SOURCE_MIDDLEWARES_DIR) -type f -name '*.c') \ -$(SOURCE_LZFX_DIR)/lzfx.c +$(SOURCE_BRIEFLZ_DIR)/depack.c SOURCE_CPP := $(shell find $(SOURCE_THREADS_DIR) -type f -name '*.cpp') \ $(shell find $(SOURCE_CORE_DIR) -type f -name '*.cpp') \ $(shell find $(SOURCE_DRIVERS_DIR) -type f -name '*.cpp') \ @@ -310,31 +319,37 @@ all: $(ALL_FIRMWARE_TARGETS) $(HEXFILE_DIR)/$(model)_%.elf : \ $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \ $(OUTPUT_DIR)/Core/Gen/Translation.%.o \ + $(OUTPUT_DIR)/Core/LangSupport/lang_single.o \ Makefile $(LDSCRIPT) @test 
-d $(@D) || mkdir -p $(@D) @echo Linking $@ @$(CPP) $(CXXFLAGS) $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \ $(OUTPUT_DIR)/Core/Gen/Translation.$*.o \ + $(OUTPUT_DIR)/Core/LangSupport/lang_single.o \ $(LIBS) $(LINKER_FLAGS) -o$@ -Wl,-Map=$@.map $(HEXFILE_DIR)/$(model)_string_compressed_%.elf : \ $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \ - $(OUTPUT_DIR)/Core/Gen/Translation_lzfx.%.o \ + $(OUTPUT_DIR)/Core/Gen/Translation_brieflz.%.o \ + $(OUTPUT_DIR)/Core/LangSupport/lang_single.o \ Makefile $(LDSCRIPT) @test -d $(@D) || mkdir -p $(@D) @echo Linking $@ @$(CPP) $(CXXFLAGS) $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \ - $(OUTPUT_DIR)/Core/Gen/Translation_lzfx.$*.o \ + $(OUTPUT_DIR)/Core/Gen/Translation_brieflz.$*.o \ + $(OUTPUT_DIR)/Core/LangSupport/lang_single.o \ $(LIBS) $(LINKER_FLAGS) -o$@ -Wl,-Map=$@.map $(HEXFILE_DIR)/$(model)_font_compressed_%.elf : \ $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \ - $(OUTPUT_DIR)/Core/Gen/Translation_lzfx_font.%.o \ + $(OUTPUT_DIR)/Core/Gen/Translation_brieflz_font.%.o \ + $(OUTPUT_DIR)/Core/LangSupport/lang_single.o \ Makefile $(LDSCRIPT) @test -d $(@D) || mkdir -p $(@D) @echo Linking $@ @$(CPP) $(CXXFLAGS) $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \ - $(OUTPUT_DIR)/Core/Gen/Translation_lzfx_font.$*.o \ + $(OUTPUT_DIR)/Core/Gen/Translation_brieflz_font.$*.o \ + $(OUTPUT_DIR)/Core/LangSupport/lang_single.o \ $(LIBS) $(LINKER_FLAGS) -o$@ -Wl,-Map=$@.map $(OUT_OBJS): $(OUTPUT_DIR)/%.o : %.c Makefile @@ -374,38 +389,95 @@ $(OUTPUT_DIR)/Core/Gen/translation.files/%.o: Core/Gen/Translation.%.cpp @echo Generating $@ @$(CPP) -c $(filter-out -flto -g3,$(CXXFLAGS)) $< -o $@ -$(HOST_OUTPUT_DIR)/lzfx/liblzfx.so: Core/lzfx/lzfx.c +$(OUTPUT_DIR)/Core/Gen/translation.files/multi.%.o: Core/Gen/Translation_multi.%.cpp @test -d $(@D) || mkdir -p $(@D) - @echo Building host lzfx shared library $@ - @$(HOST_CC) -Wno-unused-result -fPIC -shared -O $^ -o $@ + @echo Generating $@ + @$(CPP) -c $(filter-out -flto -g3,$(CXXFLAGS)) $< -o $@ -$(OUTPUT_DIR)/Core/Gen/translation.files/%.strings.bin: $(OUTPUT_DIR)/Core/Gen/translation.files/%.o - @echo Dumping translation strings data from $< - @# Extract the raw strings data from the object file - @$(OBJCOPY) -O binary -j .rodata._ZL18TranslationIndices $< $(@D)/$*.data.TranslationIndices.bin - @test -s $(@D)/$*.data.TranslationIndices.bin || (rm $(@D)/$*.data.TranslationIndices.bin; echo 'ERROR: Output for .rodata._ZL18TranslationIndices is empty!' >&2; false) - @$(OBJCOPY) -O binary -j .rodata._ZL22TranslationStringsData $< $(@D)/$*.data.TranslationStrings.bin - @test -s $(@D)/$*.data.TranslationStrings.bin || (rm $(@D)/$*.data.TranslationStrings.bin; echo 'ERROR: Output for .rodata._ZL22TranslationStringsData is empty!' 
>&2; false) - @cat $(@D)/$*.data.TranslationIndices.bin $(@D)/$*.data.TranslationStrings.bin > $@ - -Core/Gen/Translation_lzfx.%.cpp: $(OUTPUT_DIR)/Core/Gen/translation.files/%.strings.bin $(OUTPUT_DIR)/Core/Gen/translation.files/%.pickle $(HOST_OUTPUT_DIR)/lzfx/liblzfx.so +$(HOST_OUTPUT_DIR)/brieflz/libbrieflz.so: Core/brieflz/brieflz.c Core/brieflz/depack.c @test -d $(@D) || mkdir -p $(@D) - @echo Generating lzfx compressed translation for $* - @python3 ../Translations/make_translation.py \ - -o $(PWD)/Core/Gen/Translation_lzfx.$*.cpp \ + @echo Building host brieflz shared library $@ + @$(HOST_CC) -fPIC -shared -DBLZ_DLL -DBLZ_DLL_EXPORTS -O $^ -o $@ + +Core/Gen/Translation_brieflz.%.cpp: $(OUTPUT_DIR)/Core/Gen/translation.files/%.o $(OUTPUT_DIR)/Core/Gen/translation.files/%.pickle $(HOST_OUTPUT_DIR)/brieflz/libbrieflz.so + @test -d $(@D) || mkdir -p $(@D) + @echo Generating BriefLZ compressed translation for $* + @OBJCOPY=$(OBJCOPY) python3 ../Translations/make_translation.py \ + -o $(PWD)/Core/Gen/Translation_brieflz.$*.cpp \ --input-pickled $(OUTPUT_DIR)/Core/Gen/translation.files/$*.pickle \ - --strings-bin $(OUTPUT_DIR)/Core/Gen/translation.files/$*.strings.bin \ + --strings-obj $(OUTPUT_DIR)/Core/Gen/translation.files/$*.o \ $* -Core/Gen/Translation_lzfx_font.%.cpp: $(OUTPUT_DIR)/Core/Gen/translation.files/%.pickle $(HOST_OUTPUT_DIR)/lzfx/liblzfx.so +Core/Gen/Translation_brieflz_font.%.cpp: $(OUTPUT_DIR)/Core/Gen/translation.files/%.pickle $(HOST_OUTPUT_DIR)/brieflz/libbrieflz.so @test -d $(@D) || mkdir -p $(@D) - @echo Generating lzfx compressed translation for $* + @echo Generating BriefLZ compressed translation for $* @python3 ../Translations/make_translation.py \ - -o $(PWD)/Core/Gen/Translation_lzfx_font.$*.cpp \ + -o $(PWD)/Core/Gen/Translation_brieflz_font.$*.cpp \ --input-pickled $(OUTPUT_DIR)/Core/Gen/translation.files/$*.pickle \ --compress-font \ $* +# +# The recipes to produce multi-language firmwares: +# + +# Usage: $(eval $(call multi_lang_rule,$(1)=group_code,$(2)=group_name,$(3)=lang_codes)) +define multi_lang_rule + +$(HEXFILE_DIR)/$(model)_multi_$(2).elf : \ + $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \ + $(OUTPUT_DIR)/Core/Gen/Translation_multi.$(1).o \ + $(OUTPUT_DIR)/Core/LangSupport/lang_multi.o \ + Makefile $(LDSCRIPT) + @test -d $$(@D) || mkdir -p $$(@D) + @echo Linking $$@ + @$(CPP) $(CXXFLAGS) $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \ + $(OUTPUT_DIR)/Core/Gen/Translation_multi.$(1).o \ + $(OUTPUT_DIR)/Core/LangSupport/lang_multi.o \ + $(LIBS) $(LINKER_FLAGS) -o$$@ -Wl,-Map=$$@.map + +$(HEXFILE_DIR)/$(model)_multi_compressed_$(2).elf : \ + $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \ + $(OUTPUT_DIR)/Core/Gen/Translation_brieflz_multi.$(1).o \ + $(OUTPUT_DIR)/Core/LangSupport/lang_multi.o \ + Makefile $(LDSCRIPT) + @test -d $$(@D) || mkdir -p $$(@D) + @echo Linking $$@ + @$(CPP) $(CXXFLAGS) $(OUT_OBJS_S) $(OUT_OBJS) $(OUT_OBJS_CPP) \ + $(OUTPUT_DIR)/Core/Gen/Translation_brieflz_multi.$(1).o \ + $(OUTPUT_DIR)/Core/LangSupport/lang_multi.o \ + $(LIBS) $(LINKER_FLAGS) -o$$@ -Wl,-Map=$$@.map + +Core/Gen/Translation_multi.$(1).cpp: $(patsubst %,../Translations/translation_%.json,$(3)) \ + ../Translations/make_translation.py \ + ../Translations/translations_def.js \ + ../Translations/font_tables.py \ + Makefile ../Translations/wqy-bitmapsong/wenquanyi_9pt.bdf + @test -d Core/Gen || mkdir -p Core/Gen + @test -d $(OUTPUT_DIR)/Core/Gen/translation.files || mkdir -p $(OUTPUT_DIR)/Core/Gen/translation.files + @echo 'Generating translations for multi-language $(2)' + 
@python3 ../Translations/make_translation.py \ + -o $(PWD)/Core/Gen/Translation_multi.$(1).cpp \ + --output-pickled $(OUTPUT_DIR)/Core/Gen/translation.files/multi.$(1).pickle \ + $(3) + +$(OUTPUT_DIR)/Core/Gen/translation.files/multi.$(1).pickle: Core/Gen/Translation_multi.$(1).cpp + +Core/Gen/Translation_brieflz_multi.$(1).cpp: $(OUTPUT_DIR)/Core/Gen/translation.files/multi.$(1).o $(OUTPUT_DIR)/Core/Gen/translation.files/multi.$(1).pickle $(HOST_OUTPUT_DIR)/brieflz/libbrieflz.so + @test -d $$(@D) || mkdir -p $$(@D) + @echo Generating BriefLZ compressed translation for multi-language $(2) + @OBJCOPY=$(OBJCOPY) python3 ../Translations/make_translation.py \ + -o $(PWD)/Core/Gen/Translation_brieflz_multi.$(1).cpp \ + --input-pickled $(OUTPUT_DIR)/Core/Gen/translation.files/multi.$(1).pickle \ + --strings-obj $(OUTPUT_DIR)/Core/Gen/translation.files/multi.$(1).o \ + --compress-font \ + $(3) + +endef # multi_lang_rule + + +# Add multi-language firmware rules: +$(foreach group_code,$(LANGUAGE_GROUPS),$(eval $(call multi_lang_rule,$(group_code),$(LANGUAGE_GROUP_$(group_code)_NAME),$(LANGUAGE_GROUP_$(group_code)_LANGS)))) clean :
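To see how the pieces above fit together: the `libbrieflz.so` rule builds the compressor for the host so that `make_translation.py` can drive it, while `depack.c` provides the decompressor that is linked into the firmware. The following is only an illustrative host-side round trip, not part of this patch; it assumes the standard upstream BriefLZ entry points declared in `brieflz.h` (`blz_max_packed_size`, `blz_workmem_size`, `blz_pack`) together with the `blz_depack` shown earlier.

```c
/* Illustrative host-side round trip with the vendored BriefLZ API.
 * Not part of the patch; API names assumed from upstream brieflz.h. */
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "brieflz.h"

int main(void)
{
    const char text[] = "Sleep temperature Sleep timeout Shutdown timeout";
    const unsigned long src_size = (unsigned long) strlen(text);

    unsigned char *packed   = malloc(blz_max_packed_size(src_size));
    void          *workmem  = malloc(blz_workmem_size(src_size));
    char          *unpacked = malloc(src_size + 1);

    if (!packed || !workmem || !unpacked) {
        return 1;
    }

    /* Compress on the host (the same code the Makefile builds into
     * libbrieflz.so for the Python translation tooling). */
    unsigned long packed_size = blz_pack(text, packed, src_size, workmem);

    /* Decompress with the depacker that ships in the firmware image. */
    unsigned long out_size = blz_depack(packed, unpacked, src_size);
    unpacked[out_size] = '\0';

    printf("%lu -> %lu bytes, round trip %s\n", src_size, packed_size,
           strcmp(text, unpacked) == 0 ? "OK" : "FAILED");

    free(packed);
    free(workmem);
    free(unpacked);
    return 0;
}
```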