[RFC] Multi-language firmware (second try) (#941)

* Impl. sectioned font table in firmware

* make_translation.py: Extract build_symbol_conversion_table function

* Put translation indices and strings in a struct

* Move translation objcopy step to Python

* Impl. multi-language firmware demo

* Impl. strings-compressed multi-lang firmware demo

* Add font compression to multi-lang demo

* Refactor Makefile a bit

* Fix rules for make < 4.3

* Add more multi-lang groups

* Add Pinecil multi-lang CI build

* Add lzfx compression license text

* Remove multi-language demo group

* Fix build after merge

* Import code from BriefLZ

* Change brieflz for our use case

* Change compression to use brieflz

* Remove lzfx code

* Update license file for brieflz

* Exclude brieflz files from format check

* Add BriefLZ test
This commit is contained in:
alvinhochun
2021-04-30 16:51:13 +08:00
committed by GitHub
parent a51e3e9f03
commit 1a0b542ae6
57 changed files with 3447 additions and 767 deletions

View File

@@ -50,7 +50,7 @@ __ILM_RAM_SIZE = 0x00010000;
* </h>
*/
__RAM_BASE = 0x20000000;
__RAM_SIZE = 0x00005000;
__RAM_SIZE = 0x00006800;
/********************* Stack / Heap Configuration ****************************
* <h> Stack / Heap Configuration

View File

@@ -113,35 +113,49 @@ void OLED::setFramebuffer(uint8_t *buffer) {
* Precursor is the command char that is used to select the table.
*/
void OLED::drawChar(const uint16_t charCode, const FontStyle fontStyle) {
const uint8_t *currentFont;
static uint8_t fontWidth, fontHeight;
uint16_t index;
switch (fontStyle) {
case FontStyle::SMALL:
currentFont = Font_6x8;
fontHeight = 8;
fontWidth = 6;
break;
case FontStyle::EXTRAS:
currentFont = ExtraFontChars;
index = charCode;
fontHeight = 16;
fontWidth = 12;
break;
case FontStyle::SMALL:
case FontStyle::LARGE:
default:
currentFont = Font_12x16;
fontHeight = 16;
fontWidth = 12;
if (charCode == '\x01' && cursor_y == 0) { // 0x01 is used as new line char
setCursor(0, 8);
return;
} else if (charCode <= 0x01) {
return;
}
currentFont = nullptr;
index = 0;
switch (fontStyle) {
case FontStyle::SMALL:
fontHeight = 8;
fontWidth = 6;
break;
case FontStyle::LARGE:
default:
fontHeight = 16;
fontWidth = 12;
break;
}
for (uint32_t i = 0; i < FontSectionsCount; i++) {
const auto &section = FontSections[i];
if (charCode >= section.symbol_start && charCode < section.symbol_end) {
currentFont = fontStyle == FontStyle::SMALL ? section.font06_start_ptr : section.font12_start_ptr;
index = charCode - section.symbol_start;
break;
}
}
break;
}
if (charCode == '\x01' && cursor_y == 0) { // 0x01 is used as new line char
setCursor(0, 8);
return;
} else if (charCode <= 0x01) {
return;
}
// First index is \x02
const uint16_t index = charCode - 2;
const uint8_t *charPointer = currentFont + ((fontWidth * (fontHeight / 8)) * index);
drawArea(cursor_x, cursor_y, fontWidth, fontHeight, charPointer);
cursor_x += fontWidth;
@@ -348,7 +362,7 @@ void OLED::debugNumber(int32_t val, FontStyle fontStyle) {
void OLED::drawSymbol(uint8_t symbolID) {
// draw a symbol to the current cursor location
drawChar(symbolID + 2, FontStyle::EXTRAS);
drawChar(symbolID, FontStyle::EXTRAS);
}
// Draw an area, but y must be aligned on 0/8 offset

View File

@@ -10,7 +10,7 @@
#ifndef SETTINGS_H_
#define SETTINGS_H_
#include <stdint.h>
#define SETTINGSVERSION (0x29)
#define SETTINGSVERSION (0x2A)
/*Change this if you change the struct below to prevent people getting \
out of sync*/
@@ -59,6 +59,7 @@ typedef struct {
uint8_t hallEffectSensitivity; // Operating mode of the hall effect sensor
uint8_t accelMissingWarningCounter; // Counter of how many times we have warned we cannot detect the accelerometer
uint8_t pdMissingWarningCounter; // Counter of how many times we have warned we cannot detect the pd interface
char uiLanguage[8]; // Selected UI Language code, null-terminated *only if* the length is less than 8 chars
uint32_t padding; // This is here for in case we are not an even divisor so
// that nothing gets cut off

View File

@@ -57,6 +57,7 @@ enum class SettingsItemIndex : uint8_t {
AnimSpeed,
PowerPulseWait,
PowerPulseDuration,
LanguageSwitch,
NUM_ITEMS,
};
@@ -110,11 +111,28 @@ struct TranslationIndexTable {
uint16_t SettingsMenuEntriesDescriptions[5]; // unused
};
extern const TranslationIndexTable *const Tr;
extern const char *const TranslationStrings;
extern const TranslationIndexTable *Tr;
extern const char * TranslationStrings;
extern const uint8_t *const Font_12x16;
extern const uint8_t *const Font_6x8;
// In-memory layout of one language's translation data: the index table,
// immediately followed by the string data.
struct TranslationData {
// Index table; its entries are uint16_t values (see TranslationIndexTable).
TranslationIndexTable indices;
// Translation strings follows the translation index table.
// C++ does not support flexible array member as in C, so we use a 1-element
// array as a placeholder.
char strings[1];
};
/// Describes one contiguous range of symbol codes together with the glyph
/// data for that range. A symbol code `c` belongs to the section when
/// `symbol_start <= c < symbol_end`.
struct FontSection {
/// Start index of font section, inclusive
uint16_t symbol_start;
/// End index of font section, exclusive
uint16_t symbol_end;
/// Glyph data for the large (12x16) font; the glyph for `symbol_start`
/// comes first.
const uint8_t *font12_start_ptr;
/// Glyph data for the small (6x8) font; may be null when a section
/// carries no small-font data.
const uint8_t *font06_start_ptr;
};
extern const FontSection *const FontSections;
extern const uint8_t FontSectionsCount;
constexpr uint8_t settings_item_index(const SettingsItemIndex i) { return static_cast<uint8_t>(i); }
// Use a constexpr function for type-checking.
@@ -123,5 +141,7 @@ constexpr uint8_t settings_item_index(const SettingsItemIndex i) { return static
const char *translatedString(uint16_t index);
void prepareTranslations();
bool settings_displayLanguageSwitch(void);
bool settings_setLanguageSwitch(void);
#endif /* TRANSLATION_H_ */

View File

@@ -0,0 +1,42 @@
#ifndef TRANSLATION_MULTI_H_
#define TRANSLATION_MULTI_H_
#include "Translation.h"
// The compressed translation data will be decompressed to this buffer. These
// data may include:
// - TranslationData (translation index table and translation strings)
// - Font table(s)
// The translation index table consists of uint16_t (half words) which has a
// 2-byte alignment. Therefore, the declaration of this buffer must include
// the alignment specifier `alignas(TranslationData)` to satisfy its alignment.
// TranslationData must always be decompressed to the start of this buffer.
extern uint8_t translation_data_out_buffer[];
extern const uint16_t translation_data_out_buffer_size;
// Build-time description of the glyph data for one font section. At runtime
// prepareTranslations() turns each of these into a FontSection entry in
// DynamicFontSections, decompressing the data when required.
struct FontSectionDataInfo {
// First symbol code covered by this section (inclusive).
uint16_t symbol_start;
// Number of consecutive symbol codes covered by this section.
uint16_t symbol_count;
// Size in bytes of the data at `data_ptr`. For compressed sections this is
// the size passed to the decompressor as the source size.
uint16_t data_size : 15;
// Set when `data_ptr` points at BriefLZ-compressed data.
bool data_is_compressed : 1;
// Font12x16 data followed by font6x8 data
const uint8_t *data_ptr;
};
extern const FontSectionDataInfo FontSectionDataInfos[];
extern const uint8_t FontSectionDataCount;
extern FontSection DynamicFontSections[];
// Describes one language that is built into a multi-language firmware image.
struct LanguageMeta {
// Language code. It is compared against / copied into
// systemSettings.uiLanguage with strncmp/strncpy over 8 bytes, so it is
// only guaranteed to be null-terminated when shorter than 8 characters.
char code[8];
// Translation data (a TranslationData blob), possibly compressed.
const uint8_t *translation_data;
// Size in bytes of `translation_data`; passed to the decompressor as the
// source size when the data is compressed.
uint16_t translation_size : 15;
// Set when `translation_data` is BriefLZ-compressed and has to be unpacked
// into translation_data_out_buffer before use.
bool translation_is_compressed : 1;
};
extern const LanguageMeta LanguageMetas[];
extern const uint8_t LanguageCount;
#endif /* TRANSLATION_MULTI_H_ */

View File

@@ -0,0 +1,92 @@
#include "OLED.hpp"
#include "Translation.h"
#include "Translation_multi.h"
#include "brieflz.h"
#include "configuration.h"
#include "gui.hpp"
const TranslationIndexTable *Tr = nullptr;
const char * TranslationStrings = nullptr;
static uint8_t selectedLangIndex = 255;
// Resolves `selectedLangIndex` from the language code stored in the settings.
// The lookup only runs while the index still holds its "not yet resolved"
// marker (255); when no built-in language matches the stored code, the first
// language is selected.
static void initSelectedLanguageIndex() {
  if (selectedLangIndex != 255) {
    // Already resolved by an earlier call.
    return;
  }

  const char *storedCode = const_cast<char *>(systemSettings.uiLanguage);

  // Falls back to the first language unless a match is found below.
  uint8_t resolved = 0;
  for (size_t candidate = 0; candidate < LanguageCount; candidate++) {
    // The code is at most sizeof(uiLanguage) chars and may lack a terminator.
    const bool sameCode = strncmp(storedCode, LanguageMetas[candidate].code, sizeof(systemSettings.uiLanguage)) == 0;
    if (sameCode) {
      resolved = candidate;
      break;
    }
  }
  selectedLangIndex = resolved;
}
static void writeSelectedLanguageToSettings() {
char *lang = const_cast<char *>(systemSettings.uiLanguage);
strncpy(lang, LanguageMetas[selectedLangIndex].code, sizeof(systemSettings.uiLanguage));
}
void prepareTranslations() {
initSelectedLanguageIndex();
if (selectedLangIndex >= LanguageCount) {
// This shouldn't happen.
return;
}
const LanguageMeta &langMeta = LanguageMetas[selectedLangIndex];
const TranslationData *translationData;
uint16_t buffer_remaining_size = translation_data_out_buffer_size;
uint8_t * buffer_next_ptr = translation_data_out_buffer;
if (langMeta.translation_is_compressed) {
unsigned int outsize;
outsize = blz_depack_srcsize(langMeta.translation_data, buffer_next_ptr, langMeta.translation_size);
translationData = reinterpret_cast<const TranslationData *>(buffer_next_ptr);
buffer_remaining_size -= outsize;
buffer_next_ptr += outsize;
} else {
translationData = reinterpret_cast<const TranslationData *>(langMeta.translation_data);
}
Tr = &translationData->indices;
TranslationStrings = translationData->strings;
memset(DynamicFontSections, 0, FontSectionsCount * sizeof(DynamicFontSections[0]));
for (int i = 0; i < FontSectionDataCount; i++) {
const auto &fontSectionDataInfo = FontSectionDataInfos[i];
auto & fontSection = DynamicFontSections[i];
fontSection.symbol_start = fontSectionDataInfo.symbol_start;
fontSection.symbol_end = fontSection.symbol_start + fontSectionDataInfo.symbol_count;
const uint16_t font12_size = fontSectionDataInfo.symbol_count * (12 * 16 / 8);
uint16_t dataSize;
if (fontSectionDataInfo.data_is_compressed) {
unsigned int outsize;
outsize = blz_depack_srcsize(fontSectionDataInfo.data_ptr, buffer_next_ptr, fontSectionDataInfo.data_size);
fontSection.font12_start_ptr = buffer_next_ptr;
dataSize = outsize;
buffer_remaining_size -= outsize;
buffer_next_ptr += outsize;
} else {
fontSection.font12_start_ptr = fontSectionDataInfo.data_ptr;
dataSize = fontSectionDataInfo.data_size;
}
if (dataSize > font12_size) {
fontSection.font06_start_ptr = fontSection.font12_start_ptr + font12_size;
}
}
}
// Settings-menu handler: advances to the next built-in language (wrapping
// around after the last one), stores the choice in the settings and reloads
// the translation data.
// Returns true when the newly selected language is the last one in the list.
bool settings_setLanguageSwitch(void) {
  const uint8_t nextIndex = (selectedLangIndex + 1) % LanguageCount;
  selectedLangIndex       = nextIndex;

  writeSelectedLanguageToSettings();
  prepareTranslations();

  const bool isLastLanguage = selectedLangIndex == (LanguageCount - 1);
  return isLastLanguage;
}
// Settings-menu display handler: prints the short name of the language-switch
// entry across the whole screen. Always returns false.
bool settings_displayLanguageSwitch(void) {
  const uint8_t itemIndex = static_cast<uint8_t>(SettingsItemIndex::LanguageSwitch);
  const char *  label     = translatedString(Tr->SettingsShortNames[itemIndex]);
  OLED::printWholeScreen(label);
  return false;
}

View File

@@ -0,0 +1,7 @@
#include "Translation.h"
// Single-language build: there is no language to switch to, so this handler
// does nothing and always returns false.
bool settings_setLanguageSwitch(void) {
  return false;
}
// Single-language build: nothing is drawn for the language-switch entry.
bool settings_displayLanguageSwitch(void) {
  const bool skipEntry = true; // skip
  return skipEntry;
}

View File

@@ -144,6 +144,7 @@ const menuitem rootSettingsMenu[]{
{0, settings_enterPowerSavingMenu, settings_displayPowerSavingMenu}, /*Sleep Options Menu*/
{0, settings_enterUIMenu, settings_displayUIMenu}, /*UI Menu*/
{0, settings_enterAdvancedMenu, settings_displayAdvancedMenu}, /*Advanced Menu*/
{0, settings_setLanguageSwitch, settings_displayLanguageSwitch}, /*Language Switch*/
{0, nullptr, nullptr} // end of menu marker. DO NOT REMOVE
};

View File

@@ -0,0 +1,41 @@
This directory contains files originally written by other people.
## BriefLZ
- `brieflz_btparse.h`
- `brieflz_hashbucket.h`
- `brieflz_lazy.h`
- `brieflz_leparse.h`
- `brieflz.c`
- `depack.c`
The above files are originally obtained from https://github.com/jibsen/brieflz
(commit 0ab07a5).
### License:
```
The zlib License (Zlib)
Copyright (c) 2002-2020 Joergen Ibsen
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must
not claim that you wrote the original software. If you use this
software in a product, an acknowledgment in the product
documentation would be appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must
not be misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.
```

View File

@@ -0,0 +1,659 @@
//
// BriefLZ - small fast Lempel-Ziv
//
// C packer
//
// Copyright (c) 2002-2020 Joergen Ibsen
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must
// not claim that you wrote the original software. If you use this
// software in a product, an acknowledgment in the product
// documentation would be appreciated but is not required.
//
// 2. Altered source versions must be plainly marked as such, and must
// not be misrepresented as being the original software.
//
// 3. This notice may not be removed or altered from any source
// distribution.
//
#include "brieflz.h"
#include <assert.h>
#include <limits.h>
#include <stdint.h>
#if _MSC_VER >= 1400
# include <intrin.h>
# define BLZ_BUILTIN_MSVC
#elif defined(__clang__) && defined(__has_builtin)
# if __has_builtin(__builtin_clz)
# define BLZ_BUILTIN_GCC
# endif
#elif __GNUC__ > 3 || (__GNUC__ == 3 && __GNUC_MINOR__ >= 4)
# define BLZ_BUILTIN_GCC
#endif
// Type used to store values in workmem.
//
// This is used to store positions and lengths, so src_size has to be within
// the range of this type.
//
typedef uint32_t blz_word;
#define BLZ_WORD_MAX UINT32_MAX
// Number of bits of hash to use for lookup.
//
// The size of the lookup table (and thus workmem) depends on this.
//
// Values between 10 and 18 work well. Lower values generally make compression
// speed faster but ratio worse. The default value 17 (128k entries) is a
// compromise.
//
#ifndef BLZ_HASH_BITS
# define BLZ_HASH_BITS 17
#endif
#define LOOKUP_SIZE (1UL << BLZ_HASH_BITS)
#define NO_MATCH_POS ((blz_word) -1)
// Internal data structure
// Output state of the bit/byte stream writer used while packing.
struct blz_state {
// Next free byte in the output buffer.
unsigned char *next_out;
// Two output bytes reserved for the tag currently being filled; the tag is
// stored there low byte first once it is complete.
unsigned char *tag_out;
// Tag bits accumulated so far (new bits are shifted in from the right).
unsigned int tag;
// Number of bits that can still be added to the current tag.
int bits_left;
};
#if !defined(BLZ_NO_LUT)
static const unsigned short blz_gamma_lookup[512][2] = {
{0, 0},
{0, 0},
{0x00, 2}, {0x02, 2},
{0x04, 4}, {0x06, 4}, {0x0C, 4}, {0x0E, 4},
{0x14, 6}, {0x16, 6}, {0x1C, 6}, {0x1E, 6},
{0x34, 6}, {0x36, 6}, {0x3C, 6}, {0x3E, 6},
{0x54, 8}, {0x56, 8}, {0x5C, 8}, {0x5E, 8},
{0x74, 8}, {0x76, 8}, {0x7C, 8}, {0x7E, 8},
{0xD4, 8}, {0xD6, 8}, {0xDC, 8}, {0xDE, 8},
{0xF4, 8}, {0xF6, 8}, {0xFC, 8}, {0xFE, 8},
{0x154, 10}, {0x156, 10}, {0x15C, 10}, {0x15E, 10},
{0x174, 10}, {0x176, 10}, {0x17C, 10}, {0x17E, 10},
{0x1D4, 10}, {0x1D6, 10}, {0x1DC, 10}, {0x1DE, 10},
{0x1F4, 10}, {0x1F6, 10}, {0x1FC, 10}, {0x1FE, 10},
{0x354, 10}, {0x356, 10}, {0x35C, 10}, {0x35E, 10},
{0x374, 10}, {0x376, 10}, {0x37C, 10}, {0x37E, 10},
{0x3D4, 10}, {0x3D6, 10}, {0x3DC, 10}, {0x3DE, 10},
{0x3F4, 10}, {0x3F6, 10}, {0x3FC, 10}, {0x3FE, 10},
{0x554, 12}, {0x556, 12}, {0x55C, 12}, {0x55E, 12},
{0x574, 12}, {0x576, 12}, {0x57C, 12}, {0x57E, 12},
{0x5D4, 12}, {0x5D6, 12}, {0x5DC, 12}, {0x5DE, 12},
{0x5F4, 12}, {0x5F6, 12}, {0x5FC, 12}, {0x5FE, 12},
{0x754, 12}, {0x756, 12}, {0x75C, 12}, {0x75E, 12},
{0x774, 12}, {0x776, 12}, {0x77C, 12}, {0x77E, 12},
{0x7D4, 12}, {0x7D6, 12}, {0x7DC, 12}, {0x7DE, 12},
{0x7F4, 12}, {0x7F6, 12}, {0x7FC, 12}, {0x7FE, 12},
{0xD54, 12}, {0xD56, 12}, {0xD5C, 12}, {0xD5E, 12},
{0xD74, 12}, {0xD76, 12}, {0xD7C, 12}, {0xD7E, 12},
{0xDD4, 12}, {0xDD6, 12}, {0xDDC, 12}, {0xDDE, 12},
{0xDF4, 12}, {0xDF6, 12}, {0xDFC, 12}, {0xDFE, 12},
{0xF54, 12}, {0xF56, 12}, {0xF5C, 12}, {0xF5E, 12},
{0xF74, 12}, {0xF76, 12}, {0xF7C, 12}, {0xF7E, 12},
{0xFD4, 12}, {0xFD6, 12}, {0xFDC, 12}, {0xFDE, 12},
{0xFF4, 12}, {0xFF6, 12}, {0xFFC, 12}, {0xFFE, 12},
{0x1554, 14}, {0x1556, 14}, {0x155C, 14}, {0x155E, 14},
{0x1574, 14}, {0x1576, 14}, {0x157C, 14}, {0x157E, 14},
{0x15D4, 14}, {0x15D6, 14}, {0x15DC, 14}, {0x15DE, 14},
{0x15F4, 14}, {0x15F6, 14}, {0x15FC, 14}, {0x15FE, 14},
{0x1754, 14}, {0x1756, 14}, {0x175C, 14}, {0x175E, 14},
{0x1774, 14}, {0x1776, 14}, {0x177C, 14}, {0x177E, 14},
{0x17D4, 14}, {0x17D6, 14}, {0x17DC, 14}, {0x17DE, 14},
{0x17F4, 14}, {0x17F6, 14}, {0x17FC, 14}, {0x17FE, 14},
{0x1D54, 14}, {0x1D56, 14}, {0x1D5C, 14}, {0x1D5E, 14},
{0x1D74, 14}, {0x1D76, 14}, {0x1D7C, 14}, {0x1D7E, 14},
{0x1DD4, 14}, {0x1DD6, 14}, {0x1DDC, 14}, {0x1DDE, 14},
{0x1DF4, 14}, {0x1DF6, 14}, {0x1DFC, 14}, {0x1DFE, 14},
{0x1F54, 14}, {0x1F56, 14}, {0x1F5C, 14}, {0x1F5E, 14},
{0x1F74, 14}, {0x1F76, 14}, {0x1F7C, 14}, {0x1F7E, 14},
{0x1FD4, 14}, {0x1FD6, 14}, {0x1FDC, 14}, {0x1FDE, 14},
{0x1FF4, 14}, {0x1FF6, 14}, {0x1FFC, 14}, {0x1FFE, 14},
{0x3554, 14}, {0x3556, 14}, {0x355C, 14}, {0x355E, 14},
{0x3574, 14}, {0x3576, 14}, {0x357C, 14}, {0x357E, 14},
{0x35D4, 14}, {0x35D6, 14}, {0x35DC, 14}, {0x35DE, 14},
{0x35F4, 14}, {0x35F6, 14}, {0x35FC, 14}, {0x35FE, 14},
{0x3754, 14}, {0x3756, 14}, {0x375C, 14}, {0x375E, 14},
{0x3774, 14}, {0x3776, 14}, {0x377C, 14}, {0x377E, 14},
{0x37D4, 14}, {0x37D6, 14}, {0x37DC, 14}, {0x37DE, 14},
{0x37F4, 14}, {0x37F6, 14}, {0x37FC, 14}, {0x37FE, 14},
{0x3D54, 14}, {0x3D56, 14}, {0x3D5C, 14}, {0x3D5E, 14},
{0x3D74, 14}, {0x3D76, 14}, {0x3D7C, 14}, {0x3D7E, 14},
{0x3DD4, 14}, {0x3DD6, 14}, {0x3DDC, 14}, {0x3DDE, 14},
{0x3DF4, 14}, {0x3DF6, 14}, {0x3DFC, 14}, {0x3DFE, 14},
{0x3F54, 14}, {0x3F56, 14}, {0x3F5C, 14}, {0x3F5E, 14},
{0x3F74, 14}, {0x3F76, 14}, {0x3F7C, 14}, {0x3F7E, 14},
{0x3FD4, 14}, {0x3FD6, 14}, {0x3FDC, 14}, {0x3FDE, 14},
{0x3FF4, 14}, {0x3FF6, 14}, {0x3FFC, 14}, {0x3FFE, 14},
{0x5554, 16}, {0x5556, 16}, {0x555C, 16}, {0x555E, 16},
{0x5574, 16}, {0x5576, 16}, {0x557C, 16}, {0x557E, 16},
{0x55D4, 16}, {0x55D6, 16}, {0x55DC, 16}, {0x55DE, 16},
{0x55F4, 16}, {0x55F6, 16}, {0x55FC, 16}, {0x55FE, 16},
{0x5754, 16}, {0x5756, 16}, {0x575C, 16}, {0x575E, 16},
{0x5774, 16}, {0x5776, 16}, {0x577C, 16}, {0x577E, 16},
{0x57D4, 16}, {0x57D6, 16}, {0x57DC, 16}, {0x57DE, 16},
{0x57F4, 16}, {0x57F6, 16}, {0x57FC, 16}, {0x57FE, 16},
{0x5D54, 16}, {0x5D56, 16}, {0x5D5C, 16}, {0x5D5E, 16},
{0x5D74, 16}, {0x5D76, 16}, {0x5D7C, 16}, {0x5D7E, 16},
{0x5DD4, 16}, {0x5DD6, 16}, {0x5DDC, 16}, {0x5DDE, 16},
{0x5DF4, 16}, {0x5DF6, 16}, {0x5DFC, 16}, {0x5DFE, 16},
{0x5F54, 16}, {0x5F56, 16}, {0x5F5C, 16}, {0x5F5E, 16},
{0x5F74, 16}, {0x5F76, 16}, {0x5F7C, 16}, {0x5F7E, 16},
{0x5FD4, 16}, {0x5FD6, 16}, {0x5FDC, 16}, {0x5FDE, 16},
{0x5FF4, 16}, {0x5FF6, 16}, {0x5FFC, 16}, {0x5FFE, 16},
{0x7554, 16}, {0x7556, 16}, {0x755C, 16}, {0x755E, 16},
{0x7574, 16}, {0x7576, 16}, {0x757C, 16}, {0x757E, 16},
{0x75D4, 16}, {0x75D6, 16}, {0x75DC, 16}, {0x75DE, 16},
{0x75F4, 16}, {0x75F6, 16}, {0x75FC, 16}, {0x75FE, 16},
{0x7754, 16}, {0x7756, 16}, {0x775C, 16}, {0x775E, 16},
{0x7774, 16}, {0x7776, 16}, {0x777C, 16}, {0x777E, 16},
{0x77D4, 16}, {0x77D6, 16}, {0x77DC, 16}, {0x77DE, 16},
{0x77F4, 16}, {0x77F6, 16}, {0x77FC, 16}, {0x77FE, 16},
{0x7D54, 16}, {0x7D56, 16}, {0x7D5C, 16}, {0x7D5E, 16},
{0x7D74, 16}, {0x7D76, 16}, {0x7D7C, 16}, {0x7D7E, 16},
{0x7DD4, 16}, {0x7DD6, 16}, {0x7DDC, 16}, {0x7DDE, 16},
{0x7DF4, 16}, {0x7DF6, 16}, {0x7DFC, 16}, {0x7DFE, 16},
{0x7F54, 16}, {0x7F56, 16}, {0x7F5C, 16}, {0x7F5E, 16},
{0x7F74, 16}, {0x7F76, 16}, {0x7F7C, 16}, {0x7F7E, 16},
{0x7FD4, 16}, {0x7FD6, 16}, {0x7FDC, 16}, {0x7FDE, 16},
{0x7FF4, 16}, {0x7FF6, 16}, {0x7FFC, 16}, {0x7FFE, 16},
{0xD554, 16}, {0xD556, 16}, {0xD55C, 16}, {0xD55E, 16},
{0xD574, 16}, {0xD576, 16}, {0xD57C, 16}, {0xD57E, 16},
{0xD5D4, 16}, {0xD5D6, 16}, {0xD5DC, 16}, {0xD5DE, 16},
{0xD5F4, 16}, {0xD5F6, 16}, {0xD5FC, 16}, {0xD5FE, 16},
{0xD754, 16}, {0xD756, 16}, {0xD75C, 16}, {0xD75E, 16},
{0xD774, 16}, {0xD776, 16}, {0xD77C, 16}, {0xD77E, 16},
{0xD7D4, 16}, {0xD7D6, 16}, {0xD7DC, 16}, {0xD7DE, 16},
{0xD7F4, 16}, {0xD7F6, 16}, {0xD7FC, 16}, {0xD7FE, 16},
{0xDD54, 16}, {0xDD56, 16}, {0xDD5C, 16}, {0xDD5E, 16},
{0xDD74, 16}, {0xDD76, 16}, {0xDD7C, 16}, {0xDD7E, 16},
{0xDDD4, 16}, {0xDDD6, 16}, {0xDDDC, 16}, {0xDDDE, 16},
{0xDDF4, 16}, {0xDDF6, 16}, {0xDDFC, 16}, {0xDDFE, 16},
{0xDF54, 16}, {0xDF56, 16}, {0xDF5C, 16}, {0xDF5E, 16},
{0xDF74, 16}, {0xDF76, 16}, {0xDF7C, 16}, {0xDF7E, 16},
{0xDFD4, 16}, {0xDFD6, 16}, {0xDFDC, 16}, {0xDFDE, 16},
{0xDFF4, 16}, {0xDFF6, 16}, {0xDFFC, 16}, {0xDFFE, 16},
{0xF554, 16}, {0xF556, 16}, {0xF55C, 16}, {0xF55E, 16},
{0xF574, 16}, {0xF576, 16}, {0xF57C, 16}, {0xF57E, 16},
{0xF5D4, 16}, {0xF5D6, 16}, {0xF5DC, 16}, {0xF5DE, 16},
{0xF5F4, 16}, {0xF5F6, 16}, {0xF5FC, 16}, {0xF5FE, 16},
{0xF754, 16}, {0xF756, 16}, {0xF75C, 16}, {0xF75E, 16},
{0xF774, 16}, {0xF776, 16}, {0xF77C, 16}, {0xF77E, 16},
{0xF7D4, 16}, {0xF7D6, 16}, {0xF7DC, 16}, {0xF7DE, 16},
{0xF7F4, 16}, {0xF7F6, 16}, {0xF7FC, 16}, {0xF7FE, 16},
{0xFD54, 16}, {0xFD56, 16}, {0xFD5C, 16}, {0xFD5E, 16},
{0xFD74, 16}, {0xFD76, 16}, {0xFD7C, 16}, {0xFD7E, 16},
{0xFDD4, 16}, {0xFDD6, 16}, {0xFDDC, 16}, {0xFDDE, 16},
{0xFDF4, 16}, {0xFDF6, 16}, {0xFDFC, 16}, {0xFDFE, 16},
{0xFF54, 16}, {0xFF56, 16}, {0xFF5C, 16}, {0xFF5E, 16},
{0xFF74, 16}, {0xFF76, 16}, {0xFF7C, 16}, {0xFF7E, 16},
{0xFFD4, 16}, {0xFFD6, 16}, {0xFFDC, 16}, {0xFFDE, 16},
{0xFFF4, 16}, {0xFFF6, 16}, {0xFFFC, 16}, {0xFFFE, 16}
};
#endif
// Return the position of the highest set bit of n, i.e. floor(log2(n)).
//
// n must be non-zero. Uses a compiler intrinsic when one was detected and a
// plain shift loop otherwise.
static int
blz_log2(unsigned long n)
{
	assert(n > 0);

#if defined(BLZ_BUILTIN_MSVC)
	unsigned long index_of_msb;

	_BitScanReverse(&index_of_msb, n);

	return (int) index_of_msb;
#elif defined(BLZ_BUILTIN_GCC)
	const int top_bit = (int) sizeof(n) * CHAR_BIT - 1;

	return top_bit - __builtin_clzl(n);
#else
	int shifts = 0;

	// Count how many times n can be halved before it reaches zero
	for (n >>= 1; n != 0; n >>= 1) {
		++shifts;
	}

	return shifts;
#endif
}
// Return the number of bits needed to gamma2-encode n (n >= 2).
//
// Every bit of n below the leading one is emitted as a pair of bits, hence
// twice the bit position of the leading one.
static unsigned long
blz_gamma_cost(unsigned long n)
{
	assert(n >= 2);

	const unsigned long value_bits = (unsigned long) blz_log2(n);

	return 2 * value_bits;
}
// Return the cost in bits of encoding a match with offset pos and length len:
// one tag bit, the gamma-coded length, the gamma-coded high part of the
// offset and the low 8 bits of the offset stored as a plain byte.
static unsigned long
blz_match_cost(unsigned long pos, unsigned long len)
{
	const unsigned long tag_bits = 1;
	const unsigned long len_bits = blz_gamma_cost(len - 2);
	const unsigned long offs_high_bits = blz_gamma_cost((pos >> 8) + 2);
	const unsigned long offs_low_bits = 8;

	return tag_bits + len_bits + offs_high_bits + offs_low_bits;
}
// Heuristic to compare matches
// Decide whether a candidate match (new_pos, new_len) found at cur should
// replace the current best match (pos, len).
//
// Returns 1 when the candidate is more than one byte longer, or when it is at
// least one byte longer and its offset divided by 8 does not exceed the
// current offset; returns 0 otherwise.
static int
blz_match_better(unsigned long cur, unsigned long new_pos, unsigned long new_len,
                 unsigned long pos, unsigned long len)
{
	const unsigned long best_offs = cur - pos - 1;
	const unsigned long cand_offs = cur - new_pos - 1;

	if (new_len > len + 1) {
		return 1;
	}

	if (new_len >= len + 1 && cand_offs / 8 <= best_offs) {
		return 1;
	}

	return 0;
}
// Heuristic to compare match with match at next position
// Decide whether the match (new_pos, new_len) found at position cur + 1 is
// preferable to the match (pos, len) found at cur.
//
// Returns 1 when any of the three length/offset trade-offs below holds and 0
// otherwise.
static int
blz_next_match_better(unsigned long cur, unsigned long new_pos, unsigned long new_len,
                      unsigned long pos, unsigned long len)
{
	const unsigned long cur_offs = cur - pos - 1;
	const unsigned long next_offs = cur + 1 - new_pos - 1;

	// Clearly longer, and not dramatically farther away
	if (new_len > len + 1 && next_offs / 8 < cur_offs) {
		return 1;
	}

	// Longer and closer
	if (new_len > len && next_offs < cur_offs) {
		return 1;
	}

	// Not shorter, and much closer
	if (new_len >= len && next_offs < cur_offs / 4) {
		return 1;
	}

	return 0;
}
// Append a single bit to the output tag stream.
//
// Tags are 16 bits wide. When the current tag has no room left it is written
// to the two bytes reserved at tag_out (low byte first) and two new bytes are
// reserved at next_out for the following tag.
//
// bit is expected to be 0 or 1; it is added into the lowest tag bit.
static void
blz_putbit(struct blz_state *bs, unsigned int bit)
{
// Check if tag is full
// (the post-decrement also accounts for the bit added below)
if (!bs->bits_left--) {
// Store tag
bs->tag_out[0] = bs->tag & 0x00FF;
bs->tag_out[1] = (bs->tag >> 8) & 0x00FF;
// Init next tag
bs->tag_out = bs->next_out;
bs->next_out += 2;
// 16 bits in the new tag, minus the one added below
bs->bits_left = 15;
}
// Shift bit into tag
bs->tag = (bs->tag << 1) + bit;
}
// Append the num lowest bits of bits to the output tag stream in one step.
//
// num must be in the range 0..16 and bits must not have any bit set above
// its num lowest bits. If the bits do not all fit into the current tag, the
// completed tag is stored at tag_out (low byte first) and a new tag is
// reserved at next_out.
static void
blz_putbits(struct blz_state *bs, unsigned long bits, int num)
{
assert(num >= 0 && num <= 16);
assert((bits & (~0UL << num)) == 0);
// Shift num bits into tag
unsigned long tag = ((unsigned long) bs->tag << num) | bits;
bs->tag = (unsigned int) tag;
// Check if tag is full
if (bs->bits_left < num) {
// Drop the bits that belong to the next tag to get the completed one
const unsigned int top16 = (unsigned int) (tag >> (num - bs->bits_left));
// Store tag
bs->tag_out[0] = top16 & 0x00FF;
bs->tag_out[1] = (top16 >> 8) & 0x00FF;
// Init next tag
bs->tag_out = bs->next_out;
bs->next_out += 2;
bs->bits_left += 16;
}
bs->bits_left -= num;
}
// Encode val using a universal code based on Elias gamma.
//
// This outputs each bit of val (after the leading one bit) as a pair where
// the first bit is the value, and the second is zero if this was the last
// pair, and one otherwise.
//
// 2 = 10 -> 00
// 3 = 11 -> 10
// 4 = 100 -> 01 00
// 5 = 101 -> 01 10
// 6 = 110 -> 11 00
// ...
//
// On modern hardware this variant is slower to decode because we cannot count
// the leading zeroes to get the number of value bits and then read them
// directly. However on constrained hardware, it has the advantage of being
// decodable using only one variable (register) and a tiny loop:
//
// result = 1;
// do { result = (result << 1) + getbit(); } while (getbit());
//
// Strictly speaking, this is order-1 exp-Golomb, where we interleave the
// value bits with the bits of the unary coding of the length, but I've always
// known it as the gamma2 code. I am not sure where it originated from, but I
// can see I used it in aPLib around 1998.
//
// Write val (val >= 2) to the tag stream using the gamma2 code.
//
// Unless BLZ_NO_LUT is defined, values below 512 are emitted from the
// precomputed blz_gamma_lookup table with a single blz_putbits call; all
// other values are emitted bit by bit.
static void
blz_putgamma(struct blz_state *bs, unsigned long val)
{
assert(val >= 2);
#if !defined(BLZ_NO_LUT)
// Output small values using lookup
if (val < 512) {
const unsigned int bits = blz_gamma_lookup[val][0];
const unsigned int shift = blz_gamma_lookup[val][1];
blz_putbits(bs, bits, (int) shift);
return;
}
#endif
// Create a mask for the second-highest bit of val
#if defined(BLZ_BUILTIN_MSVC)
unsigned long msb_pos;
_BitScanReverse(&msb_pos, val);
unsigned long mask = 1UL << (msb_pos - 1);
#elif defined(BLZ_BUILTIN_GCC)
unsigned long mask = 1UL << ((int) sizeof(val) * CHAR_BIT - 2 - __builtin_clzl(val));
#else
unsigned long mask = val >> 1;
// Clear bits except highest
while (mask & (mask - 1)) {
mask &= mask - 1;
}
#endif
// Output gamma2-encoded bits
// (value bits from high to low, each further value bit announced by a 1,
// and a final 0 to terminate)
blz_putbit(bs, (val & mask) ? 1 : 0);
while (mask >>= 1) {
blz_putbit(bs, 1);
blz_putbit(bs, (val & mask) ? 1 : 0);
}
blz_putbit(bs, 0);
}
// Terminate the tag stream and flush the last, partially filled tag.
//
// Returns a pointer one past the last byte of compressed output.
static unsigned char*
blz_finalize(struct blz_state *bs)
{
// Trailing one bit to delimit any literal tags
blz_putbit(bs, 1);
// Shift last tag into position and store
// (the unused low bits of the tag are left as zero)
bs->tag <<= bs->bits_left;
bs->tag_out[0] = bs->tag & 0x00FF;
bs->tag_out[1] = (bs->tag >> 8) & 0x00FF;
// Return pointer one past end of output
return bs->next_out;
}
// Hash four bytes starting at p.
//
// This is Fibonacci hashing, also known as Knuth's multiplicative hash. The
// constant is a prime close to 2^32/phi.
//
// Hash the four bytes at p down to a value of the given number of bits.
//
// The bytes are combined into a 32-bit word with p[0] as the least
// significant byte, multiplied by the hash constant, and the top `bits` bits
// of the 32-bit product are returned. bits must be in the range 1..32.
static unsigned long
blz_hash4_bits(const unsigned char *p, int bits)
{
	assert(bits > 0 && bits <= 32);

	uint32_t word = 0;

	// Assemble the word from the most significant byte down
	for (int i = 3; i >= 0; --i) {
		word = (word << 8) | (uint32_t) p[i];
	}

	return (word * UINT32_C(2654435761)) >> (32 - bits);
}
// Hash the four bytes at p to an index into the lookup table
// (BLZ_HASH_BITS bits wide).
static unsigned long
blz_hash4(const unsigned char *p)
{
	const unsigned long table_index = blz_hash4_bits(p, BLZ_HASH_BITS);

	return table_index;
}
// Return an upper bound on the compressed size of src_size bytes of input:
// the input size itself plus one eighth of it plus a constant 64 bytes.
size_t
blz_max_packed_size(size_t src_size)
{
	const size_t proportional_overhead = src_size / 8;
	const size_t fixed_overhead = 64;

	return src_size + proportional_overhead + fixed_overhead;
}
// Return the number of bytes of work memory blz_pack needs.
//
// The work memory holds the lookup table, whose size depends only on
// BLZ_HASH_BITS, so src_size is not used.
size_t
blz_workmem_size(size_t src_size)
{
	(void) src_size;

	const size_t entry_size = sizeof(blz_word);

	return LOOKUP_SIZE * entry_size;
}
// Simple LZSS using hashing.
//
// The lookup table stores the previous position in the input that had a given
// hash value, or NO_MATCH_POS if none.
//
// Compress src_size bytes from src into dst and return the compressed size.
//
// src      - input data
// dst      - output buffer; blz_max_packed_size() gives a bound on the
//            space needed
// src_size - number of input bytes, must be less than BLZ_WORD_MAX
// workmem  - memory for the lookup table, LOOKUP_SIZE entries of blz_word
//            (see blz_workmem_size)
//
// Empty input produces no output (returns 0); a single byte is stored
// verbatim (returns 1).
unsigned long
blz_pack(const void *src, void *dst, unsigned long src_size, void *workmem)
{
struct blz_state bs;
blz_word *const lookup = (blz_word *) workmem;
const unsigned char *const in = (const unsigned char *) src;
// Last position at which four input bytes are still available for hashing
const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0;
unsigned long hash_pos = 0;
unsigned long cur = 0;
assert(src_size < BLZ_WORD_MAX);
// Check for empty input
if (src_size == 0) {
return 0;
}
bs.next_out = (unsigned char *) dst;
// First byte verbatim
*bs.next_out++ = in[0];
// Check for 1 byte input
if (src_size == 1) {
return 1;
}
// Initialize first tag
bs.tag_out = bs.next_out;
bs.next_out += 2;
bs.tag = 0;
bs.bits_left = 16;
// Initialize lookup
for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) {
lookup[i] = NO_MATCH_POS;
}
// Main compression loop
for (cur = 1; cur <= last_match_pos; ) {
// Update lookup up to current position
// (this also covers positions skipped over by the previous match)
while (hash_pos < cur) {
lookup[blz_hash4(&in[hash_pos])] = hash_pos;
hash_pos++;
}
// Look up match for current position
const unsigned long pos = lookup[blz_hash4(&in[cur])];
unsigned long len = 0;
// Check match
if (pos != NO_MATCH_POS) {
// A match may not extend past the end of the input
const unsigned long len_limit = src_size - cur;
while (len < len_limit
&& in[pos + len] == in[cur + len]) {
++len;
}
}
// Output match or literal
//
// When offs >= 0x1FFE00, encoding a match of length 4
// (37 bits) is longer than encoding 4 literals (36 bits).
//
// The value 0x7E00 is a heuristic that sacrifices some
// length 4 matches in the hope that there will be a better
// match at the next position.
if (len > 4 || (len == 4 && cur - pos - 1 < 0x7E00UL)) {
const unsigned long offs = cur - pos - 1;
// Output match tag
blz_putbit(&bs, 1);
// Output match length
blz_putgamma(&bs, len - 2);
// Output match offset
// (high part gamma-coded, low 8 bits as a plain byte)
blz_putgamma(&bs, (offs >> 8) + 2);
*bs.next_out++ = offs & 0x00FF;
cur += len;
}
else {
// Output literal tag
blz_putbit(&bs, 0);
// Copy literal
*bs.next_out++ = in[cur++];
}
}
// Output any remaining literals
while (cur < src_size) {
// Output literal tag
blz_putbit(&bs, 0);
// Copy literal
*bs.next_out++ = in[cur++];
}
// Trailing one bit to delimit any literal tags
blz_putbit(&bs, 1);
// Shift last tag into position and store
bs.tag <<= bs.bits_left;
bs.tag_out[0] = bs.tag & 0x00FF;
bs.tag_out[1] = (bs.tag >> 8) & 0x00FF;
// Return compressed size
return (unsigned long) (bs.next_out - (unsigned char *) dst);
}
// Include compression algorithms used by blz_pack_level
#include "brieflz_btparse.h"
#include "brieflz_hashbucket.h"
#include "brieflz_lazy.h"
#include "brieflz_leparse.h"
// Return the number of bytes of work memory blz_pack_level needs to compress
// src_size bytes at the given level, or (size_t) -1 when level is not in the
// range 1..10.
size_t
blz_workmem_size_level(size_t src_size, int level)
{
	if (level == 1) {
		return blz_workmem_size(src_size);
	}

	if (level == 2) {
		return blz_lazy_workmem_size(src_size);
	}

	// Levels 3 and 4 use hash buckets of size 2 and 4
	if (level == 3) {
		return blz_hashbucket_workmem_size(src_size, 2);
	}

	if (level == 4) {
		return blz_hashbucket_workmem_size(src_size, 4);
	}

	if (level >= 5 && level <= 7) {
		return blz_leparse_workmem_size(src_size);
	}

	if (level >= 8 && level <= 10) {
		return blz_btparse_workmem_size(src_size);
	}

	return (size_t) -1;
}
// Compress src_size bytes from src into dst using the algorithm and
// parameters selected by level (1..10).
//
// workmem must provide blz_workmem_size_level(src_size, level) bytes.
// Returns the compressed size, or BLZ_ERROR when level is out of range.
unsigned long
blz_pack_level(const void *src, void *dst, unsigned long src_size,
               void *workmem, int level)
{
	unsigned long packed_size = BLZ_ERROR;

	switch (level) {
	case 1:
		packed_size = blz_pack(src, dst, src_size, workmem);
		break;
	case 2:
		packed_size = blz_pack_lazy(src, dst, src_size, workmem);
		break;
	case 3:
		packed_size = blz_pack_hashbucket(src, dst, src_size, workmem, 2, 16);
		break;
	case 4:
		packed_size = blz_pack_hashbucket(src, dst, src_size, workmem, 4, 16);
		break;
	case 5:
		packed_size = blz_pack_leparse(src, dst, src_size, workmem, 1, 16);
		break;
	case 6:
		packed_size = blz_pack_leparse(src, dst, src_size, workmem, 8, 32);
		break;
	case 7:
		packed_size = blz_pack_leparse(src, dst, src_size, workmem, 64, 64);
		break;
	case 8:
		packed_size = blz_pack_btparse(src, dst, src_size, workmem, 16, 96);
		break;
	case 9:
		packed_size = blz_pack_btparse(src, dst, src_size, workmem, 32, 224);
		break;
	case 10:
		packed_size = blz_pack_btparse(src, dst, src_size, workmem, ULONG_MAX, ULONG_MAX);
		break;
	default:
		// Unknown level, report an error
		break;
	}

	return packed_size;
}
// clang -g -O1 -fsanitize=fuzzer,address -DBLZ_FUZZING brieflz.c depack.c
#if defined(BLZ_FUZZING)
#include <limits.h>
#include <stddef.h>
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#ifndef BLZ_FUZZ_LEVEL
# define BLZ_FUZZ_LEVEL 1
#endif
// libFuzzer entry point: compress the input at level BLZ_FUZZ_LEVEL,
// decompress the result and abort if the round trip does not reproduce the
// input.
//
// data - fuzzer-provided input bytes
// size - number of bytes at data
//
// Always returns 0, as required by libFuzzer.
extern int
LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
{
	// Empty input has nothing to round-trip, and malloc(0) is allowed to
	// return NULL, which would otherwise be reported as a failure below.
	if (size == 0) { return 0; }
	if (size > ULONG_MAX / 2) { return 0; }
	void *workmem = malloc(blz_workmem_size_level(size, BLZ_FUZZ_LEVEL));
	void *packed = malloc(blz_max_packed_size(size));
	void *depacked = malloc(size);
	if (!workmem || !packed || !depacked) { abort(); }
	// The compressed size is not needed, blz_depack is driven by the
	// decompressed size.
	(void) blz_pack_level(data, packed, size, workmem, BLZ_FUZZ_LEVEL);
	blz_depack(packed, depacked, size);
	if (memcmp(data, depacked, size)) { abort(); }
	free(depacked);
	free(packed);
	free(workmem);
	return 0;
}
#endif

View File

@@ -0,0 +1,183 @@
/*
* BriefLZ - small fast Lempel-Ziv
*
* C/C++ header file
*
* Copyright (c) 2002-2020 Joergen Ibsen
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must
* not claim that you wrote the original software. If you use this
* software in a product, an acknowledgment in the product
* documentation would be appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must
* not be misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source
* distribution.
*/
#ifndef BRIEFLZ_H_INCLUDED
#define BRIEFLZ_H_INCLUDED
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
#define BLZ_VER_MAJOR 1 /**< Major version number */
#define BLZ_VER_MINOR 3 /**< Minor version number */
#define BLZ_VER_PATCH 0 /**< Patch version number */
#define BLZ_VER_STRING "1.3.0" /**< Version number as a string */
/**
 * Compile out the 256-entry `blz_gamma_lookup` table in the depacker; that
 * table is only built when `BLZ_NO_LUT` is not defined.
 *
 * NOTE(review): defined unconditionally here, so every file including this
 * header gets it. Presumably a local change to save space -- confirm, and
 * consider marking it as an alteration as the license text asks.
 */
#define BLZ_NO_LUT
/*
 * Symbol visibility macros.
 *
 * BLZ_API decorates the public functions declared below; BLZ_LOCAL is its
 * counterpart for symbols that should stay internal.
 *
 * - Without BLZ_DLL both expand to nothing.
 * - With BLZ_DLL on Windows/Cygwin, BLZ_API is dllexport when
 *   BLZ_DLL_EXPORTS is defined and dllimport otherwise; BLZ_LOCAL is empty.
 * - With BLZ_DLL elsewhere, GCC 4 or later gets default/hidden visibility
 *   attributes; other compilers get empty macros.
 */
#ifdef BLZ_DLL
# if defined(_WIN32) || defined(__CYGWIN__)
# ifdef BLZ_DLL_EXPORTS
# define BLZ_API __declspec(dllexport)
# else
# define BLZ_API __declspec(dllimport)
# endif
# define BLZ_LOCAL
# else
# if __GNUC__ >= 4
# define BLZ_API __attribute__ ((visibility ("default")))
# define BLZ_LOCAL __attribute__ ((visibility ("hidden")))
# else
# define BLZ_API
# define BLZ_LOCAL
# endif
# endif
#else
# define BLZ_API
# define BLZ_LOCAL
#endif
/**
* Return value on error.
*
* @see blz_depack_safe
*/
#ifndef BLZ_ERROR
# define BLZ_ERROR ((unsigned long) (-1))
#endif
/**
* Get bound on compressed data size.
*
* @see blz_pack
*
* @param src_size number of bytes to compress
* @return maximum size of compressed data
*/
BLZ_API size_t
blz_max_packed_size(size_t src_size);
/**
* Get required size of `workmem` buffer.
*
* @see blz_pack
*
* @param src_size number of bytes to compress
* @return required size in bytes of `workmem` buffer
*/
BLZ_API size_t
blz_workmem_size(size_t src_size);
/**
* Compress `src_size` bytes of data from `src` to `dst`.
*
* @param src pointer to data
* @param dst pointer to where to place compressed data
* @param src_size number of bytes to compress
* @param workmem pointer to memory for temporary use
* @return size of compressed data
*/
BLZ_API unsigned long
blz_pack(const void *src, void *dst, unsigned long src_size, void *workmem);
/**
* Get required size of `workmem` buffer.
*
* @see blz_pack_level
*
* @param src_size number of bytes to compress
* @param level compression level
* @return required size in bytes of `workmem` buffer
*/
BLZ_API size_t
blz_workmem_size_level(size_t src_size, int level);
/**
* Compress `src_size` bytes of data from `src` to `dst`.
*
* Compression levels between 1 and 9 offer a trade-off between
* time/space and ratio. Level 10 is optimal but very slow.
*
* @param src pointer to data
* @param dst pointer to where to place compressed data
* @param src_size number of bytes to compress
* @param workmem pointer to memory for temporary use
* @param level compression level
* @return size of compressed data
*/
BLZ_API unsigned long
blz_pack_level(const void *src, void *dst, unsigned long src_size,
void *workmem, int level);
/**
* Decompress `depacked_size` bytes of data from `src` to `dst`.
*
* @param src pointer to compressed data
* @param dst pointer to where to place decompressed data
* @param depacked_size size of decompressed data
* @return size of decompressed data
*/
BLZ_API unsigned long
blz_depack(const void *src, void *dst, unsigned long depacked_size);
/**
* Decompress `src_size` bytes of data from `src` to `dst`.
*
* This function is unsafe. If the provided data is malformed, it may
* read more than `src_size` from the `src` buffer.
*
* @param src pointer to compressed data
* @param dst pointer to where to place decompressed data
* @param src_size size of the compressed data
* @return size of decompressed data
*/
BLZ_API unsigned long
blz_depack_srcsize(const void *src, void *dst, unsigned long src_size);
/**
* Decompress `depacked_size` bytes of data from `src` to `dst`.
*
* Reads at most `src_size` bytes from `src`.
* Writes at most `depacked_size` bytes to `dst`.
*
* @param src pointer to compressed data
* @param src_size size of compressed data
* @param dst pointer to where to place decompressed data
* @param depacked_size size of decompressed data
* @return size of decompressed data, `BLZ_ERROR` on error
*/
BLZ_API unsigned long
blz_depack_safe(const void *src, unsigned long src_size,
void *dst, unsigned long depacked_size);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif /* BRIEFLZ_H_INCLUDED */

View File

@@ -0,0 +1,332 @@
//
// BriefLZ - small fast Lempel-Ziv
//
// Forwards dynamic programming parse using binary trees
//
// Copyright (c) 2016-2020 Joergen Ibsen
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must
// not claim that you wrote the original software. If you use this
// software in a product, an acknowledgment in the product
// documentation would be appreciated but is not required.
//
// 2. Altered source versions must be plainly marked as such, and must
// not be misrepresented as being the original software.
//
// 3. This notice may not be removed or altered from any source
// distribution.
//
#ifndef BRIEFLZ_BTPARSE_H_INCLUDED
#define BRIEFLZ_BTPARSE_H_INCLUDED
// Size in bytes of the work memory blz_pack_btparse() carves up for an input
// of src_size bytes.
static size_t
blz_btparse_workmem_size(size_t src_size)
{
    // cost, mpos and mlen each hold src_size + 1 words
    const size_t path_words = 3 * (src_size + 1);
    // two child links per input position
    const size_t tree_words = 2 * src_size;

    return (path_words + tree_words + LOOKUP_SIZE) * sizeof(blz_word);
}
// Forwards dynamic programming parse using binary trees, checking all
// possible matches.
//
// The match search uses a binary tree for each hash entry, which is updated
// dynamically as it is searched by re-rooting the tree at the search string.
//
// This does not result in balanced trees on all inputs, but often works well
// in practice, and has the advantage that we get the matches in order from
// closest and back.
//
// A drawback is the memory requirement of 5 * src_size words, since we cannot
// overlap the arrays in a forwards parse.
//
// This match search method is found in LZMA by Igor Pavlov, libdeflate
// by Eric Biggers, and other libraries.
//
// Compress src_size bytes from src to dst using a forwards optimal parse.
//
// @param src pointer to the data to compress
// @param dst pointer to where the compressed data is written
//        (NOTE(review): presumably sized with blz_max_packed_size() -- confirm)
// @param src_size number of bytes to compress, must be below BLZ_WORD_MAX
// @param workmem scratch memory, split below into cost, mpos, mlen
//        (src_size + 1 words each), nodes (2 * src_size words) and lookup
//        (LOOKUP_SIZE words); see blz_btparse_workmem_size()
// @param max_depth maximum number of tree nodes visited per position
// @param accept_len match length at or above which the search at a position
//        stops; positions covered by such a match only update the tree
// @return number of bytes written to dst (0 for empty input)
//
static unsigned long
blz_pack_btparse(const void *src, void *dst, unsigned long src_size, void *workmem,
const unsigned long max_depth, const unsigned long accept_len)
{
struct blz_state bs;
const unsigned char *const in = (const unsigned char *) src;
// Last position the main loop hashes with blz_hash4 (four bytes from the
// end of the input); 0 disables the main loop for very short inputs
const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0;
assert(src_size < BLZ_WORD_MAX);
// Check for empty input
if (src_size == 0) {
return 0;
}
bs.next_out = (unsigned char *) dst;
// First byte verbatim
*bs.next_out++ = in[0];
// Check for 1 byte input
if (src_size == 1) {
return 1;
}
// Initialize first tag
bs.tag_out = bs.next_out;
bs.next_out += 2;
bs.tag = 0;
bs.bits_left = 16;
// Inputs of 2 or 3 bytes are emitted as literals only, without touching
// workmem
if (src_size < 4) {
for (unsigned long i = 1; i < src_size; ++i) {
// Output literal tag
blz_putbit(&bs, 0);
// Copy literal
*bs.next_out++ = in[i];
}
// Return compressed size
return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst);
}
// Carve up workmem:
//   cost[i]  lowest known cost of encoding the first i input bytes
//   mpos[i]  offset minus one of the match arriving at i (unset for literals)
//   mlen[i]  length of the token arriving at i (1 means literal)
//   nodes    nodes[2 * p] and nodes[2 * p + 1] are the left (lesser) and
//            right (greater) children of position p
//   lookup   tree root for each hash value
blz_word *const cost = (blz_word *) workmem;
blz_word *const mpos = cost + src_size + 1;
blz_word *const mlen = mpos + src_size + 1;
blz_word *const nodes = mlen + src_size + 1;
blz_word *const lookup = nodes + 2 * src_size;
// Initialize lookup
for (unsigned long i = 0; i < LOOKUP_SIZE; ++i) {
lookup[i] = NO_MATCH_POS;
}
// Since we are not processing the first literal, update tree for
// position 0
lookup[blz_hash4(&in[0])] = 0;
nodes[0] = NO_MATCH_POS;
nodes[1] = NO_MATCH_POS;
// Initialize to all literals with infinite cost
for (unsigned long i = 0; i <= src_size; ++i) {
cost[i] = BLZ_WORD_MAX;
mlen[i] = 1;
}
// The first byte was stored without a tag bit, so arriving at position 1
// costs 8; every later literal adds 9 (see "Check literal" below)
cost[0] = 0;
cost[1] = 8;
// Next position where we are going to check matches
//
// This is used to skip matching while still updating the trees when
// we find a match that is accept_len or longer.
//
unsigned long next_match_cur = 1;
// Phase 1: Find lowest cost path arriving at each position
for (unsigned long cur = 1; cur <= last_match_pos; ++cur) {
// Adjust remaining costs to avoid overflow
//
// Subtracting the minimum from every finite cost from cur onwards keeps
// their relative order while making room for further additions.
if (cost[cur] > BLZ_WORD_MAX - 128) {
blz_word min_cost = BLZ_WORD_MAX;
for (unsigned long i = cur; i <= src_size; ++i) {
min_cost = cost[i] < min_cost ? cost[i] : min_cost;
}
for (unsigned long i = cur; i <= src_size; ++i) {
if (cost[i] != BLZ_WORD_MAX) {
cost[i] -= min_cost;
}
}
}
// Check literal
if (cost[cur + 1] > cost[cur] + 9) {
cost[cur + 1] = cost[cur] + 9;
mlen[cur + 1] = 1;
}
// Never let the "next position to match" fall behind the current one
if (cur > next_match_cur) {
next_match_cur = cur;
}
// Only matches longer than max_len are considered; starting at 3 means
// the shortest match recorded has length 4
unsigned long max_len = 3;
// Look up first match for current position
//
// pos is the current root of the tree of strings with this
// hash. We are going to re-root the tree so cur becomes the
// new root.
//
const unsigned long hash = blz_hash4(&in[cur]);
unsigned long pos = lookup[hash];
lookup[hash] = cur;
// Pending links of the new root: where the next lesser / greater string
// found during the descent is going to be attached
blz_word *lt_node = &nodes[2 * cur];
blz_word *gt_node = &nodes[2 * cur + 1];
// Match lengths of the most recent lesser / greater string visited
unsigned long lt_len = 0;
unsigned long gt_len = 0;
assert(pos == NO_MATCH_POS || pos < cur);
// If we are checking matches, allow lengths up to end of
// input, otherwise compare only up to accept_len
const unsigned long len_limit = cur == next_match_cur ? src_size - cur
: accept_len < src_size - cur ? accept_len
: src_size - cur;
unsigned long num_chain = max_depth;
// Check matches
for (;;) {
// If at bottom of tree, mark leaf nodes
//
// In case we reached max_depth, this also prunes the
// subtree we have not searched yet and do not know
// where belongs.
//
if (pos == NO_MATCH_POS || num_chain-- == 0) {
*lt_node = NO_MATCH_POS;
*gt_node = NO_MATCH_POS;
break;
}
// The string at pos is lexicographically greater than
// a string that matched in the first lt_len positions,
// and less than a string that matched in the first
// gt_len positions, so it must match up to at least
// the minimum of these.
unsigned long len = lt_len < gt_len ? lt_len : gt_len;
// Find match len
while (len < len_limit && in[pos + len] == in[cur + len]) {
++len;
}
// Extend current match if possible
//
// Note that we are checking matches in order from the
// closest and back. This means for a match further
// away, the encoding of all lengths up to the current
// max length will always be longer or equal, so we need
// only consider the extension.
//
if (cur == next_match_cur && len > max_len) {
for (unsigned long i = max_len + 1; i <= len; ++i) {
unsigned long match_cost = blz_match_cost(cur - pos - 1, i);
assert(match_cost < BLZ_WORD_MAX - cost[cur]);
unsigned long cost_there = cost[cur] + match_cost;
if (cost_there < cost[cur + i]) {
cost[cur + i] = cost_there;
// Stored as offset minus one, the form Phase 3 writes out
mpos[cur + i] = cur - pos - 1;
mlen[cur + i] = i;
}
}
max_len = len;
// A long enough match: skip match checking for the positions it covers
if (len >= accept_len) {
next_match_cur = cur + len;
}
}
// If we reach maximum match length, the string at pos
// is equal to cur, so we can assign the left and right
// subtrees.
//
// This removes pos from the tree, but we added cur
// which is equal and closer for future matches.
//
if (len >= accept_len || len == len_limit) {
*lt_node = nodes[2 * pos];
*gt_node = nodes[2 * pos + 1];
break;
}
// Go to previous match and restructure tree
//
// lt_node points to a node that is going to contain
// elements lexicographically less than cur (the search
// string).
//
// If the string at pos is less than cur, we set that
// lt_node to pos. We know that all elements in the
// left subtree are less than pos, and thus less than
// cur, so we point lt_node at the right subtree of
// pos and continue our search there.
//
// The equivalent applies to gt_node when the string at
// pos is greater than cur.
//
// len < len_limit here (the equal case broke out above), so both reads
// below stay inside the input
if (in[pos + len] < in[cur + len]) {
*lt_node = pos;
lt_node = &nodes[2 * pos + 1];
assert(*lt_node == NO_MATCH_POS || *lt_node < pos);
pos = *lt_node;
lt_len = len;
}
else {
*gt_node = pos;
gt_node = &nodes[2 * pos];
assert(*gt_node == NO_MATCH_POS || *gt_node < pos);
pos = *gt_node;
gt_len = len;
}
}
}
// The positions after last_match_pos are not searched for matches; they
// can only be reached by a literal step or by a match recorded earlier
for (unsigned long cur = last_match_pos + 1; cur < src_size; ++cur) {
// Check literal
if (cost[cur + 1] > cost[cur] + 9) {
cost[cur + 1] = cost[cur] + 9;
mlen[cur + 1] = 1;
}
}
// Phase 2: Follow lowest cost path backwards gathering tokens
//
// Walking back from the end, each token on the path is moved to the tail
// of mlen/mpos, so afterwards the tokens sit in output order at indices
// next_token + 1 .. src_size.
unsigned long next_token = src_size;
for (unsigned long cur = src_size; cur > 1; cur -= mlen[cur], --next_token) {
mlen[next_token] = mlen[cur];
mpos[next_token] = mpos[cur];
}
// Phase 3: Output tokens
unsigned long cur = 1;
for (unsigned long i = next_token + 1; i <= src_size; cur += mlen[i++]) {
if (mlen[i] == 1) {
// Output literal tag
blz_putbit(&bs, 0);
// Copy literal
*bs.next_out++ = in[cur];
}
else {
const unsigned long offs = mpos[i];
// Output match tag
blz_putbit(&bs, 1);
// Output match length
blz_putgamma(&bs, mlen[i] - 2);
// Output match offset
blz_putgamma(&bs, (offs >> 8) + 2);
*bs.next_out++ = offs & 0x00FF;
}
}
// Return compressed size
return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst);
}
#endif /* BRIEFLZ_BTPARSE_H_INCLUDED */

View File

@@ -0,0 +1,262 @@
//
// BriefLZ - small fast Lempel-Ziv
//
// Lazy parsing with multiple previous positions per hash
//
// Copyright (c) 2016-2020 Joergen Ibsen
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must
// not claim that you wrote the original software. If you use this
// software in a product, an acknowledgment in the product
// documentation would be appreciated but is not required.
//
// 2. Altered source versions must be plainly marked as such, and must
// not be misrepresented as being the original software.
//
// 3. This notice may not be removed or altered from any source
// distribution.
//
#ifndef BRIEFLZ_HASHBUCKET_H_INCLUDED
#define BRIEFLZ_HASHBUCKET_H_INCLUDED
// Size in bytes of the work memory blz_pack_hashbucket() needs: one bucket of
// bucket_size words for each of the LOOKUP_SIZE hash values. The input size
// does not influence the result.
static size_t
blz_hashbucket_workmem_size(size_t src_size, unsigned int bucket_size)
{
    const size_t word_size = sizeof(blz_word);

    (void) src_size;

    assert(bucket_size > 0);
    // The overflow check is only needed when bucket_size is as wide as size_t
    assert(sizeof(bucket_size) < sizeof(size_t)
        || bucket_size < SIZE_MAX / (LOOKUP_SIZE * sizeof(blz_word)));

    return (LOOKUP_SIZE * bucket_size) * word_size;
}
// Lazy parsing with multiple previous positions per hash.
//
// Instead of storing only the previous position a given hash occurred at,
// this stores the last bucket_size such positions in lookup. This means we
// can check each of these and choose the "best".
//
// There are multiple options for maintaining the entries of the buckets, we
// simply insert at the front to maintain the order of matches and avoid extra
// variables. This gives some overhead for moving elements, but as long as
// bucket_size is small and everything fits in a cache line it is pretty fast.
//
// If we find a match that is accept_len or longer, we stop searching.
//
// Insert pos at the front of the bucket selected by blz_hash4(&in[pos]).
//
// Existing entries move one slot towards the back and the entry in the last
// slot is dropped, so a bucket always lists positions from newest to oldest.
static void
blz_hashbucket_insert(blz_word *const lookup, const unsigned char *const in,
                      const unsigned long pos, const unsigned int bucket_size)
{
    blz_word *const bucket = &lookup[blz_hash4(&in[pos]) * bucket_size];
    unsigned long next = pos;

    for (unsigned int i = 0; i < bucket_size; ++i) {
        unsigned long tmp = bucket[i];
        bucket[i] = next;
        next = tmp;
    }
}

// Compress src_size bytes from src to dst using lazy parsing over hash
// buckets.
//
// @param src pointer to the data to compress
// @param dst pointer to where the compressed data is written
//        (NOTE(review): presumably sized with blz_max_packed_size() -- confirm)
// @param src_size number of bytes to compress, must be below BLZ_WORD_MAX
// @param workmem scratch memory holding LOOKUP_SIZE * bucket_size words; see
//        blz_hashbucket_workmem_size()
// @param bucket_size number of previous positions kept per hash, must be > 0
// @param accept_len match length at or above which searching stops
// @return number of bytes written to dst (0 for empty input)
//
static unsigned long
blz_pack_hashbucket(const void *src, void *dst, unsigned long src_size, void *workmem,
                    const unsigned int bucket_size, const unsigned long accept_len)
{
    struct blz_state bs;
    blz_word *const lookup = (blz_word *) workmem;
    const unsigned char *const in = (const unsigned char *) src;
    // Last position the main loop looks for matches at; 0 disables the loop
    const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0;
    // First position that has not been inserted into lookup yet
    unsigned long hash_pos = 0;
    unsigned long cur = 0;

    assert(src_size < BLZ_WORD_MAX);

    // Check for empty input
    if (src_size == 0) {
        return 0;
    }

    bs.next_out = (unsigned char *) dst;

    // First byte verbatim
    *bs.next_out++ = in[0];

    // Check for 1 byte input
    if (src_size == 1) {
        return 1;
    }

    // Initialize first tag
    bs.tag_out = bs.next_out;
    bs.next_out += 2;
    bs.tag = 0;
    bs.bits_left = 16;

    assert(bucket_size > 0);
    assert(sizeof(bucket_size) < sizeof(unsigned long)
        || bucket_size < ULONG_MAX / LOOKUP_SIZE);

    // Initialize lookup
    for (unsigned long i = 0; i < LOOKUP_SIZE * bucket_size; ++i) {
        lookup[i] = NO_MATCH_POS;
    }

    // Main compression loop
    for (cur = 1; cur <= last_match_pos; ) {
        // Update lookup up to current position
        while (hash_pos < cur) {
            blz_hashbucket_insert(lookup, in, hash_pos, bucket_size);
            hash_pos++;
        }

        unsigned long best_pos = NO_MATCH_POS;
        unsigned long best_len = 0;

        // Look up first match for current position
        const blz_word *const bucket = &lookup[blz_hash4(&in[cur]) * bucket_size];
        unsigned long pos = bucket[0];
        unsigned int bucket_idx = 0;

        const unsigned long len_limit = src_size - cur;

        // Check matches
        while (pos != NO_MATCH_POS) {
            unsigned long len = 0;

            // Check match
            //
            // Only measure the full length if the byte just past the best
            // length so far matches, i.e. if this candidate can be longer.
            if (best_len < len_limit
             && in[pos + best_len] == in[cur + best_len]) {
                while (len < len_limit && in[pos + len] == in[cur + len]) {
                    ++len;
                }
            }

            // Update best match
            if (blz_match_better(cur, pos, len, best_pos, best_len)) {
                best_pos = pos;
                best_len = len;
                if (best_len >= accept_len) {
                    break;
                }
            }

            // Go to previous match
            if (++bucket_idx == bucket_size) {
                break;
            }
            pos = bucket[bucket_idx];
        }

        // Check if match at next position is better
        if (best_len > 3 && best_len < accept_len && cur < last_match_pos) {
            // Update lookup up to next position
            blz_hashbucket_insert(lookup, in, hash_pos, bucket_size);
            hash_pos++;

            // Look up first match for next position
            const blz_word *const next_bucket = &lookup[blz_hash4(&in[cur + 1]) * bucket_size];
            unsigned long next_pos = next_bucket[0];
            unsigned int next_bucket_idx = 0;

            const unsigned long next_len_limit = src_size - (cur + 1);

            // Check matches
            while (next_pos != NO_MATCH_POS) {
                unsigned long next_len = 0;

                // Check match
                if (best_len - 1 < next_len_limit
                 && in[next_pos + best_len - 1] == in[cur + 1 + best_len - 1]) {
                    while (next_len < next_len_limit
                        && in[next_pos + next_len] == in[cur + 1 + next_len]) {
                        ++next_len;
                    }
                }

                if (next_len >= best_len) {
                    // Replace with next match if it extends backwards
                    if (next_pos > 0 && in[next_pos - 1] == in[cur]) {
                        if (blz_match_better(cur, next_pos - 1, next_len + 1, best_pos, best_len)) {
                            best_pos = next_pos - 1;
                            best_len = next_len + 1;
                        }
                    }
                    else {
                        // Drop current match if next match is better
                        if (blz_next_match_better(cur, next_pos, next_len, best_pos, best_len)) {
                            best_len = 0;
                            break;
                        }
                    }
                }

                // Go to previous match
                if (++next_bucket_idx == bucket_size) {
                    break;
                }
                next_pos = next_bucket[next_bucket_idx];
            }
        }

        // Output match or literal
        //
        // A match of exactly 4 bytes is only used when its offset is below
        // 0x3FE00; longer matches are always used.
        if (best_len > 4 || (best_len == 4 && cur - best_pos - 1 < 0x3FE00UL)) {
            const unsigned long offs = cur - best_pos - 1;

            // Output match tag
            blz_putbit(&bs, 1);

            // Output match length
            blz_putgamma(&bs, best_len - 2);

            // Output match offset
            blz_putgamma(&bs, (offs >> 8) + 2);
            *bs.next_out++ = offs & 0x00FF;

            cur += best_len;
        }
        else {
            // Output literal tag
            blz_putbit(&bs, 0);

            // Copy literal
            *bs.next_out++ = in[cur++];
        }
    }

    // Output any remaining literals
    while (cur < src_size) {
        // Output literal tag
        blz_putbit(&bs, 0);

        // Copy literal
        *bs.next_out++ = in[cur++];
    }

    // Trailing one bit to delimit any literal tags
    blz_putbit(&bs, 1);

    // Shift last tag into position and store
    bs.tag <<= bs.bits_left;
    bs.tag_out[0] = bs.tag & 0x00FF;
    bs.tag_out[1] = (bs.tag >> 8) & 0x00FF;

    // Return compressed size
    return (unsigned long) (bs.next_out - (unsigned char *) dst);
}
#endif /* BRIEFLZ_HASHBUCKET_H_INCLUDED */

View File

@@ -0,0 +1,192 @@
//
// BriefLZ - small fast Lempel-Ziv
//
// Lazy (non-greedy) parsing with one-byte-lookahead
//
// Copyright (c) 2016-2020 Joergen Ibsen
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must
// not claim that you wrote the original software. If you use this
// software in a product, an acknowledgment in the product
// documentation would be appreciated but is not required.
//
// 2. Altered source versions must be plainly marked as such, and must
// not be misrepresented as being the original software.
//
// 3. This notice may not be removed or altered from any source
// distribution.
//
#ifndef BRIEFLZ_LAZY_H_INCLUDED
#define BRIEFLZ_LAZY_H_INCLUDED
// Size in bytes of the work memory blz_pack_lazy() needs: a single lookup
// table of LOOKUP_SIZE words, whatever the input size.
static size_t
blz_lazy_workmem_size(size_t src_size)
{
    const size_t lookup_bytes = LOOKUP_SIZE * sizeof(blz_word);

    (void) src_size;

    return lookup_bytes;
}
// Lazy (non-greedy) parsing with one-byte-lookahead.
//
// Each time we find a match, we check if there is a better match at the next
// position, and if so encode a literal instead.
//
// Compress src_size bytes from src to dst, deferring a match by one byte
// whenever the following position offers a better one.
//
// workmem must provide LOOKUP_SIZE words (see blz_lazy_workmem_size()).
// Returns the number of bytes written to dst, which is 0 for empty input.
//
static unsigned long
blz_pack_lazy(const void *src, void *dst, unsigned long src_size, void *workmem)
{
    const unsigned char *const in = (const unsigned char *) src;
    blz_word *const lookup = (blz_word *) workmem;
    struct blz_state bs;

    assert(src_size < BLZ_WORD_MAX);

    // Nothing to do for empty input
    if (src_size == 0) {
        return 0;
    }

    // The first byte is stored as-is, without a tag bit
    bs.next_out = (unsigned char *) dst;
    *bs.next_out++ = in[0];

    // A single byte needs no tag word at all
    if (src_size == 1) {
        return 1;
    }

    // Reserve room for the first 16-bit tag
    bs.tag_out = bs.next_out;
    bs.next_out += 2;
    bs.tag = 0;
    bs.bits_left = 16;

    // Start with an empty lookup table
    for (unsigned long slot = 0; slot < LOOKUP_SIZE; ++slot) {
        lookup[slot] = NO_MATCH_POS;
    }

    // Last position at which matches are searched; 0 disables the search
    const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0;

    // First position not yet entered into lookup
    unsigned long unhashed = 0;
    unsigned long cur = 1;

    while (cur <= last_match_pos) {
        // Enter every position before cur into lookup
        for (; unhashed < cur; ++unhashed) {
            lookup[blz_hash4(&in[unhashed])] = unhashed;
        }

        // Candidate match for the current position
        unsigned long match_pos = lookup[blz_hash4(&in[cur])];
        unsigned long match_len = 0;

        if (match_pos != NO_MATCH_POS) {
            const unsigned long limit = src_size - cur;

            while (match_len < limit
                && in[match_pos + match_len] == in[cur + match_len]) {
                ++match_len;
            }
        }

        // With a usable match in hand, peek at the next position
        if (match_len > 3 && cur < last_match_pos) {
            // Enter the current position so the next one can refer to it
            lookup[blz_hash4(&in[unhashed])] = unhashed;
            ++unhashed;

            const unsigned long ahead = cur + 1;
            const unsigned long ahead_pos = lookup[blz_hash4(&in[ahead])];
            unsigned long ahead_len = 0;

            // A candidate at match_pos + 1 is just the current match shifted
            // by one byte, so it is not measured
            if (ahead_pos != NO_MATCH_POS && ahead_pos != match_pos + 1) {
                const unsigned long ahead_limit = src_size - ahead;

                // Measure the candidate only when the byte at index
                // match_len - 1 agrees; otherwise it cannot reach match_len
                if (match_len - 1 < ahead_limit
                 && in[ahead_pos + match_len - 1] == in[ahead + match_len - 1]) {
                    while (ahead_len < ahead_limit
                        && in[ahead_pos + ahead_len] == in[ahead + ahead_len]) {
                        ++ahead_len;
                    }
                }
            }

            if (ahead_len >= match_len) {
                if (ahead_pos > 0 && in[ahead_pos - 1] == in[cur]) {
                    // The next match also covers the current byte; take the
                    // extended match if it is the better one
                    if (blz_match_better(cur, ahead_pos - 1, ahead_len + 1, match_pos, match_len)) {
                        match_pos = ahead_pos - 1;
                        match_len = ahead_len + 1;
                    }
                }
                else if (blz_next_match_better(cur, ahead_pos, ahead_len, match_pos, match_len)) {
                    // Give up the current match; a literal is emitted below
                    match_len = 0;
                }
            }
        }

        // A match of exactly 4 bytes is only used when its offset is below
        // 0x3FE00; longer matches are always used
        const int emit_match = match_len > 4
            || (match_len == 4 && cur - match_pos - 1 < 0x3FE00UL);

        if (!emit_match) {
            // Literal: zero tag bit followed by the byte itself
            blz_putbit(&bs, 0);
            *bs.next_out++ = in[cur++];
            continue;
        }

        const unsigned long offs = cur - match_pos - 1;

        // Match: one tag bit, then the length, then the offset split into
        // gamma-coded high bits and a plain low byte
        blz_putbit(&bs, 1);
        blz_putgamma(&bs, match_len - 2);
        blz_putgamma(&bs, (offs >> 8) + 2);
        *bs.next_out++ = offs & 0x00FF;

        cur += match_len;
    }

    // Whatever is left goes out as literals
    for (; cur < src_size; ++cur) {
        blz_putbit(&bs, 0);
        *bs.next_out++ = in[cur];
    }

    // Trailing one bit to delimit any literal tags
    blz_putbit(&bs, 1);

    // Left-align the final tag and store it low byte first
    bs.tag <<= bs.bits_left;
    bs.tag_out[0] = bs.tag & 0x00FF;
    bs.tag_out[1] = (bs.tag >> 8) & 0x00FF;

    // Compressed size
    return (unsigned long) (bs.next_out - (unsigned char *) dst);
}
#endif /* BRIEFLZ_LAZY_H_INCLUDED */

View File

@@ -0,0 +1,256 @@
//
// BriefLZ - small fast Lempel-Ziv
//
// Backwards dynamic programming parse with left-extension of matches
//
// Copyright (c) 2016-2020 Joergen Ibsen
//
// This software is provided 'as-is', without any express or implied
// warranty. In no event will the authors be held liable for any damages
// arising from the use of this software.
//
// Permission is granted to anyone to use this software for any purpose,
// including commercial applications, and to alter it and redistribute it
// freely, subject to the following restrictions:
//
// 1. The origin of this software must not be misrepresented; you must
// not claim that you wrote the original software. If you use this
// software in a product, an acknowledgment in the product
// documentation would be appreciated but is not required.
//
// 2. Altered source versions must be plainly marked as such, and must
// not be misrepresented as being the original software.
//
// 3. This notice may not be removed or altered from any source
// distribution.
//
#ifndef BRIEFLZ_LEPARSE_H_INCLUDED
#define BRIEFLZ_LEPARSE_H_INCLUDED
// Size in bytes of the work memory blz_pack_leparse() needs for an input of
// src_size bytes.
static size_t
blz_leparse_workmem_size(size_t src_size)
{
    size_t num_words;

    if (LOOKUP_SIZE < 2 * src_size) {
        // Three arrays of src_size words; the lookup table overlaps two of them
        num_words = 3 * src_size;
    }
    else {
        // Small input: one array of src_size words followed by a full lookup table
        num_words = src_size + LOOKUP_SIZE;
    }

    return num_words * sizeof(blz_word);
}
// Backwards dynamic programming parse with left-extension of matches.
//
// Whenever we find a match that improves the cost at the current position,
// we try to extend this match to the left, and if possible we use that
// left-extension for each position to the left. Since we are processing
// the input from right to left, this matches repeated patterns without
// searching at each position.
//
// Essentially, this improves the worst case for the parsing at a small cost
// in ratio. The match finding is still O(n^2) in number of matches though,
// so may have to limit max_depth on larger block sizes.
//
// This is usually within a few percent of the "optimal" parse with the same
// parameters.
//
// Compress src_size bytes from src to dst using a backwards parse with
// left-extension of matches.
//
// @param src pointer to the data to compress
// @param dst pointer to where the compressed data is written
//        (NOTE(review): presumably sized with blz_max_packed_size() -- confirm)
// @param src_size number of bytes to compress, must be below BLZ_WORD_MAX
// @param workmem scratch memory; see blz_leparse_workmem_size() and the
//        layout notes below
// @param max_depth maximum number of hash chain entries visited per position
// @param accept_len match length at or above which the chain walk stops
// @return number of bytes written to dst (0 for empty input)
//
static unsigned long
blz_pack_leparse(const void *src, void *dst, unsigned long src_size, void *workmem,
const unsigned long max_depth, const unsigned long accept_len)
{
struct blz_state bs;
const unsigned char *const in = (const unsigned char *) src;
// Last position that is hashed and searched for matches; 0 for short input
const unsigned long last_match_pos = src_size > 4 ? src_size - 4 : 0;
assert(src_size < BLZ_WORD_MAX);
// Check for empty input
if (src_size == 0) {
return 0;
}
bs.next_out = (unsigned char *) dst;
// First byte verbatim
*bs.next_out++ = in[0];
// Check for 1 byte input
if (src_size == 1) {
return 1;
}
// Initialize first tag
bs.tag_out = bs.next_out;
bs.next_out += 2;
bs.tag = 0;
bs.bits_left = 16;
// Inputs of 2 or 3 bytes are emitted as literals only, without touching
// workmem
if (src_size < 4) {
for (unsigned long i = 1; i < src_size; ++i) {
// Output literal tag
blz_putbit(&bs, 0);
// Copy literal
*bs.next_out++ = in[i];
}
// Return compressed size
return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst);
}
// With a bit of careful ordering we can fit in 3 * src_size words.
//
// The idea is that the lookup is only used in the first phase to
// build the hash chains, so we overlap it with mpos and mlen.
// Also, since we are using prev from right to left in phase two,
// and that is the order we fill in cost, we can overlap these.
//
// One detail is that we actually use src_size + 1 elements of cost,
// but we put mpos after it, where we do not need the first element.
//
blz_word *const prev = (blz_word *) workmem;
blz_word *const mpos = prev + src_size;
blz_word *const mlen = mpos + src_size;
blz_word *const cost = prev;
blz_word *const lookup = mpos;
// Phase 1: Build hash chains
//
// For larger inputs the hash is narrowed to blz_log2(src_size) bits so that
// the 1 << bits lookup entries fit in the space shared with mpos and mlen.
// NOTE(review): assumes LOOKUP_SIZE == 1 << BLZ_HASH_BITS -- confirm.
const int bits = 2 * src_size < LOOKUP_SIZE ? BLZ_HASH_BITS : blz_log2(src_size);
// Initialize lookup
for (unsigned long i = 0; i < (1UL << bits); ++i) {
lookup[i] = NO_MATCH_POS;
}
// Build hash chains in prev
//
// prev[i] becomes the closest earlier position with the same hash as i.
if (last_match_pos > 0) {
for (unsigned long i = 0; i <= last_match_pos; ++i) {
const unsigned long hash = blz_hash4_bits(&in[i], bits);
prev[i] = lookup[hash];
lookup[hash] = i;
}
}
// Initialize last three positions as literals
//
// cost[i] is the cost of encoding everything from position i to the end;
// each literal costs 9.
mlen[src_size - 3] = 1;
mlen[src_size - 2] = 1;
mlen[src_size - 1] = 1;
cost[src_size - 3] = 27;
cost[src_size - 2] = 18;
cost[src_size - 1] = 9;
cost[src_size] = 0;
// Phase 2: Find lowest cost path from each position to end
for (unsigned long cur = last_match_pos; cur > 0; --cur) {
// Since we updated prev to the end in the first phase, we
// do not need to hash, but can simply look up the previous
// position directly.
//
// prev[cur] must be read before cost[cur] is written below, since the
// two arrays share storage.
unsigned long pos = prev[cur];
assert(pos == NO_MATCH_POS || pos < cur);
// Start with a literal
cost[cur] = cost[cur + 1] + 9;
mlen[cur] = 1;
// Only matches longer than max_len are considered; starting at 3 means
// the shortest match recorded has length 4
unsigned long max_len = 3;
const unsigned long len_limit = src_size - cur;
unsigned long num_chain = max_depth;
// Go through the chain of prev matches
for (; pos != NO_MATCH_POS && num_chain--; pos = prev[pos]) {
unsigned long len = 0;
// Only if the byte just past the current max length matches can this
// candidate be a longer match, so only then measure it
if (max_len < len_limit && in[pos + max_len] == in[cur + max_len]) {
// Find match len
while (len < len_limit && in[pos + len] == in[cur + len]) {
++len;
}
}
// Extend current match if possible
//
// Note that we are checking matches in order from the
// closest and back. This means for a match further
// away, the encoding of all lengths up to the current
// max length will always be longer or equal, so we need
// only consider the extension.
if (len > max_len) {
unsigned long min_cost = ULONG_MAX;
unsigned long min_cost_len = 3;
// Find lowest cost match length
for (unsigned long i = max_len + 1; i <= len; ++i) {
unsigned long match_cost = blz_match_cost(cur - pos - 1, i);
assert(match_cost < BLZ_WORD_MAX - cost[cur + i]);
unsigned long cost_here = match_cost + cost[cur + i];
if (cost_here < min_cost) {
min_cost = cost_here;
min_cost_len = i;
}
}
max_len = len;
// Update cost if cheaper
if (min_cost < cost[cur]) {
cost[cur] = min_cost;
// mpos holds the absolute match position here; the offset is
// derived from it in Phase 3
mpos[cur] = pos;
mlen[cur] = min_cost_len;
// Left-extend current match if possible
//
// This moves cur (the outer loop counter) to the left, so the
// positions covered by the extension are not searched again.
if (pos > 0 && in[pos - 1] == in[cur - 1]) {
do {
--cur;
--pos;
++min_cost_len;
unsigned long match_cost = blz_match_cost(cur - pos - 1, min_cost_len);
assert(match_cost < BLZ_WORD_MAX - cost[cur + min_cost_len]);
unsigned long cost_here = match_cost + cost[cur + min_cost_len];
cost[cur] = cost_here;
mpos[cur] = pos;
mlen[cur] = min_cost_len;
} while (pos > 0 && in[pos - 1] == in[cur - 1]);
break;
}
}
}
if (len >= accept_len || len == len_limit) {
break;
}
}
}
mpos[0] = 0;
mlen[0] = 1;
// Phase 3: Output compressed data, following lowest cost path
for (unsigned long i = 1; i < src_size; i += mlen[i]) {
if (mlen[i] == 1) {
// Output literal tag
blz_putbit(&bs, 0);
// Copy literal
*bs.next_out++ = in[i];
}
else {
const unsigned long offs = i - mpos[i] - 1;
// Output match tag
blz_putbit(&bs, 1);
// Output match length
blz_putgamma(&bs, mlen[i] - 2);
// Output match offset
blz_putgamma(&bs, (offs >> 8) + 2);
*bs.next_out++ = offs & 0x00FF;
}
}
// Return compressed size
return (unsigned long) (blz_finalize(&bs) - (unsigned char *) dst);
}
#endif /* BRIEFLZ_LEPARSE_H_INCLUDED */

View File

@@ -0,0 +1,271 @@
/*
* BriefLZ - small fast Lempel-Ziv
*
* C depacker
*
* Copyright (c) 2002-2018 Joergen Ibsen
*
* This software is provided 'as-is', without any express or implied
* warranty. In no event will the authors be held liable for any damages
* arising from the use of this software.
*
* Permission is granted to anyone to use this software for any purpose,
* including commercial applications, and to alter it and redistribute it
* freely, subject to the following restrictions:
*
* 1. The origin of this software must not be misrepresented; you must
* not claim that you wrote the original software. If you use this
* software in a product, an acknowledgment in the product
* documentation would be appreciated but is not required.
*
* 2. Altered source versions must be plainly marked as such, and must
* not be misrepresented as being the original software.
*
* 3. This notice may not be removed or altered from any source
* distribution.
*/
#include "brieflz.h"
/* Internal data structure */
/*
 * State shared by the depacker routines in this file.
 *
 * Note this is a different layout from the `struct blz_state` the packer
 * code uses (which has `next_out` and `tag_out` members).
 *
 * NOTE(review): the field roles below are inferred from names and types
 * only; confirm against the functions that use them.
 */
struct blz_state {
const unsigned char *src; /* presumably the read cursor in the compressed input */
unsigned char *dst; /* presumably the write cursor in the output buffer */
unsigned int tag; /* presumably the tag bits currently being consumed */
int bits_left; /* presumably the number of unread bits left in tag */
};
#if !defined(BLZ_NO_LUT)
/*
 * Lookup table for decoding a gamma2 code from the top 8 bits of the tag.
 *
 * The table is indexed by those 8 bits. Each entry is {value, bits}:
 *   - value is the decoded number (or the partial number decoded so far),
 *   - bits is how many tag bits the complete code occupied.
 * A bits field of 0 marks a code that does not terminate within 8 bits
 * (the "+" entries in the comments below); the decoder then consumes all
 * 8 bits and continues bit by bit starting from the partial value.
 */
static const unsigned char blz_gamma_lookup[256][2] = {
/* 00xxxxxx = 2 */
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
{2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2}, {2, 2},
/* 0100xxxx = 4 */
{4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4},
{4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4}, {4, 4},
/* 010100xx = 8 */
{8, 6}, {8, 6}, {8, 6}, {8, 6},
/* 01010100 = 16 01010101 = 16+ 01010110 = 17 01010111 = 17+ */
{16, 8}, {16, 0}, {17, 8}, {17, 0},
/* 010110xx = 9 */
{9, 6}, {9, 6}, {9, 6}, {9, 6},
/* 01011100 = 18 01011101 = 18+ 01011110 = 19 01011111 = 19+ */
{18, 8}, {18, 0}, {19, 8}, {19, 0},
/* 0110xxxx = 5 */
{5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4},
{5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4}, {5, 4},
/* 011100xx = 10 */
{10, 6}, {10, 6}, {10, 6}, {10, 6},
/* 01110100 = 20 01110101 = 20+ 01110110 = 21 01110111 = 21+ */
{20, 8}, {20, 0}, {21, 8}, {21, 0},
/* 011110xx = 11 */
{11, 6}, {11, 6}, {11, 6}, {11, 6},
/* 01111100 = 22 01111101 = 22+ 01111110 = 23 01111111 = 23+ */
{22, 8}, {22, 0}, {23, 8}, {23, 0},
/* 10xxxxxx = 3 */
{3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
{3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
{3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
{3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
{3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
{3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
{3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
{3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2}, {3, 2},
/* 1100xxxx = 6 */
{6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4},
{6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4}, {6, 4},
/* 110100xx = 12 */
{12, 6}, {12, 6}, {12, 6}, {12, 6},
/* 11010100 = 24 11010101 = 24+ 11010110 = 25 11010111 = 25+ */
{24, 8}, {24, 0}, {25, 8}, {25, 0},
/* 110110xx = 13 */
{13, 6}, {13, 6}, {13, 6}, {13, 6},
/* 11011100 = 26 11011101 = 26+ 11011110 = 27 11011111 = 27+ */
{26, 8}, {26, 0}, {27, 8}, {27, 0},
/* 1110xxxx = 7 */
{7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4},
{7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4}, {7, 4},
/* 111100xx = 14 */
{14, 6}, {14, 6}, {14, 6}, {14, 6},
/* 11110100 = 28 11110101 = 28+ 11110110 = 29 11110111 = 29+ */
{28, 8}, {28, 0}, {29, 8}, {29, 0},
/* 111110xx = 15 */
{15, 6}, {15, 6}, {15, 6}, {15, 6},
/* 11111100 = 30 11111101 = 30+ 11111110 = 31 11111111 = 31+ */
{30, 8}, {30, 0}, {31, 8}, {31, 0}
};
#endif
/*
 * Read the next control bit from the compressed stream.
 *
 * Bits are delivered most-significant first out of a 16-bit tag word.
 * When the current tag is exhausted, a new one is fetched from the
 * source as two bytes in little-endian order.
 *
 * Returns 0 or 1.
 */
static unsigned int
blz_getbit(struct blz_state *bs)
{
	unsigned int msb;

	if (bs->bits_left == 0) {
		/* Tag is empty: refill it from the next two source bytes */
		const unsigned int low_byte = bs->src[0];
		const unsigned int high_byte = bs->src[1];

		bs->src += 2;
		bs->tag = low_byte | (high_byte << 8);
		/* One of the 16 fresh bits is handed out right below */
		bs->bits_left = 15;
	}
	else {
		bs->bits_left -= 1;
	}

	/* Take bit 15, then move the following bit into its place */
	msb = (bs->tag >> 15) & 1u;
	bs->tag <<= 1;

	return msb;
}
/*
 * Decode one gamma2-encoded number from the bit stream.
 *
 * The code consists of (data bit, continue bit) pairs; decoding starts
 * from 1 and appends one data bit per pair until a continue bit of 0 is
 * seen. Unless BLZ_NO_LUT is defined, codes that fit in the next 8 tag
 * bits are resolved through blz_gamma_lookup in a single step.
 *
 * Returns the decoded value.
 */
static unsigned long
blz_getgamma(struct blz_state *bs)
{
	unsigned long value = 1;

#if !defined(BLZ_NO_LUT)
	/* Fast path: only usable while a whole byte of tag bits is available */
	if (bs->bits_left >= 8) {
		const unsigned char *entry = blz_gamma_lookup[(bs->tag >> 8) & 0x00FF];
		const int used_bits = (int) entry[1];

		value = entry[0];

		if (used_bits != 0) {
			/* The complete code was inside these 8 bits */
			bs->tag <<= used_bits;
			bs->bits_left -= used_bits;
			return value;
		}

		/* Code is longer than 8 bits: drop them and keep decoding below */
		bs->tag <<= 8;
		bs->bits_left -= 8;
	}
#endif

	/* Slow path: one data bit followed by one continue bit per round */
	for (;;) {
		value = (value << 1) + blz_getbit(bs);
		if (!blz_getbit(bs)) {
			break;
		}
	}

	return value;
}
/*
 * Decompress BriefLZ data until depacked_size bytes have been produced.
 *
 * src           - compressed input
 * dst           - output buffer
 * depacked_size - number of decompressed bytes to produce
 *
 * No bounds checking is performed on either buffer; a match that crosses
 * depacked_size is still copied in full, so the return value can exceed
 * depacked_size.
 *
 * Returns the number of bytes written to dst.
 */
unsigned long
blz_depack(const void *src, void *dst, unsigned long depacked_size)
{
	struct blz_state bs;
	unsigned long produced = 0;

	bs.src = (const unsigned char *) src;
	bs.dst = (unsigned char *) dst;

	/*
	 * Start with a single pending tag bit that is zero, so the very
	 * first symbol is decoded as a literal without a tag being loaded.
	 */
	bs.tag = 0x4000;
	bs.bits_left = 1;

	while (produced < depacked_size) {
		if (!blz_getbit(&bs)) {
			/* Tag bit 0: a single literal byte */
			*bs.dst++ = *bs.src++;
			produced++;
			continue;
		}

		/* Tag bit 1: a match, stored as length followed by offset */
		{
			const unsigned long len = blz_getgamma(&bs) + 2;
			const unsigned long off_high = blz_getgamma(&bs) - 2;
			const unsigned long off = (off_high << 8) + (unsigned long) *bs.src++ + 1;
			const unsigned char *from = bs.dst - off;
			unsigned long remaining = len;

			/* Byte-wise forward copy so overlapping matches replicate data */
			while (remaining > 0) {
				*bs.dst++ = *from++;
				--remaining;
			}

			produced += len;
		}
	}

	return produced;
}
/*
 * Decompress BriefLZ data until src_size bytes of input have been consumed.
 *
 * src      - compressed input
 * dst      - output buffer
 * src_size - number of compressed bytes available at src
 *
 * This is the counterpart of blz_depack for callers that know the
 * compressed size rather than the decompressed size. No bounds checking
 * is performed on the output buffer.
 *
 * Returns the number of bytes written to dst.
 */
unsigned long
blz_depack_srcsize(const void *src, void *dst, unsigned long src_size)
{
	struct blz_state bs;
	unsigned long dst_size = 0;
	/*
	 * Convert to a byte pointer before adding: arithmetic directly on
	 * `void *` is a GNU extension and is rejected by ISO C / C++ compilers.
	 */
	const unsigned char *src_end = (const unsigned char *) src + src_size;

	bs.src = (const unsigned char *) src;
	bs.dst = (unsigned char *) dst;

	/* Initialise to one bit left in tag; that bit is zero (a literal) */
	bs.bits_left = 1;
	bs.tag = 0x4000;

	/* Main decompression loop */
	while (bs.src < src_end) {
		if (blz_getbit(&bs)) {
			/* Input match length and offset */
			unsigned long len = blz_getgamma(&bs) + 2;
			unsigned long off = blz_getgamma(&bs) - 2;

			/* Low 8 bits of the offset are stored as a raw byte */
			off = (off << 8) + (unsigned long) *bs.src++ + 1;

			/* Copy match (byte-wise, so overlapping matches work) */
			{
				const unsigned char *p = bs.dst - off;
				unsigned long i;

				for (i = len; i > 0; --i) {
					*bs.dst++ = *p++;
				}
			}

			dst_size += len;
		}
		else {
			/* Copy literal */
			*bs.dst++ = *bs.src++;

			dst_size++;
		}
	}

	/* Return decompressed size */
	return dst_size;
}

View File

@@ -1,79 +0,0 @@
This directory contains files originally written by other people.
## Simplified LZFX-based compression library
- `lzfx.c`
- `lzfx.h`
The above files are obtained from https://github.com/janding/lzfx (commit
448017f). It is a fork by Jan Ding (GitHub user "janding") based on the LZFX
compression library by Andrew Collette.
### License:
```
LZFX is copyright (c) 2009 Andrew Collette and subject to the BSD license
(below). Original LZF copyright statement follows.
Copyright (c) 2000-2007 Marc Alexander Lehmann <schmorp@schmorp.de>
Redistribution and use in source and binary forms, with or without modifica-
tion, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
OF THE POSSIBILITY OF SUCH DAMAGE.
```
## lzfx-boot
- `lzfx-host-compress.c` (original: `lzfx-raw.c`)
The above file is obtained from https://github.com/janding/lzfx-boot (commit
88b1596).
### License:
```
BSD 2-Clause License
Copyright (c) 2017, Jan Ding
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
* Redistributions of source code must retain the above copyright notice, this
list of conditions and the following disclaimer.
* Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
```

View File

@@ -1,357 +0,0 @@
/*
* Copyright (c) 2009 Andrew Collette <andrew.collette at gmail.com>
* http://lzfx.googlecode.com
*
* Implements an LZF-compatible compressor/decompressor based on the liblzf
* codebase written by Marc Lehmann. This code is released under the BSD
* license. License and original copyright statement follow.
*
*
* Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#include "lzfx.h"
/* Number of entries in the compressor's hash table (2**LZFX_HLOG) */
#define LZFX_HSIZE (1 << (LZFX_HLOG))
/* We need this for memset */
#ifdef __cplusplus
#include <cstring>
#else
#include <string.h>
#endif
/*
 * Branch hints: on GCC 3 and later these wrap __builtin_expect to mark a
 * condition as expected false / expected true; on other compilers they
 * expand to the bare expression.
 */
#if __GNUC__ >= 3
#define fx_expect_false(expr) __builtin_expect((expr) != 0, 0)
#define fx_expect_true(expr) __builtin_expect((expr) != 0, 1)
#else
#define fx_expect_false(expr) (expr)
#define fx_expect_true(expr) (expr)
#endif
/* Byte type used for all buffer accesses in this file */
typedef unsigned char u8;
/* Array type of LZFX_HSIZE pointers into a const byte buffer */
typedef const u8 * LZSTATE[LZFX_HSIZE];
/*
 * Define the hash function.
 *
 * LZFX_FRST seeds a hash value from the two bytes at p, LZFX_NEXT shifts
 * in the byte at p[2], and LZFX_IDX folds a hash value into an index in
 * the range [0, LZFX_HSIZE). Every macro argument is parenthesised so
 * that expressions such as `ptr + 1` can be passed safely.
 */
#define LZFX_FRST(p)    ((((p)[0]) << 8) | (p)[1])
#define LZFX_NEXT(v, p) (((v) << 8) | (p)[2])
#define LZFX_IDX(h)     ((((h) >> (3 * 8 - LZFX_HLOG)) - (h)) & (LZFX_HSIZE - 1))
/*
 * These cannot be changed, as they are related to the compressed format.
 * The replacement lists are fully parenthesised so the constants behave
 * as single values inside larger expressions.
 */
#define LZFX_MAX_LIT ((1 << 5) - 1)            /* longest literal run */
#define LZFX_MAX_OFF (1 << 13)                 /* exclusive limit on the encoded back-reference offset */
#define LZFX_MAX_REF ((1 << 8) + (1 << 3) - 2) /* longest back-reference */
static int lzfx_getsize(const void *ibuf, unsigned int ilen, unsigned int *olen);
/* Compressed format
There are two kinds of structures in LZF/LZFX: literal runs and back
references. Literals are encoded as follows:
LLLLL000 <L bytes>
Back references are encoded as follows. The smallest possible encoded
length value is 1, as otherwise the control byte would be recognized as
a literal run. At least three bytes must match for a back reference
to be inserted. The offset (distance to the desired data in the output
buffer) is encoded as o - 1, as all offsets are at least 1. The binary
format is:
oooooLLL oooooooo for backrefs of real length < 7 (1 <= L < 7)
ooooo111 LLLLLLLL oooooooo for backrefs of real length >= 7 (L >= 7)
*/
/*
 * Compress ilen bytes from ibuf into obuf as a sequence of literal runs
 * and back references.
 *
 * ibuf - input data; may be NULL only when ilen is 0
 * ilen - number of input bytes
 * obuf - output buffer; must not be NULL when ibuf is non-NULL
 * olen - in: capacity of obuf in bytes; out: number of bytes written
 *
 * Returns 0 on success, LZFX_EARGS for invalid (NULL) arguments and
 * LZFX_ESIZE when the output buffer is too small.
 */
int lzfx_compress(const void *const ibuf, const unsigned int ilen, void *obuf, unsigned int *const olen) {
  /* Hash table; an array of u8*'s which point
     to various locations in the input buffer */
  const u8 *htab[LZFX_HSIZE];
  const u8 **hslot; /* Pointer to entry in hash table */
  unsigned int hval; /* Hash value generated by LZFX_FRST / LZFX_NEXT */
  const u8 *ref;    /* Pointer to candidate match location in input */
  const u8 *ip = (const u8 *)ibuf;
  const u8 *const in_end = ip + ilen;
  u8 *op = (u8 *)obuf;
  const u8 *const out_end = (olen == NULL ? NULL : op + *olen);
  int lit; /* # of bytes in current literal run */
#if defined(WIN32) && defined(_M_X64)
  unsigned _int64 off; /* workaround for missing POSIX compliance */
#else
  unsigned long off;
#endif

  if (olen == NULL)
    return LZFX_EARGS;
  if (ibuf == NULL) {
    if (ilen != 0)
      return LZFX_EARGS;
    *olen = 0;
    return 0;
  }
  if (obuf == NULL)
    return LZFX_EARGS;

  memset(htab, 0, sizeof(htab));

  /* Start a literal run. Whenever we do this the output pointer is
     advanced because the current byte will hold the encoded length. */
  lit = 0;
  op++;

  /* Seed the hash only when two input bytes actually exist. For shorter
     inputs the main loop below never runs and hval is never consumed, so
     reading ip[0] / ip[1] here would be a pure out-of-bounds access. */
  hval = 0;
  if (ilen >= 2)
    hval = LZFX_FRST(ip);

  while (ip + 2 < in_end) { /* The NEXT macro reads 2 bytes ahead */
    hval = LZFX_NEXT(hval, ip);
    hslot = htab + LZFX_IDX(hval);
    ref = *hslot;
    *hslot = ip;

    if (ref < ip && (off = ip - ref - 1) < LZFX_MAX_OFF && ip + 4 < in_end /* Backref takes up to 3 bytes, so don't bother */
        && ref > (u8 *)ibuf && ref[0] == ip[0] && ref[1] == ip[1] && ref[2] == ip[2]) {
      unsigned int len = 3; /* We already know 3 bytes match */
      const unsigned int maxlen = in_end - ip - 2 > LZFX_MAX_REF ? LZFX_MAX_REF : in_end - ip - 2;

      /* lit == 0:  op + 3 must be < out_end (because we undo the run)
         lit != 0:  op + 3 + 1 must be < out_end */
      if (fx_expect_false(op - !lit + 3 + 1 >= out_end))
        return LZFX_ESIZE;

      op[-lit - 1] = lit << 3; /* Terminate literal run */
      op -= !lit;              /* Undo run if length is zero */

      /* Start checking at the fourth byte */
      while (len < maxlen && ref[len] == ip[len])
        len++;

      /* Format 1: [oooooLLL oooooooo] */
      if (len < 7) {
        *op++ = ((off >> 8) << 3) + len;
        *op++ = off;
        /* Format 2: [ooooo111 LLLLLLLL oooooooo] */
      } else {
        *op++ = ((off >> 8) << 3) + 7;
        *op++ = len - 7;
        *op++ = off;
      }

      lit = 0;
      op++;

      ip += len - 1; /* ip = initial ip + #octets - 1 */

      if (fx_expect_false(ip + 3 >= in_end)) {
        ip++; /* Code following expects exit at bottom of loop */
        break;
      }

      hval = LZFX_FRST(ip);
      hval = LZFX_NEXT(hval, ip);
      htab[LZFX_IDX(hval)] = ip;

      ip++; /* ip = initial ip + #octets */
    } else {
      /* Keep copying literal bytes */
      if (fx_expect_false(op >= out_end))
        return LZFX_ESIZE;

      lit++;
      *op++ = *ip++;

      if (fx_expect_false(lit == LZFX_MAX_LIT)) {
        op[-lit - 1] = lit << 3; /* stop run */
        lit = 0;
        op++; /* start run */
      }
    } /* if() found match in htab */
  }   /* while(ip < ilen -2) */

  /* At most 3 bytes remain in input.  We therefore need 4 bytes available
     in the output buffer to store them (3 data + ctrl byte).*/
  if (op + 3 > out_end)
    return LZFX_ESIZE;

  while (ip < in_end) {
    lit++;
    *op++ = *ip++;

    if (fx_expect_false(lit == LZFX_MAX_LIT)) {
      op[-lit - 1] = lit << 3;
      lit = 0;
      op++;
    }
  }

  /* Close the final literal run, dropping it again if it is empty */
  op[-lit - 1] = lit << 3;
  op -= !lit;

  *olen = op - (u8 *)obuf;
  return 0;
}
/*
 * Decompressor
 *
 * ibuf - compressed input; may be NULL only when ilen is 0
 * ilen - number of compressed bytes
 * obuf - output buffer; may be NULL only when *olen is 0, in which case
 *        only the decompressed size is computed
 * olen - in: capacity of obuf in bytes; out: decompressed size
 *
 * Returns 0 on success, LZFX_EARGS for invalid arguments and
 * LZFX_ECORRUPT for malformed input. When obuf is too small the stream is
 * only measured: *olen receives the total decompressed size and the result
 * of that measurement is returned.
 */
int lzfx_decompress(const void *ibuf, unsigned int ilen, void *obuf, unsigned int *olen) {
  u8 const *ip = (const u8 *)ibuf;
  u8 const *const in_end = ip + ilen;
  u8 *op = (u8 *)obuf;
  u8 const *const out_end = (olen == NULL ? NULL : op + *olen);
  unsigned int remain_len = 0;
  int rc;

  if (olen == NULL)
    return LZFX_EARGS;

  if (ibuf == NULL) {
    if (ilen != 0)
      return LZFX_EARGS;
    *olen = 0;
    return 0;
  }

  if (obuf == NULL) {
    /* A NULL output buffer is only valid as a size query, i.e. with a
       zero capacity. Test the value: the pointer itself is already known
       to be non-NULL here, so comparing it against 0 rejected every call. */
    if (*olen != 0)
      return LZFX_EARGS;
    return lzfx_getsize(ibuf, ilen, olen);
  }

  /* Pre-checked loop so that an empty input never reads a control byte */
  while (ip < in_end) {
    unsigned int ctrl = *ip++;

    /* Format LLLLL000: a literal byte string follows, of length L */
    if ((ctrl & 0x7) == 0) {
      unsigned int len = ctrl >> 3;

      if (fx_expect_false(op + len > out_end)) {
        --ip; /* Rewind to control byte */
        goto guess;
      }
      if (fx_expect_false(ip + len > in_end))
        return LZFX_ECORRUPT;

      /* Pre-checked copy: a zero-length run copies nothing instead of
         wrapping the counter around */
      while (len != 0) {
        *op++ = *ip++;
        --len;
      }

      /* Format #1 [oooooLLL oooooooo]: backref of length L
                    ^^^^^    ^^^^^^^^
                      A          B
                #2 [ooooo111 LLLLLLLL oooooooo] backref of length L+7
                    ^^^^^             ^^^^^^^^
                      A                   B
         In both cases the location of the backref is computed from the
         remaining part of the data as follows:
            location = op - A*256 - B - 1
      */
    } else {
      unsigned int len = ctrl & 0x7;
      u8 *ref = op - ((ctrl >> 3) << 8) - 1;

      if (len == 7) {
        /* The extended length byte must lie inside the input */
        if (fx_expect_false(ip >= in_end))
          return LZFX_ECORRUPT;
        len += *ip++; /* i.e. format #2 */
      }

      if (fx_expect_false(op + len > out_end)) {
        ip -= (len >= 7) ? 2 : 1; /* Rewind to control byte */
        goto guess;
      }
      if (fx_expect_false(ip >= in_end))
        return LZFX_ECORRUPT;

      ref -= *ip++;

      if (fx_expect_false(ref < (u8 *)obuf))
        return LZFX_ECORRUPT;

      /* len is at least 1 here; byte-wise copy allows overlapping refs */
      do
        *op++ = *ref++;
      while (--len);
    }
  }

  *olen = op - (u8 *)obuf;
  return 0;

guess:
  /* Output buffer exhausted: measure the rest of the stream and report the
     total size (bytes already written plus bytes still to come) */
  rc = lzfx_getsize(ip, ilen - (ip - (u8 *)ibuf), &remain_len);
  if (rc >= 0)
    *olen = remain_len + (op - (u8 *)obuf);
  return rc;
}
/*
 * Guess len. No parameters may be NULL; this is not checked.
 *
 * Walks the compressed stream without producing output and sums the
 * lengths of all literal runs and back references.
 *
 * ibuf - compressed input
 * ilen - number of compressed bytes
 * olen - out: decompressed size of the stream
 *
 * Returns 0 on success or LZFX_ECORRUPT if the stream is truncated.
 */
static int lzfx_getsize(const void *ibuf, unsigned int ilen, unsigned int *olen) {
  u8 const *ip = (const u8 *)ibuf;
  u8 const *const in_end = ip + ilen;
  int tot_len = 0;

  while (ip < in_end) {
    unsigned int ctrl = *ip++;

    if ((ctrl & 0x7) == 0) {
      /* Literal run: the control byte is followed by (ctrl >> 3) data bytes */
      if (ip + (ctrl >> 3) > in_end)
        return LZFX_ECORRUPT;
      tot_len += (ctrl >> 3);
      ip += (ctrl >> 3);
    } else {
      unsigned int len = ctrl & 0x7;

      if (len == 7) { /* i.e. format #2 */
        /* Check before reading so a truncated stream is not over-read */
        if (ip >= in_end)
          return LZFX_ECORRUPT;
        len += *ip++;
      }

      if (ip >= in_end)
        return LZFX_ECORRUPT;

      ip++; /* skip the ref byte */
      tot_len += len;
    }
  }

  *olen = tot_len;
  return 0;
}

View File

@@ -1,95 +0,0 @@
/*
* Copyright (c) 2009 Andrew Collette <andrew.collette at gmail.com>
* http://lzfx.googlecode.com
*
* Implements an LZF-compatible compressor/decompressor based on the liblzf
* codebase written by Marc Lehmann. This code is released under the BSD
* license. License and original copyright statement follow.
*
*
* Copyright (c) 2000-2008 Marc Alexander Lehmann <schmorp@schmorp.de>
*
* Redistribution and use in source and binary forms, with or without modifica-
* tion, are permitted provided that the following conditions are met:
*
* 1. Redistributions of source code must retain the above copyright notice,
* this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED
* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MER-
* CHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPE-
* CIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef LZFX_H
#define LZFX_H
#ifdef __cplusplus
extern "C" {
#endif
/* Documented behavior, including function signatures and error codes,
is guaranteed to remain unchanged for releases with the same major
version number. Releases of the same major version are also able
to read each other's output, although the output itself is not
guaranteed to be byte-for-byte identical.
*/
#define LZFX_VERSION_MAJOR 0
#define LZFX_VERSION_MINOR 1
#define LZFX_VERSION_STRING "0.1"
/* Hashtable size (2**LZFX_HLOG entries) */
#ifndef LZFX_HLOG
#define LZFX_HLOG 16
#endif
/* Predefined errors. */
#define LZFX_ESIZE -1 /* Output buffer too small */
#define LZFX_ECORRUPT -2 /* Invalid data for decompression */
#define LZFX_EARGS -3 /* Arguments invalid (NULL) */
/* Buffer-to-buffer compression.
Supply pre-allocated input and output buffers via ibuf and obuf, and
their size in bytes via ilen and olen. Buffers may not overlap.
On entry *olen is the capacity of obuf; olen itself must not be NULL.
ibuf may be NULL only if ilen is 0, in which case *olen is set to 0.
On success, the function returns a non-negative value and the argument
olen contains the compressed size in bytes. On failure, a negative
value is returned and olen is not modified.
*/
int lzfx_compress(const void *ibuf, unsigned int ilen, void *obuf, unsigned int *olen);
/* Buffer-to-buffer decompression.
Supply pre-allocated input and output buffers via ibuf and obuf, and
their size in bytes via ilen and olen. Buffers may not overlap.
On success, the function returns a non-negative value and the argument
olen contains the uncompressed size in bytes. On failure, a negative
value is returned.
If the failure code is LZFX_ESIZE, olen contains the minimum buffer size
required to hold the decompressed data. Otherwise, olen is not modified.
Supplying a zero *olen is a valid and supported strategy to determine the
required buffer size. This does not require decompression of the entire
stream and is consequently very fast. Argument obuf may be NULL in
this case only.
*/
int lzfx_decompress(const void *ibuf, unsigned int ilen, void *obuf, unsigned int *olen);
#ifdef __cplusplus
} /* extern "C" */
#endif
#endif