1
0
forked from me/IronOS

Reformat lzfx sources

This commit is contained in:
Alvin Wong
2021-04-12 20:07:06 +08:00
parent 07fb8adb7f
commit 11f40cfc1b
3 changed files with 305 additions and 307 deletions

View File

@@ -5,10 +5,8 @@
/* Program to demonstrate file compression. Don't use it in the real world! */ /* Program to demonstrate file compression. Don't use it in the real world! */
int main(int argc, char **argv) int main(int argc, char **argv) {
{ if (argc == 3) {
if (argc == 3)
{
/* open input */ /* open input */
FILE *f = fopen(argv[1], "rb"); FILE *f = fopen(argv[1], "rb");
if (!f) { if (!f) {
@@ -43,9 +41,7 @@ int main(int argc, char **argv)
fclose(f); fclose(f);
return 0; return 0;
} } else {
else
{
printf("Compresses a file.\n\nUsage: lzfx-raw input output\n"); printf("Compresses a file.\n\nUsage: lzfx-raw input output\n");
return 1; return 1;
} }

View File

@@ -29,7 +29,7 @@
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE. * OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#include "lzfx.h" #include "lzfx.h"
@@ -37,34 +37,33 @@
/* We need this for memset */ /* We need this for memset */
#ifdef __cplusplus #ifdef __cplusplus
# include <cstring> #include <cstring>
#else #else
# include <string.h> #include <string.h>
#endif #endif
#if __GNUC__ >= 3 #if __GNUC__ >= 3
# define fx_expect_false(expr) __builtin_expect((expr) != 0, 0) #define fx_expect_false(expr) __builtin_expect((expr) != 0, 0)
# define fx_expect_true(expr) __builtin_expect((expr) != 0, 1) #define fx_expect_true(expr) __builtin_expect((expr) != 0, 1)
#else #else
# define fx_expect_false(expr) (expr) #define fx_expect_false(expr) (expr)
# define fx_expect_true(expr) (expr) #define fx_expect_true(expr) (expr)
#endif #endif
typedef unsigned char u8; typedef unsigned char u8;
typedef const u8 *LZSTATE[LZFX_HSIZE]; typedef const u8 * LZSTATE[LZFX_HSIZE];
/* Define the hash function */ /* Define the hash function */
#define LZFX_FRST(p) (((p[0]) << 8) | p[1]) #define LZFX_FRST(p) (((p[0]) << 8) | p[1])
#define LZFX_NEXT(v,p) (((v) << 8) | p[2]) #define LZFX_NEXT(v, p) (((v) << 8) | p[2])
#define LZFX_IDX(h) ((( h >> (3*8 - LZFX_HLOG)) - h ) & (LZFX_HSIZE - 1)) #define LZFX_IDX(h) (((h >> (3 * 8 - LZFX_HLOG)) - h) & (LZFX_HSIZE - 1))
/* These cannot be changed, as they are related to the compressed format. */ /* These cannot be changed, as they are related to the compressed format. */
#define LZFX_MAX_LIT (1 << 5) - 1 #define LZFX_MAX_LIT (1 << 5) - 1
#define LZFX_MAX_OFF (1 << 13) #define LZFX_MAX_OFF (1 << 13)
#define LZFX_MAX_REF ((1 << 8) + (1 << 3) - 2) #define LZFX_MAX_REF ((1 << 8) + (1 << 3) - 2)
static static int lzfx_getsize(const void *ibuf, unsigned int ilen, unsigned int *olen);
int lzfx_getsize(const void* ibuf, unsigned int ilen, unsigned int *olen);
/* Compressed format /* Compressed format
@@ -83,72 +82,70 @@ int lzfx_getsize(const void* ibuf, unsigned int ilen, unsigned int *olen);
oooooLLL oooooooo for backrefs of real length < 7 (1 <= L < 7) oooooLLL oooooooo for backrefs of real length < 7 (1 <= L < 7)
ooooo111 LLLLLLLL oooooooo for backrefs of real length >= 7 (L >= 7) ooooo111 LLLLLLLL oooooooo for backrefs of real length >= 7 (L >= 7)
*/ */
int lzfx_compress(const void *const ibuf, const unsigned int ilen, int lzfx_compress(const void *const ibuf, const unsigned int ilen, void *obuf, unsigned int *const olen) {
void *obuf, unsigned int *const olen){
/* Hash table; an array of u8*'s which point /* Hash table; an array of u8*'s which point
to various locations in the input buffer */ to various locations in the input buffer */
const u8 *htab[LZFX_HSIZE]; const u8 *htab[LZFX_HSIZE];
const u8 **hslot; /* Pointer to entry in hash table */ const u8 ** hslot; /* Pointer to entry in hash table */
unsigned int hval; /* Hash value generated by macros above */ unsigned int hval; /* Hash value generated by macros above */
const u8 *ref; /* Pointer to candidate match location in input */ const u8 * ref; /* Pointer to candidate match location in input */
const u8 *ip = (const u8 *)ibuf; const u8 * ip = (const u8 *)ibuf;
const u8 *const in_end = ip + ilen; const u8 *const in_end = ip + ilen;
u8 *op = (u8 *)obuf; u8 * op = (u8 *)obuf;
const u8 *const out_end = (olen == NULL ? NULL : op + *olen); const u8 *const out_end = (olen == NULL ? NULL : op + *olen);
int lit; /* # of bytes in current literal run */ int lit; /* # of bytes in current literal run */
#if defined (WIN32) && defined (_M_X64) #if defined(WIN32) && defined(_M_X64)
unsigned _int64 off; /* workaround for missing POSIX compliance */ unsigned _int64 off; /* workaround for missing POSIX compliance */
#else #else
unsigned long off; unsigned long off;
#endif #endif
if(olen == NULL) return LZFX_EARGS; if (olen == NULL)
if(ibuf == NULL){ return LZFX_EARGS;
if(ilen != 0) return LZFX_EARGS; if (ibuf == NULL) {
if (ilen != 0)
return LZFX_EARGS;
*olen = 0; *olen = 0;
return 0; return 0;
} }
if(obuf == NULL) return LZFX_EARGS; if (obuf == NULL)
return LZFX_EARGS;
memset(htab, 0, sizeof(htab)); memset(htab, 0, sizeof(htab));
/* Start a literal run. Whenever we do this the output pointer is /* Start a literal run. Whenever we do this the output pointer is
advanced because the current byte will hold the encoded length. */ advanced because the current byte will hold the encoded length. */
lit = 0; op++; lit = 0;
op++;
hval = LZFX_FRST(ip); hval = LZFX_FRST(ip);
while(ip + 2 < in_end){ /* The NEXT macro reads 2 bytes ahead */ while (ip + 2 < in_end) { /* The NEXT macro reads 2 bytes ahead */
hval = LZFX_NEXT(hval, ip); hval = LZFX_NEXT(hval, ip);
hslot = htab + LZFX_IDX(hval); hslot = htab + LZFX_IDX(hval);
ref = *hslot; *hslot = ip; ref = *hslot;
*hslot = ip;
if( ref < ip if (ref < ip && (off = ip - ref - 1) < LZFX_MAX_OFF && ip + 4 < in_end /* Backref takes up to 3 bytes, so don't bother */
&& (off = ip - ref - 1) < LZFX_MAX_OFF && ref > (u8 *)ibuf && ref[0] == ip[0] && ref[1] == ip[1] && ref[2] == ip[2]) {
&& ip + 4 < in_end /* Backref takes up to 3 bytes, so don't bother */
&& ref > (u8 *)ibuf
&& ref[0] == ip[0]
&& ref[1] == ip[1]
&& ref[2] == ip[2] ) {
unsigned int len = 3; /* We already know 3 bytes match */ unsigned int len = 3; /* We already know 3 bytes match */
const unsigned int maxlen = in_end - ip - 2 > LZFX_MAX_REF ? const unsigned int maxlen = in_end - ip - 2 > LZFX_MAX_REF ? LZFX_MAX_REF : in_end - ip - 2;
LZFX_MAX_REF : in_end - ip - 2;
/* lit == 0: op + 3 must be < out_end (because we undo the run) /* lit == 0: op + 3 must be < out_end (because we undo the run)
lit != 0: op + 3 + 1 must be < out_end */ lit != 0: op + 3 + 1 must be < out_end */
if(fx_expect_false(op - !lit + 3 + 1 >= out_end)) if (fx_expect_false(op - !lit + 3 + 1 >= out_end))
return LZFX_ESIZE; return LZFX_ESIZE;
op [- lit - 1] = lit << 3;/* Terminate literal run */ op[-lit - 1] = lit << 3; /* Terminate literal run */
op -= !lit; /* Undo run if length is zero */ op -= !lit; /* Undo run if length is zero */
/* Start checking at the fourth byte */ /* Start checking at the fourth byte */
@@ -167,31 +164,35 @@ int lzfx_compress(const void *const ibuf, const unsigned int ilen,
*op++ = off; *op++ = off;
} }
lit = 0; op++; lit = 0;
op++;
ip += len - 1; /* ip = initial ip + #octets - 1 */ ip += len - 1; /* ip = initial ip + #octets - 1 */
if (fx_expect_false (ip + 3 >= in_end)){ if (fx_expect_false(ip + 3 >= in_end)) {
ip++; /* Code following expects exit at bottom of loop */ ip++; /* Code following expects exit at bottom of loop */
break; break;
} }
hval = LZFX_FRST (ip); hval = LZFX_FRST(ip);
hval = LZFX_NEXT (hval, ip); hval = LZFX_NEXT(hval, ip);
htab[LZFX_IDX (hval)] = ip; htab[LZFX_IDX(hval)] = ip;
ip++; /* ip = initial ip + #octets */ ip++; /* ip = initial ip + #octets */
} else { } else {
/* Keep copying literal bytes */ /* Keep copying literal bytes */
if (fx_expect_false (op >= out_end)) return LZFX_ESIZE; if (fx_expect_false(op >= out_end))
return LZFX_ESIZE;
lit++; *op++ = *ip++; lit++;
*op++ = *ip++;
if (fx_expect_false (lit == LZFX_MAX_LIT)) { if (fx_expect_false(lit == LZFX_MAX_LIT)) {
op [- lit - 1] = lit << 3; /* stop run */ op[-lit - 1] = lit << 3; /* stop run */
lit = 0; op++; /* start run */ lit = 0;
op++; /* start run */
} }
} /* if() found match in htab */ } /* if() found match in htab */
@@ -200,19 +201,22 @@ int lzfx_compress(const void *const ibuf, const unsigned int ilen,
/* At most 3 bytes remain in input. We therefore need 4 bytes available /* At most 3 bytes remain in input. We therefore need 4 bytes available
in the output buffer to store them (3 data + ctrl byte).*/ in the output buffer to store them (3 data + ctrl byte).*/
if (op + 3 > out_end) return LZFX_ESIZE; if (op + 3 > out_end)
return LZFX_ESIZE;
while (ip < in_end) { while (ip < in_end) {
lit++; *op++ = *ip++; lit++;
*op++ = *ip++;
if (fx_expect_false (lit == LZFX_MAX_LIT)){ if (fx_expect_false(lit == LZFX_MAX_LIT)) {
op [- lit - 1] = lit << 3; op[-lit - 1] = lit << 3;
lit = 0; op++; lit = 0;
op++;
} }
} }
op [- lit - 1] = lit << 3; op[-lit - 1] = lit << 3;
op -= !lit; op -= !lit;
*olen = op - (u8 *)obuf; *olen = op - (u8 *)obuf;
@@ -220,25 +224,27 @@ int lzfx_compress(const void *const ibuf, const unsigned int ilen,
} }
/* Decompressor */ /* Decompressor */
int lzfx_decompress(const void* ibuf, unsigned int ilen, int lzfx_decompress(const void *ibuf, unsigned int ilen, void *obuf, unsigned int *olen) {
void* obuf, unsigned int *olen){
u8 const *ip = (const u8 *)ibuf; u8 const * ip = (const u8 *)ibuf;
u8 const *const in_end = ip + ilen; u8 const *const in_end = ip + ilen;
u8 *op = (u8 *)obuf; u8 * op = (u8 *)obuf;
u8 const *const out_end = (olen == NULL ? NULL : op + *olen); u8 const *const out_end = (olen == NULL ? NULL : op + *olen);
unsigned int remain_len = 0; unsigned int remain_len = 0;
int rc; int rc;
if(olen == NULL) return LZFX_EARGS; if (olen == NULL)
if(ibuf == NULL){ return LZFX_EARGS;
if(ilen != 0) return LZFX_EARGS; if (ibuf == NULL) {
if (ilen != 0)
return LZFX_EARGS;
*olen = 0; *olen = 0;
return 0; return 0;
} }
if(obuf == NULL){ if (obuf == NULL) {
if(olen != 0) return LZFX_EARGS; if (olen != 0)
return LZFX_EARGS;
return lzfx_getsize(ibuf, ilen, olen); return lzfx_getsize(ibuf, ilen, olen);
} }
@@ -246,18 +252,19 @@ int lzfx_decompress(const void* ibuf, unsigned int ilen,
unsigned int ctrl = *ip++; unsigned int ctrl = *ip++;
/* Format LLLLL000: a literal byte string follows, of length L */ /* Format LLLLL000: a literal byte string follows, of length L */
if((ctrl & 0x7) == 0) { if ((ctrl & 0x7) == 0) {
unsigned int len = ctrl >> 3; unsigned int len = ctrl >> 3;
if(fx_expect_false(op + len > out_end)){ if (fx_expect_false(op + len > out_end)) {
--ip; /* Rewind to control byte */ --ip; /* Rewind to control byte */
goto guess; goto guess;
} }
if(fx_expect_false(ip + len > in_end)) return LZFX_ECORRUPT; if (fx_expect_false(ip + len > in_end))
return LZFX_ECORRUPT;
do do
*op++ = *ip++; *op++ = *ip++;
while(--len); while (--len);
/* Format #1 [oooooLLL oooooooo]: backref of length L+1 /* Format #1 [oooooLLL oooooooo]: backref of length L+1
^^^^^ ^^^^^^^^ ^^^^^ ^^^^^^^^
@@ -273,19 +280,22 @@ int lzfx_decompress(const void* ibuf, unsigned int ilen,
} else { } else {
unsigned int len = ctrl & 0x7; unsigned int len = ctrl & 0x7;
u8 *ref = op - ((ctrl >> 3) << 8) - 1; u8 * ref = op - ((ctrl >> 3) << 8) - 1;
if(len==7) len += *ip++; /* i.e. format #2 */ if (len == 7)
len += *ip++; /* i.e. format #2 */
if(fx_expect_false(op + len > out_end)){ if (fx_expect_false(op + len > out_end)) {
ip -= (len >= 7) ? 2 : 1; /* Rewind to control byte */ ip -= (len >= 7) ? 2 : 1; /* Rewind to control byte */
goto guess; goto guess;
} }
if(fx_expect_false(ip >= in_end)) return LZFX_ECORRUPT; if (fx_expect_false(ip >= in_end))
return LZFX_ECORRUPT;
ref -= *ip++; ref -= *ip++;
if(fx_expect_false(ref < (u8*)obuf)) return LZFX_ECORRUPT; if (fx_expect_false(ref < (u8 *)obuf))
return LZFX_ECORRUPT;
do do
*op++ = *ref++; *op++ = *ref++;
@@ -299,16 +309,16 @@ int lzfx_decompress(const void* ibuf, unsigned int ilen,
return 0; return 0;
guess: guess:
rc = lzfx_getsize(ip, ilen - (ip-(u8*)ibuf), &remain_len); rc = lzfx_getsize(ip, ilen - (ip - (u8 *)ibuf), &remain_len);
if(rc>=0) *olen = remain_len + (op - (u8*)obuf); if (rc >= 0)
*olen = remain_len + (op - (u8 *)obuf);
return rc; return rc;
} }
/* Guess len. No parameters may be NULL; this is not checked. */ /* Guess len. No parameters may be NULL; this is not checked. */
static static int lzfx_getsize(const void *ibuf, unsigned int ilen, unsigned int *olen) {
int lzfx_getsize(const void* ibuf, unsigned int ilen, unsigned int *olen){
u8 const *ip = (const u8 *)ibuf; u8 const * ip = (const u8 *)ibuf;
u8 const *const in_end = ip + ilen; u8 const *const in_end = ip + ilen;
int tot_len = 0; int tot_len = 0;
@@ -316,9 +326,9 @@ int lzfx_getsize(const void* ibuf, unsigned int ilen, unsigned int *olen){
unsigned int ctrl = *ip++; unsigned int ctrl = *ip++;
if((ctrl & 0x7) == 0) { if ((ctrl & 0x7) == 0) {
if(ip + (ctrl >> 3) > in_end) if (ip + (ctrl >> 3) > in_end)
return LZFX_ECORRUPT; return LZFX_ECORRUPT;
tot_len += (ctrl >> 3); tot_len += (ctrl >> 3);
@@ -328,25 +338,20 @@ int lzfx_getsize(const void* ibuf, unsigned int ilen, unsigned int *olen){
unsigned int len = ctrl & 0x7; unsigned int len = ctrl & 0x7;
if(len==7){ /* i.e. format #2 */ if (len == 7) { /* i.e. format #2 */
len += *ip++; len += *ip++;
} }
if(ip >= in_end) return LZFX_ECORRUPT; if (ip >= in_end)
return LZFX_ECORRUPT;
ip++; /* skip the ref byte */ ip++; /* skip the ref byte */
tot_len += len; tot_len += len;
} }
} }
*olen = tot_len; *olen = tot_len;
return 0; return 0;
} }

View File

@@ -29,7 +29,7 @@
* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH- * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTH-
* ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED * ERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
* OF THE POSSIBILITY OF SUCH DAMAGE. * OF THE POSSIBILITY OF SUCH DAMAGE.
*/ */
#ifndef LZFX_H #ifndef LZFX_H
#define LZFX_H #define LZFX_H
@@ -50,7 +50,7 @@ extern "C" {
/* Hashtable size (2**LZFX_HLOG entries) */ /* Hashtable size (2**LZFX_HLOG entries) */
#ifndef LZFX_HLOG #ifndef LZFX_HLOG
# define LZFX_HLOG 16 #define LZFX_HLOG 16
#endif #endif
/* Predefined errors. */ /* Predefined errors. */
@@ -67,8 +67,7 @@ extern "C" {
olen contains the compressed size in bytes. On failure, a negative olen contains the compressed size in bytes. On failure, a negative
value is returned and olen is not modified. value is returned and olen is not modified.
*/ */
int lzfx_compress(const void* ibuf, unsigned int ilen, int lzfx_compress(const void *ibuf, unsigned int ilen, void *obuf, unsigned int *olen);
void* obuf, unsigned int *olen);
/* Buffer-to-buffer decompression. /* Buffer-to-buffer decompression.
@@ -87,9 +86,7 @@ int lzfx_compress(const void* ibuf, unsigned int ilen,
stream and is consequently very fast. Argument obuf may be NULL in stream and is consequently very fast. Argument obuf may be NULL in
this case only. this case only.
*/ */
int lzfx_decompress(const void* ibuf, unsigned int ilen, int lzfx_decompress(const void *ibuf, unsigned int ilen, void *obuf, unsigned int *olen);
void* obuf, unsigned int *olen);
#ifdef __cplusplus #ifdef __cplusplus
} /* extern "C" */ } /* extern "C" */