LCOV - code coverage report
Current view: top level - lib/compression/tests - test_lzxpress_plain.c (source / functions)
Test: coverage report for vadcx-master-patch-75612 fe003de8
Date: 2024-02-29 22:57:05

             Hit    Total    Coverage
Lines:       480      494      97.2 %
Functions:    19       19     100.0 %

          Line data    Source code
       1             : /*
       2             :    Unix SMB/CIFS implementation.
       3             :    test suite for the compression functions
       4             : 
       5             :    Copyright (C) Jelmer Vernooij 2007
       6             : 
       7             :    This program is free software; you can redistribute it and/or modify
       8             :    it under the terms of the GNU General Public License as published by
       9             :    the Free Software Foundation; either version 3 of the License, or
      10             :    (at your option) any later version.
      11             : 
      12             :    This program is distributed in the hope that it will be useful,
      13             :    but WITHOUT ANY WARRANTY; without even the implied warranty of
      14             :    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
      15             :    GNU General Public License for more details.
      16             : 
      17             :    You should have received a copy of the GNU General Public License
      18             :    along with this program.  If not, see <http://www.gnu.org/licenses/>.
      19             : */
      20             : 
      21             : #include <stdarg.h>
      22             : #include <stddef.h>
      23             : #include <setjmp.h>
      24             : #include <sys/stat.h>
      25             : #include <cmocka.h>
      26             : #include "includes.h"
      27             : #include "talloc.h"
      28             : #include "lzxpress.h"
      29             : #include "lib/util/base64.h"
      30             : 
      31             : 
      32             : /* set LZX_DEBUG_FILES to true to save round-trip files in /tmp. */
      33             : #define LZX_DEBUG_FILES false
      34             : 
      35             : /* set LZX_DEBUG_VERBOSE to true to print more. */
      36             : #define LZX_DEBUG_VERBOSE false
      37             : 
      38             : 
      39             : #if LZX_DEBUG_VERBOSE
      40             : #define debug_message(...) print_message(__VA_ARGS__)
      41             : 
      42             : #include <time.h>
      43             : 
      44             : struct timespec start = {0};
      45             : struct timespec end = {0};
      46             : static void debug_start_timer(void)
      47             : {
      48             :         clock_gettime(CLOCK_MONOTONIC, &start);
      49             : }
      50             : 
      51             : static void debug_end_timer(const char *name, size_t len)
      52             : {
      53             :         uint64_t ns;
      54             :         double secs;
      55             :         double rate;
      56             :         clock_gettime(CLOCK_MONOTONIC, &end);
      57             :         ns = end.tv_nsec;
      58             :         ns += end.tv_sec * 1000 * 1000 * 1000;
      59             :         ns -= start.tv_nsec;
      60             :         ns -= start.tv_sec * 1000 * 1000 * 1000;
      61             :         secs = ns / 1e9;
      62             :         rate = len / (secs * 1024 * 1024);
      63             :         debug_message("%s %zu bytes in %.2g: \033[1;35m%.2f\033[0m MB per second\n",
      64             :                       name, len, secs, rate);
      65             : }
      66             : 
      67             : #else
      68             : #define debug_message(...) /* debug_message */
      69             : #define debug_start_timer(...) /* debug_start_timer */
      70             : #define debug_end_timer(...) /* debug_end_timer */
      71             : #endif
      72             : 
      73             : 
      74             : struct lzx_pair {
      75             :         const char *name;
      76             :         DATA_BLOB compressed;
      77             :         DATA_BLOB decompressed;
      78             : };
      79             : 
      80             : struct lzx_file_pair {
      81             :         const char *name;
      82             :         const char *compressed_file;
      83             :         const char *decompressed_file;
      84             : };
      85             : 
      86             : 
      87             : #define DECOMP_DIR "testdata/compression/decompressed"
      88             : #define COMP_DIR "testdata/compression/compressed-plain"
      89             : #define MORE_COMP_DIR "testdata/compression/compressed-more-plain"
      90             : 
      91             : 
      92             : #define BLOB_FROM_ARRAY(...)                             \
      93             :         {                                                \
      94             :                 .data = (uint8_t[]){__VA_ARGS__},          \
      95             :                 .length = sizeof((uint8_t[]){__VA_ARGS__}) \
      96             :         }
      97             : 
      98             : #define BLOB_FROM_STRING(s)                                      \
      99             :         {                                                            \
     100             :                 .data = discard_const_p(uint8_t, s),                 \
     101             :                 .length = (sizeof(s) - 1)                    \
     102             :         }
     103             : 
     104             : 
     105             : const char * file_names[] = {
     106             :         "generate-windows-test-vectors.c",
     107             :         "fib_shuffle-128k+",
     108             :         "fuzzing-0fc2d461b56cd8103c91",
     109             :         "fuzzing-3ec3bca27bb9eb40c128",
     110             :         "fuzzing-a3115a81d1ac500318f9",
     111             :         "fuzzing-3591f9dc02bb00a54b60",
     112             :         "27826-8.txt",
     113             :         "5d049b4cb1bd933f5e8ex19",
     114             :         "638e61e96d54279981c3x5",
     115             :         "64k-minus-one-zeros",
     116             :         "64k-plus-one-zeros",
     117             :         "64k-zeros",
     118             :         "96f696a4e5ce56c61a3dx10",
     119             :         "9e0b6a12febf38e98f13",
     120             :         "abc-times-101",
     121             :         "abc-times-105",
     122             :         "abc-times-200",
     123             :         "b63289ccc7f218c0d56b",
     124             :         "beta-variate1-128k+",
     125             :         "beta-variate3-128k+",
     126             :         "decayed_alphabet_128k+",
     127             :         "decayed_alphabet_64k",
     128             :         "f00842317dc6d5695b02",
     129             :         "fib_shuffle",
     130             :         "midsummer-nights-dream.txt",
     131             :         "notes-on-the-underground.txt",
     132             :         "pg22009.txt",
     133             :         "repeating",
     134             :         "repeating-exactly-64k",
     135             :         "setup.log",
     136             :         "slow-015ddc36a71412ccc50d",
     137             :         "slow-100e9f966a7feb9ca40a",
     138             :         "slow-2a671c3cff4f1574cbab",
     139             :         "slow-33d90a24e70515b14cd0",
     140             :         "slow-49d8c05261e3f412fc72",
     141             :         "slow-50a249d2fe56873e56a0",
     142             :         "slow-63e9f0b52235fb0129fa",
     143             :         "slow-73b7f971d65908ac0095",
     144             :         "slow-8b61e3dd267908544531",
     145             :         "slow-9d1c5a079b0462986f1f",
     146             :         "slow-aa7262a821dabdcf04a6",
     147             :         "slow-b8a91d142b0d2af7f5ca",
     148             :         "slow-c79142457734bbc8d575",
     149             :         "slow-d736544545b90d83fe75",
     150             :         "slow-e3b9bdfaed7d1a606fdb",
     151             :         "slow-f3f1c02a9d006e5e1703",
     152             :         "trigram_128k+",
     153             :         "trigram_64k",
     154             :         "trigram_sum_128k+",
     155             :         "trigram_sum_64k",
     156             :         NULL
     157             : };
     158             : 
     159             : 
     160             : 
     161         301 : static DATA_BLOB datablob_from_file(TALLOC_CTX *mem_ctx,
     162             :                                     const char *filename)
     163             : {
     164         301 :         DATA_BLOB b = {0};
     165         301 :         FILE *fh = fopen(filename, "rb");
     166         301 :         int ret;
     167         301 :         struct stat s;
     168         301 :         size_t len;
     169         301 :         if (fh == NULL) {
     170          20 :                 debug_message("could not open '%s'\n", filename);
     171          20 :                 return b;
     172             :         }
     173         281 :         ret = fstat(fileno(fh), &s);
     174         281 :         if (ret != 0) {
     175           0 :                 fclose(fh);
     176           0 :                 return b;
     177             :         }
     178         281 :         b.data = talloc_array(mem_ctx, uint8_t, s.st_size);
     179         281 :         if (b.data == NULL) {
     180           0 :                 fclose(fh);
     181           0 :                 return b;
     182             :         }
     183         281 :         len = fread(b.data, 1, s.st_size, fh);
     184         281 :         if (ferror(fh) || len != s.st_size) {
     185           0 :                 TALLOC_FREE(b.data);
     186             :         } else {
     187             :                 b.length = len;
     188             :         }
     189         281 :         fclose(fh);
     190         281 :         return b;
     191             : }
     192             : 
     193             : 
     194             : 
     195           1 : static void test_lzxpress_plain_decompress_files(void **state)
     196             : {
     197           1 :         size_t i;
     198           1 :         int score = 0;
     199           1 :         TALLOC_CTX *mem_ctx = talloc_new(NULL);
     200          52 :         for (i = 0; file_names[i] != NULL; i++) {
     201          50 :                 char filename[200];
     202          50 :                 uint8_t *dest = NULL;
     203          50 :                 ssize_t written;
     204          50 :                 TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
     205          50 :                 struct lzx_pair p = {
     206          50 :                         .name = file_names[i]
     207             :                 };
     208             : 
     209          50 :                 debug_message("%s\n", p.name);
     210             : 
     211          50 :                 snprintf(filename, sizeof(filename),
     212             :                          "%s/%s.decomp", DECOMP_DIR, p.name);
     213             : 
     214          50 :                 p.decompressed = datablob_from_file(tmp_ctx, filename);
     215          50 :                 assert_non_null(p.decompressed.data);
     216             : 
     217          50 :                 snprintf(filename, sizeof(filename),
     218             :                          "%s/%s.lzplain", COMP_DIR, p.name);
     219             : 
     220          50 :                 p.compressed = datablob_from_file(tmp_ctx, filename);
     221          50 :                 assert_non_null(p.compressed.data);
     222             : 
     223          50 :                 dest = talloc_array(tmp_ctx, uint8_t, p.decompressed.length);
     224          50 :                 debug_start_timer();
     225          50 :                 written = lzxpress_decompress(p.compressed.data,
     226             :                                               p.compressed.length,
     227             :                                               dest,
     228             :                                               p.decompressed.length);
     229          50 :                 debug_end_timer("decompress", p.decompressed.length);
     230          50 :                 if (written == p.decompressed.length &&
     231          50 :                     memcmp(dest, p.decompressed.data, p.decompressed.length) == 0) {
     232          50 :                         debug_message("\033[1;32mdecompressed %s!\033[0m\n", p.name);
     233          50 :                         score++;
     234             :                 } else {
     235             :                         debug_message("\033[1;31mfailed to decompress %s!\033[0m\n",
     236          50 :                                       p.name);
     237             :                         debug_message("size %zd vs reference %zu\n",
     238          50 :                                       written, p.decompressed.length);
     239             :                 }
     240          50 :                 talloc_free(tmp_ctx);
     241             :         }
     242           1 :         debug_message("%d/%zu correct\n", score, i);
     243           1 :         assert_int_equal(score, i);
     244           1 : }
     245             : 
     246             : 
     247           1 : static void test_lzxpress_plain_decompress_more_compressed_files(void **state)
     248             : {
     249             :         /*
     250             :          * This tests the decompression of files that have been compressed on
     251             :          * Windows with the level turned up (to 1, default for MS-XCA is 0).
     252             :          *
     253             :          * The format is identical, but it will have tried harder to find
     254             :          * matches.
     255             :          */
     256           1 :         size_t i;
     257           1 :         int score = 0;
     258           1 :         int found = 0;
     259           1 :         TALLOC_CTX *mem_ctx = talloc_new(NULL);
     260          52 :         for (i = 0; file_names[i] != NULL; i++) {
     261          50 :                 char filename[200];
     262          50 :                 uint8_t *dest = NULL;
     263          50 :                 ssize_t written;
     264          50 :                 TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
     265          50 :                 struct lzx_pair p = {
     266          50 :                         .name = file_names[i]
     267             :                 };
     268             : 
     269          50 :                 debug_message("%s\n", p.name);
     270             : 
     271          50 :                 snprintf(filename, sizeof(filename),
     272             :                          "%s/%s.decomp", DECOMP_DIR, p.name);
     273             : 
     274          50 :                 p.decompressed = datablob_from_file(tmp_ctx, filename);
     275          50 :                 assert_non_null(p.decompressed.data);
     276             : 
     277          50 :                 snprintf(filename, sizeof(filename),
     278             :                          "%s/%s.lzplain", MORE_COMP_DIR, p.name);
     279             : 
     280          50 :                 p.compressed = datablob_from_file(tmp_ctx, filename);
     281          50 :                 if (p.compressed.data == NULL) {
     282             :                         /*
     283             :                          * We don't have all the vectors in the
     284             :                          * more-compressed directory, which is OK, we skip
     285             :                          * them.
     286             :                          */
     287          20 :                         continue;
     288             :                 }
     289          30 :                 found++;
     290          30 :                 dest = talloc_array(tmp_ctx, uint8_t, p.decompressed.length);
     291          30 :                 debug_start_timer();
     292          30 :                 written = lzxpress_decompress(p.compressed.data,
     293             :                                               p.compressed.length,
     294             :                                               dest,
     295             :                                               p.decompressed.length);
     296          30 :                 debug_end_timer("decompress", p.decompressed.length);
     297          30 :                 if (written != -1 &&
     298          30 :                     written == p.decompressed.length &&
     299          30 :                     memcmp(dest, p.decompressed.data, p.decompressed.length) == 0) {
     300          30 :                         debug_message("\033[1;32mdecompressed %s!\033[0m\n", p.name);
     301          30 :                         score++;
     302             :                 } else {
     303             :                         debug_message("\033[1;31mfailed to decompress %s!\033[0m\n",
     304          30 :                                       p.name);
     305             :                         debug_message("size %zd vs reference %zu\n",
     306          30 :                                       written, p.decompressed.length);
     307             :                 }
     308          30 :                 talloc_free(tmp_ctx);
     309             :         }
     310           1 :         debug_message("%d/%d correct\n", score, found);
     311           1 :         assert_int_equal(score, found);
     312           1 : }
     313             : 
     314             : 
     315             : /*
     316             :  * attempt_round_trip() tests whether a data blob can survive a compression
     317             :  * and decompression cycle. If save_name is not NULL and LZX_DEBUG_FILES
     318             :  * evaluates to true, the various stages are saved in files with that name and the
     319             :  * '-original', '-compressed', and '-decompressed' suffixes. If ref_compressed
     320             :  * has data, it'll print a message saying whether the compressed data matches
     321             :  * that.
     322             :  */
     323             : 
     324          56 : static ssize_t attempt_round_trip(TALLOC_CTX *mem_ctx,
     325             :                                   DATA_BLOB original,
     326             :                                   const char *save_name,
     327             :                                   DATA_BLOB ref_compressed)
     328             : {
     329          56 :         TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
     330          56 :         DATA_BLOB compressed = data_blob_talloc(tmp_ctx, NULL,
     331             :                                                 original.length * 8 / 7 + 8);
     332          56 :         DATA_BLOB decompressed = data_blob_talloc(tmp_ctx, NULL,
     333             :                                                   original.length);
     334          56 :         ssize_t comp_written, decomp_written;
     335          56 :         debug_start_timer();
     336          56 :         comp_written = lzxpress_compress(original.data,
     337             :                                          original.length,
     338             :                                          compressed.data,
     339             :                                          compressed.length);
     340          56 :         debug_end_timer("compress", original.length);
     341          56 :         if (comp_written <= 0) {
     342           0 :                 talloc_free(tmp_ctx);
     343           0 :                 return -1;
     344             :         }
     345             : 
     346          56 :         if (ref_compressed.data != NULL) {
     347             :                 /*
     348             :                  * This is informational, not an assertion; there are
     349             :                  * ~infinite legitimate ways to compress the data, many as
     350             :                  * good as each other (think of compression as a language, not
     351             :                  * a format).
     352             :                  */
     353             :                 debug_message("compressed size %zd vs reference %zu\n",
     354             :                               comp_written, ref_compressed.length);
     355             : 
     356             :                 if (comp_written == compressed.length &&
     357             :                     memcmp(compressed.data, ref_compressed.data, comp_written) == 0) {
     358          56 :                         debug_message("\033[1;32mbyte identical!\033[0m\n");
     359             :                 }
     360             :         }
     361          56 :         debug_start_timer();
     362         112 :         decomp_written = lzxpress_decompress(compressed.data,
     363             :                                              comp_written,
     364             :                                              decompressed.data,
     365          56 :                                              decompressed.length);
     366          56 :         debug_end_timer("decompress", original.length);
     367          56 :         if (save_name != NULL && LZX_DEBUG_FILES) {
     368             :                 char s[300];
     369             :                 FILE *fh = NULL;
     370             : 
     371             :                 snprintf(s, sizeof(s), "%s-original", save_name);
     372             :                 fprintf(stderr, "Saving %zu bytes to %s\n", original.length, s);
     373             :                 fh = fopen(s, "w");
     374             :                 fwrite(original.data, 1, original.length, fh);
     375             :                 fclose(fh);
     376             : 
     377             :                 snprintf(s, sizeof(s), "%s-compressed", save_name);
     378             :                 fprintf(stderr, "Saving %zu bytes to %s\n", comp_written, s);
     379             :                 fh = fopen(s, "w");
     380             :                 fwrite(compressed.data, 1, comp_written, fh);
     381             :                 fclose(fh);
     382             :                 /*
     383             :                  * We save the decompressed file using original.length, not
     384             :                  * the returned size. If these differ, the returned size will
     385             :                  * be -1. By saving the whole buffer we can see at what point
     386             :                  * it went haywire.
     387             :                  */
     388             :                 snprintf(s, sizeof(s), "%s-decompressed", save_name);
     389             :                 fprintf(stderr, "Saving %zu bytes to %s\n", original.length, s);
     390             :                 fh = fopen(s, "w");
     391             :                 fwrite(decompressed.data, 1, original.length, fh);
     392             :                 fclose(fh);
     393             :         }
     394             : 
     395          56 :         if (original.length != decomp_written ||
     396          56 :             memcmp(decompressed.data,
     397             :                    original.data,
     398             :                    original.length) != 0) {
     399             :                 debug_message("\033[1;31mgot %zd, expected %zu\033[0m\n",
     400             :                               decomp_written,
     401           0 :                               original.length);
     402           0 :                 talloc_free(tmp_ctx);
     403           0 :                 return -1;
     404             :         }
     405          56 :         talloc_free(tmp_ctx);
     406          56 :         return comp_written;
     407             : }
     408             : 
     409             : 
     410           1 : static void test_lzxpress_plain_round_trip_files(void **state)
     411             : {
     412           1 :         size_t i;
     413           1 :         int score = 0;
     414           1 :         ssize_t compressed_total = 0;
     415           1 :         ssize_t reference_total = 0;
     416           1 :         TALLOC_CTX *mem_ctx = talloc_new(NULL);
     417          52 :         for (i = 0; file_names[i] != NULL; i++) {
     418          50 :                 char filename[200];
     419          50 :                 char *debug_files = NULL;
     420          50 :                 TALLOC_CTX *tmp_ctx = talloc_new(mem_ctx);
     421          50 :                 ssize_t comp_size;
     422          50 :                 struct lzx_pair p = {
     423          50 :                         .name = file_names[i]
     424             :                 };
     425          50 :                 debug_message("-------------------\n");
     426          50 :                 debug_message("%s\n", p.name);
     427             : 
     428          50 :                 snprintf(filename, sizeof(filename),
     429             :                          "%s/%s.decomp", DECOMP_DIR, p.name);
     430             : 
     431          50 :                 p.decompressed = datablob_from_file(tmp_ctx, filename);
     432          50 :                 assert_non_null(p.decompressed.data);
     433             : 
     434          50 :                 snprintf(filename, sizeof(filename),
     435             :                          "%s/%s.lzplain", COMP_DIR, p.name);
     436             : 
     437          50 :                 p.compressed = datablob_from_file(tmp_ctx, filename);
     438          50 :                 if (p.compressed.data == NULL) {
     439             :                         debug_message(
     440             :                                 "Could not load %s reference file %s\n",
     441             :                                 p.name, filename);
     442             :                         debug_message("%s decompressed %zu\n", p.name,
     443             :                                       p.decompressed.length);
     444             :                 } else {
     445             :                         debug_message("%s: reference compressed %zu decomp %zu\n",
     446             :                                       p.name,
     447             :                                       p.compressed.length,
     448          50 :                                       p.decompressed.length);
     449             :                 }
     450          50 :                 if (1) {
     451             :                         /*
     452             :                          * Debug copies go in /tmp (written only if LZX_DEBUG_FILES is true).
     453             :                          */
     454          50 :                         snprintf(filename, sizeof(filename),
     455             :                                  "/tmp/lzxplain-%s", p.name);
     456          50 :                         debug_files = filename;
     457             :                 }
     458             : 
     459          50 :                 comp_size = attempt_round_trip(mem_ctx, p.decompressed,
     460             :                                                debug_files,
     461             :                                                p.compressed);
     462          50 :                 if (comp_size > 0) {
     463          50 :                         debug_message("\033[1;32mround trip!\033[0m\n");
     464          50 :                         score++;
     465          50 :                         if (p.compressed.length) {
     466          50 :                                 compressed_total += comp_size;
     467          50 :                                 reference_total += p.compressed.length;
     468             :                         }
     469             :                 }
     470          50 :                 talloc_free(tmp_ctx);
     471             :         }
     472           1 :         debug_message("%d/%zu correct\n", score, i);
     473           1 :         print_message("\033[1;34mtotal compressed size: %zu\033[0m\n",
     474             :                       compressed_total);
     475           1 :         print_message("total reference size:  %zd \n", reference_total);
     476           1 :         print_message("diff:                  %7zd \n",
     477             :                       reference_total - compressed_total);
     478           1 :         assert_true(reference_total != 0);
     479           1 :         print_message("ratio: \033[1;3%dm%.2f\033[0m \n",
     480             :                       2 + (compressed_total >= reference_total),
     481           1 :                       ((double)compressed_total) / reference_total);
     482             :         /*
     483             :          * Assert that the compression is better than Windows. Unlike the
     484             :          * Huffman variant, where things are very even, here we do much better
     485             :          * than Windows without especially trying.
     486             :          */
     487           1 :         assert_true(compressed_total <= reference_total);
     488             : 
     489           1 :         assert_int_equal(score, i);
     490           1 :         talloc_free(mem_ctx);
     491           1 : }
     492             : 
     493             : 
     494             : /*
     495             :  * Bob Jenkins' Small Fast RNG.
     496             :  *
     497             :  * We don't need it to be this good, but we do need it to be reproducible
     498             :  * across platforms, which rand() etc aren't.
     499             :  *
     500             :  * http://burtleburtle.net/bob/rand/smallprng.html
     501             :  */
     502             : 
     503             : struct jsf_rng {
     504             :         uint32_t a;
     505             :         uint32_t b;
     506             :         uint32_t c;
     507             :         uint32_t d;
     508             : };
     509             : 
     510             : #define ROTATE32(x, k) (((x) << (k)) | ((x) >> (32 - (k))))
     511             : 
     512    28901487 : static uint32_t jsf32(struct jsf_rng *x) {
     513    28901487 :         uint32_t e = x->a - ROTATE32(x->b, 27);
     514    28901487 :         x->a = x->b ^ ROTATE32(x->c, 17);
     515    28901487 :         x->b = x->c + x->d;
     516    28901487 :         x->c = x->d + e;
     517    28901487 :         x->d = e + x->a;
     518    28901487 :         return x->d;
     519             : }
     520             : 
     521           6 : static void jsf32_init(struct jsf_rng *x, uint32_t seed) {
     522           6 :         size_t i;
     523           6 :         x->a = 0xf1ea5eed;
     524           6 :         x->b = x->c = x->d = seed;
     525         126 :         for (i = 0; i < 20; ++i) {
     526         120 :                 jsf32(x);
     527             :         }
     528             : }
     529             : 
     530             : 
     531           1 : static void test_lzxpress_plain_long_gpl_round_trip(void **state)
     532             : {
     533             :         /*
     534             :          * We use a kind of model-free Markov model to generate a massively
     535             :          * extended pastiche of the GPLv3 (chosen because it is right there in
     536             :          * "COPYING" and won't change often).
     537             :          *
     538             :          * The point is to check a round trip of a very long message with
     539             :          * multiple repetitions on many scales, without having to add a very
     540             :          * large file.
                     :          *
                     :          * Generation: each output byte is copied from gpl.data[j]. After each
                     :          * byte, a 30-position window starting at a pseudo-random offset m in
                     :          * the GPL text is scanned for the pair (previous output byte, byte
                     :          * just written); on a hit, j jumps to just past that pair, otherwise
                     :          * copying continues from the next byte. The "- 50" in the modulus
                     :          * keeps the window, and the k + 1 and k + 2 it leads to, inside
                     :          * gpl.data.
                     :          *
                     :          * NOTE(review): nothing here checks that "COPYING" was actually
                     :          * loaded or that gpl.length > 50; and p is a plain char compared
                     :          * against gpl.data[k], so bytes >= 0x80 may never match where char
                     :          * is signed. Confirm these are acceptable for the input used.
     541             :          */
     542           1 :         size_t i, j, k;
     543           1 :         uint8_t c;
     544           1 :         TALLOC_CTX *mem_ctx = talloc_new(NULL);
     545           1 :         DATA_BLOB gpl = datablob_from_file(mem_ctx, "COPYING");
     546           1 :         DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
     547           1 :         DATA_BLOB ref = {0};
     548           1 :         ssize_t comp_size;
     549           1 :         struct jsf_rng rng;
     550             : 
     551             : 
     552           1 :         jsf32_init(&rng, 1);
     553             : 
     554           1 :         j = 1; /* next index to read from the GPL text; byte 0 is copied just below */
     555           1 :         original.data[0] = gpl.data[0];
     556     5242880 :         for (i = 1; i < original.length; i++) {
     557     5242879 :                 size_t m;
     558     5242879 :                 char p = original.data[i - 1];
     559     5242879 :                 c = gpl.data[j];
     560     5242879 :                 original.data[i] = c;
     561     5242879 :                 j++;
     562     5242879 :                 m = (j + jsf32(&rng)) % (gpl.length - 50); /* start of the search window */
     563   147802506 :                 for (k = m; k < m + 30; k++) {
     564   143460910 :                         if (p == gpl.data[k] &&
     565     9104961 :                             c == gpl.data[k + 1]) {
     566      901283 :                                 j = k + 2; /* continue reading just after the matched pair */
     567      901283 :                                 break;
     568             :                         }
     569             :                 }
     570     5242879 :                 if (j == gpl.length) { /* sequential reading reached the end: restart at index 1 */
     571          62 :                         j = 1;
     572             :                 }
     573             :         }
     574             : 
     575           1 :         comp_size = attempt_round_trip(mem_ctx, original, "/tmp/gpl", ref); /* helper defined outside this function; presumably returns the compressed size - confirm */
     576           1 :         assert_true(comp_size > 0);
     577           1 :         assert_true(comp_size < original.length);
     578             : 
     579           1 :         talloc_free(mem_ctx);
     580           1 : }
     581             : 
     582             : 
     583           2 : static void test_lzxpress_plain_long_random_graph_round_trip(void **state)
     584             : {
     585           2 :         size_t i;
     586           2 :         TALLOC_CTX *mem_ctx = talloc_new(NULL);
     587           2 :         DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
     588           2 :         DATA_BLOB ref = {0};
     589             :         /*
     590             :          * There's a random trigram graph, with each pair of sequential bytes
     591             :          * pointing to a successor. This would probably fall into a fairly
     592             :          * simple loop, but we introduce damage into the system, randomly
     593             :          * flipping about 1 bit in 64.
     594             :          *
     595             :          * The result is semi-structured and compressible.
                     :          *
                     :          * The table is 65536 bytes, filled 32 bits at a time from the RNG
                     :          * (so the byte order within each word follows the host). Each new
                     :          * byte is table[k], where k is built from the previous two bytes.
                     :          * The damage mask ANDs three RNG outputs and then ANDs its high and
                     :          * low halves together, which is six random bits per position and
                     :          * hence the 1-in-64 rate, before the low 16 bits are XORed into k.
     596             :          */
     597           2 :         uint8_t *d = original.data;
     598           2 :         uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536);
     599           2 :         uint32_t *table32 = (void*)table; /* same storage as table, viewed as 32-bit words */
     600           2 :         ssize_t comp_size;
     601           2 :         struct jsf_rng rng;
     602             : 
     603           2 :         jsf32_init(&rng, 1);
     604       32770 :         for (i = 0; i < (65536 / 4); i++) {
     605       32768 :                 table32[i] = jsf32(&rng);
     606             :         }
     607             : 
     608           2 :         d[0] = 'a'; /* the first two bytes are fixed; everything after is derived */
     609           2 :         d[1] = 'b';
     610             : 
     611    10485758 :         for (i = 2; i < original.length; i++) {
     612    10485756 :                 uint16_t k = (d[i - 2] << 8) | d[i - 1];
     613    10485756 :                 uint32_t damage = jsf32(&rng) & jsf32(&rng) & jsf32(&rng);
     614    10485756 :                 damage &= (damage >> 16);
     615    10485756 :                 k ^= damage & 0xffff;
     616    10485756 :                 d[i] = table[k];
     617             :         }
     618             : 
     619           2 :         comp_size = attempt_round_trip(mem_ctx, original, "/tmp/random-graph", ref); /* helper defined outside this function; presumably returns the compressed size - confirm */
     620           2 :         assert_true(comp_size > 0);
     621           2 :         assert_true(comp_size < original.length);
     622             : 
     623           2 :         talloc_free(mem_ctx);
     624           2 : }
     625             : 
     626             : 
     627           1 : static void test_lzxpress_plain_chaos_graph_round_trip(void **state)
     628             : {
     629           1 :         size_t i;
     630           1 :         TALLOC_CTX *mem_ctx = talloc_new(NULL);
     631           1 :         DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
     632           1 :         DATA_BLOB ref = {0};
     633             :         /*
     634             :          * There's a random trigram graph, with each pair of sequential bytes
     635             :          * pointing to a successor. This would probably fall into a fairly
     636             :          * simple loop, but we keep changing the graph. The result is long
     637             :          * periods of stability separated by bursts of noise.
                     :          *
                     :          * Each new byte is table[k], with k built from the previous two
                     :          * bytes. Whenever the top three bits of a fresh RNG value are all
                     :          * zero (1 time in 8), that value also rewrites one table entry:
                     :          * the low 16 bits choose the slot, bits 16-23 supply the new byte.
     638             :          */
     639           1 :         uint8_t *d = original.data;
     640           1 :         uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536);
     641           1 :         uint32_t *table32 = (void*)table; /* same storage as table, viewed as 32-bit words */
     642           1 :         ssize_t comp_size;
     643           1 :         struct jsf_rng rng;
     644             : 
     645           1 :         jsf32_init(&rng, 1);
     646       16385 :         for (i = 0; i < (65536 / 4); i++) {
     647       16384 :                 table32[i] = jsf32(&rng);
     648             :         }
     649             : 
     650           1 :         d[0] = 'a';
     651           1 :         d[1] = 'b';
     652             : 
     653     5242879 :         for (i = 2; i < original.length; i++) {
     654     5242878 :                 uint16_t k = (d[i - 2] << 8) | d[i - 1];
     655     5242878 :                 uint32_t damage = jsf32(&rng);
     656     5242878 :                 d[i] = table[k]; /* read before the table is possibly modified below */
     657     5242878 :                 if ((damage >> 29) == 0) {
     658      655652 :                         uint16_t index = damage & 0xffff;
     659      655652 :                         uint8_t value = (damage >> 16) & 0xff;
     660      655652 :                         table[index] = value;
     661             :                 }
     662             :         }
     663             : 
     664           1 :         comp_size = attempt_round_trip(mem_ctx, original, "/tmp/chaos-graph", ref); /* helper defined outside this function; presumably returns the compressed size - confirm */
     665           1 :         assert_true(comp_size > 0);
     666           1 :         assert_true(comp_size < original.length);
     667             : 
     668           1 :         talloc_free(mem_ctx);
     669           1 : }
     670             : 
     671             : 
     672           1 : static void test_lzxpress_plain_sparse_random_graph_round_trip(void **state)
     673             : {
     674           1 :         size_t i;
     675           1 :         TALLOC_CTX *mem_ctx = talloc_new(NULL);
     676           1 :         DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, 5 * 1024 * 1024);
     677           1 :         DATA_BLOB ref = {0};
     678             :         /*
     679             :          * There's a random trigram graph, with each pair of sequential bytes
     680             :          * pointing to a successor. This will fall into fairly simple loops,
     681             :          * but we introduce damage into the system, randomly mangling about 1
     682             :          * byte in 65536.
     683             :          *
     684             :          * The result has very long repetitive runs, which should lead to
     685             :          * oversized blocks.
                     :          *
                     :          * Each new byte is table[k], with k built from the previous two
                     :          * bytes. Only when the upper 16 bits of a fresh RNG value are all
                     :          * zero are its lower 16 bits XORed into k before the lookup.
     686             :          */
     687           1 :         uint8_t *d = original.data;
     688           1 :         uint8_t *table = talloc_array(mem_ctx, uint8_t, 65536);
     689           1 :         uint32_t *table32 = (void*)table; /* same storage as table, viewed as 32-bit words */
     690           1 :         ssize_t comp_size;
     691           1 :         struct jsf_rng rng;
     692             : 
     693           1 :         jsf32_init(&rng, 3);
     694       16385 :         for (i = 0; i < (65536 / 4); i++) {
     695       16384 :                 table32[i] = jsf32(&rng);
     696             :         }
     697             : 
     698           1 :         d[0] = 'a';
     699           1 :         d[1] = 'b';
     700             : 
     701     5242879 :         for (i = 2; i < original.length; i++) {
     702     5242878 :                 uint16_t k = (d[i - 2] << 8) | d[i - 1];
     703     5242878 :                 uint32_t damage = jsf32(&rng);
     704     5242878 :                 if ((damage & 0xffff0000) == 0) {
     705          77 :                         k ^= damage & 0xffff;
     706             :                 }
     707     5242878 :                 d[i] = table[k];
     708             :         }
     709             : 
     710           1 :         comp_size = attempt_round_trip(mem_ctx, original, "/tmp/sparse-random-graph", ref); /* helper defined outside this function; presumably returns the compressed size - confirm */
     711           1 :         assert_true(comp_size > 0);
     712           1 :         assert_true(comp_size < original.length);
     713             : 
     714           1 :         talloc_free(mem_ctx);
     715           1 : }
     716             : 
     717             : 
     718           1 : static void test_lzxpress_plain_random_noise_round_trip(void **state)
     719             : {
     720           1 :         size_t i;
     721           1 :         size_t len = 10 * 1024 * 1024;
     722           1 :         TALLOC_CTX *mem_ctx = talloc_new(NULL);
     723           1 :         DATA_BLOB original = data_blob_talloc(mem_ctx, NULL, len);
     724           1 :         DATA_BLOB ref = {0};
     725           1 :         ssize_t comp_size;
     726             :         /*
     727             :          * We are filling this up with incompressible noise, but we can assert
     728             :          * quite tight bounds on how badly it will fail to compress.
     729             :          *
     730             :          * There is one additional bit for each code, which says whether the
     731             :          * code is a literal byte or a match. If *all* codes are literal
     732             :          * bytes, the length should be 9/8 the original (with rounding
     733             :          * issues regarding the indicator bit blocks).
     734             :          *
     735             :          * If some matches are found the length will be a bit less. We would
     736             :          * expect one 3 byte match per 1 << 24 tries, but we try 8192 times
     737             :          * per position. That means there'll be a match 1/2048 of the time at
     738             :          * best. 255 times out of 256 this will be exactly a 3 byte match,
     739             :          * encoded as two bytes, so we could get a 1 / 2048 saving on top of
     740             :          * the 1/8 cost. There'll be a smattering of longer matches too, and
     741             :          * the potential for complicated maths to account for those, but we'll
     742             :          * skimp on that by allowing for a 1/1500 saving.
     743             :          *
     744             :          * With the hash table, we take a shortcut in the "8192 tries", and
     745             :          * the size of the table makes a difference in how we perform, with 13
     746             :          * bits (8192 slots) naturally being luckier than 12. Ultimately,
     747             :          * either way, the compressed file is still 12.5% bigger than the
     748             :          * original.
     749             :          */
     750           1 :         size_t limit = len * 9 / 8 + 4; /* upper bound asserted below; the lower bound is limit - limit / 1500 */
     751             : 
     752           1 :         uint32_t *d32 = (uint32_t*)((void*)original.data); /* the buffer viewed as 32-bit words */
     753           1 :         struct jsf_rng rng;
     754           1 :         jsf32_init(&rng, 2);
     755             : 
     756     2621441 :         for (i = 0; i < (len / 4); i++) { /* len is a multiple of 4, so every byte is written */
     757     2621440 :                 d32[i] = jsf32(&rng);
     758             :         }
     759             : 
     760           1 :         comp_size = attempt_round_trip(mem_ctx, original, "/tmp/random-noise", ref); /* helper defined outside this function; presumably returns the compressed size - confirm */
     761             :         debug_message("original size %zu; compressed size %zd; ratio %.5f\n",
     762           1 :                       len, comp_size, ((double)comp_size) / len);
     763             :         debug_message("expected range %zu - %zu\n",
     764           1 :                       limit - limit / 1500, limit);
     765             : 
     766           1 :         assert_true(comp_size > 0);
     767           1 :         assert_true(comp_size < limit);
     768           1 :         assert_true(comp_size >= limit - limit / 1500);
     769           1 :         talloc_free(mem_ctx);
     770           1 : }
     771             : 
     772             : 
     773             : /* Tests based on [MS-XCA] 3.1 Examples */
     774           1 : static void test_msft_data1(void **state) /* compress the lowercase alphabet, compare with fixed bytes, then decompress it again */
     775             : {
     776           1 :         TALLOC_CTX *tmp_ctx = talloc_new(NULL);
     777             : 
     778           1 :         const char *fixed_data = "abcdefghijklmnopqrstuvwxyz";
     779           1 :         const uint8_t fixed_out[] = { /* expected output: 4 leading bytes, then the 26 input letters unchanged */
     780             :                 0x3f, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64,
     781             :                 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
     782             :                 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
     783             :                 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a };
     784             : 
     785           1 :         ssize_t c_size;
     786           1 :         uint8_t *out, *out2;
     787             : 
     788           1 :         out  = talloc_size(tmp_ctx, 2048);
     789           1 :         memset(out, 0x42, talloc_get_size(out)); /* pre-fill with 0x42, presumably so unwritten bytes stand out */
     790             : 
     791           1 :         c_size = lzxpress_compress((const uint8_t *)fixed_data,
     792             :                                    strlen(fixed_data),
     793             :                                    out,
     794           1 :                                    talloc_get_size(out));
     795           1 :         assert_int_not_equal(c_size, -1);
     796           1 :         assert_int_equal(c_size, sizeof(fixed_out));
     797           1 :         assert_memory_equal(out, fixed_out, c_size);
     798           1 :         out2  = talloc_size(tmp_ctx, strlen(fixed_data)); /* exactly the input length, with no spare room */
     799           1 :         c_size = lzxpress_decompress(out,
     800             :                                      sizeof(fixed_out),
     801             :                                      out2,
     802           1 :                                      talloc_get_size(out2));
     803           1 :         assert_int_not_equal(c_size, -1);
     804           1 :         assert_int_equal(c_size, strlen(fixed_data));
     805           1 :         assert_memory_equal(out2, fixed_data, c_size);
     806             : 
     807           1 :         talloc_free(tmp_ctx);
     808           1 : }
     809             : 
     810             : 
     811           1 : static void test_msft_data2(void **state) /* compress a long run of repeated "abc", compare with fixed bytes, then decompress it again */
     812             : {
     813           1 :         TALLOC_CTX *tmp_ctx = talloc_new(NULL);
     814             : 
     815           1 :         const char *fixed_data =
     816             :                 "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
     817             :                 "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
     818             :                 "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
     819             :                 "abcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabcabc"
     820             :                 "abcabcabcabcabcabcabcabc";
     821           1 :         const uint8_t fixed_out[] = { /* the whole input is expected to compress to these 13 bytes */
     822             :                 0xff, 0xff, 0xff, 0x1f, 0x61, 0x62, 0x63, 0x17,
     823             :                 0x00, 0x0f, 0xff, 0x26, 0x01};
     824             : 
     825           1 :         ssize_t c_size;
     826           1 :         uint8_t *out, *out2;
     827             : 
     828           1 :         out  = talloc_size(tmp_ctx, 2048);
     829           1 :         memset(out, 0x42, talloc_get_size(out)); /* pre-fill with 0x42, presumably so unwritten bytes stand out */
     830             : 
     831           1 :         c_size = lzxpress_compress((const uint8_t *)fixed_data,
     832             :                                    strlen(fixed_data),
     833             :                                    out,
     834           1 :                                    talloc_get_size(out));
     835           1 :         assert_int_not_equal(c_size, -1);
     836           1 :         assert_int_equal(c_size, sizeof(fixed_out));
     837           1 :         assert_memory_equal(out, fixed_out, c_size);
     838             : 
     839           1 :         out2  = talloc_size(tmp_ctx, strlen(fixed_data)); /* exactly the input length, with no spare room */
     840           1 :         c_size = lzxpress_decompress(out,
     841             :                                      sizeof(fixed_out),
     842             :                                      out2,
     843           1 :                                      talloc_get_size(out2));
     844             : 
     845           1 :         assert_int_not_equal(c_size, -1);
     846           1 :         assert_int_equal(c_size, strlen(fixed_data));
     847           1 :         assert_memory_equal(out2, fixed_data, c_size);
     848             : 
     849           1 :         talloc_free(tmp_ctx);
     850           1 : }
     851             : 
     852             : /*
     853             :   test lzxpress
     854             :  */
     855           1 : static void test_lzxpress(void **state) /* fixed-vector compress and decompress, plus decompression of a second encoding of the same text */
     856             : {
     857           1 :         TALLOC_CTX *tmp_ctx = talloc_new(NULL);
     858           1 :         const char *fixed_data = "this is a test. and this is a test too";
     859           1 :         const uint8_t fixed_out[] = { /* bytes the compressor must produce exactly */
     860             :                 0xff, 0x21, 0x00, 0x04, 0x74, 0x68, 0x69, 0x73,
     861             :                 0x20, 0x10, 0x00, 0x61, 0x20, 0x74, 0x65, 0x73,
     862             :                 0x74, 0x2E, 0x20, 0x61, 0x6E, 0x64, 0x20, 0x9F,
     863             :                 0x00, 0x04, 0x20, 0x74, 0x6F, 0x6F };
     864             : 
     865           1 :         const uint8_t fixed_out_old_version[] = { /* decompress-only input: differs from fixed_out in its first two bytes and ends with four extra 0x00 bytes */
     866             :                 0x00, 0x20, 0x00, 0x04, 0x74, 0x68, 0x69, 0x73,
     867             :                 0x20, 0x10, 0x00, 0x61, 0x20, 0x74, 0x65, 0x73,
     868             :                 0x74, 0x2E, 0x20, 0x61, 0x6E, 0x64, 0x20, 0x9F,
     869             :                 0x00, 0x04, 0x20, 0x74, 0x6F, 0x6F, 0x00, 0x00,
     870             :                 0x00, 0x00 };
     871             : 
     872           1 :         ssize_t c_size;
     873           1 :         uint8_t *out, *out2, *out3;
     874             : 
     875           1 :         out  = talloc_size(tmp_ctx, 2048);
     876           1 :         memset(out, 0x42, talloc_get_size(out)); /* pre-fill with 0x42, presumably so unwritten bytes stand out */
     877             : 
     878           1 :         c_size = lzxpress_compress((const uint8_t *)fixed_data,
     879             :                                    strlen(fixed_data),
     880             :                                    out,
     881           1 :                                    talloc_get_size(out));
     882             : 
     883           1 :         assert_int_not_equal(c_size, -1);
     884           1 :         assert_int_equal(c_size, sizeof(fixed_out));
     885           1 :         assert_memory_equal(out, fixed_out, c_size);
     886             : 
     887           1 :         out2  = talloc_size(tmp_ctx, strlen(fixed_data)); /* exactly the input length, with no spare room */
     888           1 :         c_size = lzxpress_decompress(out,
     889             :                                      sizeof(fixed_out),
     890             :                                      out2,
     891           1 :                                      talloc_get_size(out2));
     892             : 
     893           1 :         assert_int_not_equal(c_size, -1);
     894           1 :         assert_int_equal(c_size, strlen(fixed_data));
     895           1 :         assert_memory_equal(out2, fixed_data, c_size);
     896             : 
     897           1 :         out3  = talloc_size(tmp_ctx, strlen(fixed_data)); /* the second encoding must decompress to the same text */
     898           1 :         c_size = lzxpress_decompress(fixed_out_old_version,
     899             :                                      sizeof(fixed_out_old_version),
     900             :                                      out3,
     901           1 :                                      talloc_get_size(out3));
     902             : 
     903           1 :         assert_int_not_equal(c_size, -1);
     904           1 :         assert_int_equal(c_size, strlen(fixed_data));
     905           1 :         assert_memory_equal(out3, fixed_data, c_size);
     906             : 
     907           1 :         talloc_free(tmp_ctx);
     908           1 : }
     909             : 
     910           1 : static void test_lzxpress2(void **state)
     911             : {
     912             :         /*
     913             :          * Use two matches, separated by a literal, and each with a length
     914             :          * greater than 10, to test the use of nibble_index. Both length values
     915             :          * (less ten) should be stored as adjacent nibbles to form the 0x21
     916             :          * byte.
                     :          *
                     :          * The 0x21 byte is the eighth byte of fixed_out below. The test
                     :          * checks the compressed bytes exactly and then decompresses them
                     :          * back into a buffer of exactly the original length.
     917             :          */
     918             : 
     919           1 :         TALLOC_CTX *tmp_ctx = talloc_new(NULL);
     920           1 :         const char *fixed_data = "aaaaaaaaaaaabaaaaaaaaaaaa";
     921           1 :         const uint8_t fixed_out[] = {
     922             :                 0xff, 0xff, 0xff, 0x5f, 0x61, 0x07, 0x00, 0x21,
     923             :                 0x62, 0x67, 0x00};
     924             : 
     925           1 :         ssize_t c_size;
     926           1 :         uint8_t *out, *out2;
     927             : 
     928           1 :         out  = talloc_size(tmp_ctx, 2048);
     929           1 :         memset(out, 0x42, talloc_get_size(out)); /* pre-fill with 0x42, presumably so unwritten bytes stand out */
     930             : 
     931           1 :         c_size = lzxpress_compress((const uint8_t *)fixed_data,
     932             :                                    strlen(fixed_data),
     933             :                                    out,
     934           1 :                                    talloc_get_size(out));
     935             : 
     936           1 :         assert_int_not_equal(c_size, -1);
     937           1 :         assert_int_equal(c_size, sizeof(fixed_out));
     938           1 :         assert_memory_equal(out, fixed_out, c_size);
     939             : 
     940           1 :         out2  = talloc_size(tmp_ctx, strlen(fixed_data));
     941           1 :         c_size = lzxpress_decompress(out,
     942             :                                      sizeof(fixed_out),
     943             :                                      out2,
     944           1 :                                      talloc_get_size(out2));
     945             : 
     946           1 :         assert_int_not_equal(c_size, -1);
     947           1 :         assert_int_equal(c_size, strlen(fixed_data));
     948           1 :         assert_memory_equal(out2, fixed_data, c_size);
     949             : 
     950           1 :         talloc_free(tmp_ctx);
     951           1 : }
     952             : 
     953           1 : static void test_lzxpress3(void **state)
     954             : {
     955             :         /*
     956             :          * Use a series of 31 literals, followed by a single minimum-length
     957             :          * match (and a terminating literal), to test setting indic_pos when the
     958             :          * 32-bit flags value overflows after a match.
                     :          *
                     :          * In fixed_out below, the 31 literal bytes follow the first four
                     :          * bytes; then come 0xf0 0x00 (apparently the match), a second
                     :          * four-byte group 0xff 0xff 0xff 0x7f, and the final literal 0x61.
     959             :          */
     960             : 
     961           1 :         TALLOC_CTX *tmp_ctx = talloc_new(NULL);
     962           1 :         const char *fixed_data = "abcdefghijklmnopqrstuvwxyz01234abca";
     963           1 :         const uint8_t fixed_out[] = {
     964             :                 0x01, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64,
     965             :                 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
     966             :                 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
     967             :                 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x30, 0x31,
     968             :                 0x32, 0x33, 0x34, 0xf0, 0x00, 0xff, 0xff, 0xff,
     969             :                 0x7f, 0x61};
     970             : 
     971           1 :         ssize_t c_size;
     972           1 :         uint8_t *out, *out2;
     973             : 
     974           1 :         out  = talloc_size(tmp_ctx, 2048);
     975           1 :         memset(out, 0x42, talloc_get_size(out)); /* pre-fill with 0x42, presumably so unwritten bytes stand out */
     976             : 
     977           1 :         c_size = lzxpress_compress((const uint8_t *)fixed_data,
     978             :                                    strlen(fixed_data),
     979             :                                    out,
     980           1 :                                    talloc_get_size(out));
     981             : 
     982           1 :         assert_int_not_equal(c_size, -1);
     983           1 :         assert_int_equal(c_size, sizeof(fixed_out));
     984           1 :         assert_memory_equal(out, fixed_out, c_size);
     985             : 
     986           1 :         out2  = talloc_size(tmp_ctx, strlen(fixed_data)); /* exactly the input length, with no spare room */
     987           1 :         c_size = lzxpress_decompress(out,
     988             :                                      sizeof(fixed_out),
     989             :                                      out2,
     990           1 :                                      talloc_get_size(out2));
     991             : 
     992           1 :         assert_int_not_equal(c_size, -1);
     993           1 :         assert_int_equal(c_size, strlen(fixed_data));
     994           1 :         assert_memory_equal(out2, fixed_data, c_size);
     995             : 
     996           1 :         talloc_free(tmp_ctx);
     997           1 : }
     998             : 
     999           1 : static void test_lzxpress4(void **state)
    1000             : {
    1001             :         /*
    1002             :          * Use a series of 31 literals, followed by a single minimum-length
    1003             :          * match, to test that the final set of 32-bit flags is written
    1004             :          * correctly when it is empty.
                     :          *
                     :          * In fixed_out below, the last four bytes are 0xff 0xff 0xff 0xff
                     :          * and nothing follows them; the input is the same as in
                     :          * test_lzxpress3 minus its final 'a'.
    1005             :          */
    1006             : 
    1007           1 :         TALLOC_CTX *tmp_ctx = talloc_new(NULL);
    1008           1 :         const char *fixed_data = "abcdefghijklmnopqrstuvwxyz01234abc";
    1009           1 :         const uint8_t fixed_out[] = {
    1010             :                 0x01, 0x00, 0x00, 0x00, 0x61, 0x62, 0x63, 0x64,
    1011             :                 0x65, 0x66, 0x67, 0x68, 0x69, 0x6a, 0x6b, 0x6c,
    1012             :                 0x6d, 0x6e, 0x6f, 0x70, 0x71, 0x72, 0x73, 0x74,
    1013             :                 0x75, 0x76, 0x77, 0x78, 0x79, 0x7a, 0x30, 0x31,
    1014             :                 0x32, 0x33, 0x34, 0xf0, 0x00, 0xff, 0xff, 0xff,
    1015             :                 0xff};
    1016             : 
    1017           1 :         ssize_t c_size;
    1018           1 :         uint8_t *out, *out2;
    1019             : 
    1020           1 :         out  = talloc_size(tmp_ctx, 2048);
    1021           1 :         memset(out, 0x42, talloc_get_size(out)); /* pre-fill with 0x42, presumably so unwritten bytes stand out */
    1022             : 
    1023           1 :         c_size = lzxpress_compress((const uint8_t *)fixed_data,
    1024             :                                    strlen(fixed_data),
    1025             :                                    out,
    1026           1 :                                    talloc_get_size(out));
    1027             : 
    1028           1 :         assert_int_not_equal(c_size, -1);
    1029           1 :         assert_int_equal(c_size, sizeof(fixed_out));
    1030           1 :         assert_memory_equal(out, fixed_out, c_size);
    1031             : 
    1032           1 :         out2  = talloc_size(tmp_ctx, strlen(fixed_data)); /* exactly the input length, with no spare room */
    1033           1 :         c_size = lzxpress_decompress(out,
    1034             :                                      sizeof(fixed_out),
    1035             :                                      out2,
    1036           1 :                                      talloc_get_size(out2));
    1037             : 
    1038           1 :         assert_int_not_equal(c_size, -1);
    1039           1 :         assert_int_equal(c_size, strlen(fixed_data));
    1040           1 :         assert_memory_equal(out2, fixed_data, c_size);
    1041             : 
    1042           1 :         talloc_free(tmp_ctx);
    1043           1 : }
    1044             : 
    1045             : 
    1046           1 : static void test_lzxpress_many_zeros(void **state)
    1047             : {
    1048             :         /*
    1049             :          * Round-trip a million zero bytes and loosely bound the time taken.
    1050             :          *
    1051             :          * Repeated values (zero is convenient but not special) lead to very
    1052             :          * long substring searches in compression, which can be very slow if
    1053             :          * we're not careful. Wall clock time *should* be < 0.1 seconds with
    1054             :          * the fix and around a minute without it.
    1055             :          *
    1056             :          * The timed region covers both the compression and the decompression.
    1057             :          * We try for CLOCK_THREAD_CPUTIME_ID, which should filter out some
    1058             :          * noise on the machine, falling back to other clocks if it fails,
    1059             :          * and set the threshold at 3 seconds.
    1060             :          */
    1061             : 
    1062           1 :         TALLOC_CTX *tmp_ctx = talloc_new(NULL);
    1063           1 :         const size_t N_ZEROS = 1000000;
    1064           1 :         const uint8_t *zeros = talloc_zero_size(tmp_ctx, N_ZEROS);
    1065           1 :         const ssize_t expected_c_size_max = 120;
    1066           1 :         const ssize_t expected_c_size_min = 93;
    1067           1 :         ssize_t c_size;
    1068           1 :         uint8_t *comp, *decomp;
    1069           1 :         static struct timespec t_start, t_end; /* NOTE(review): static looks unnecessary for these locals; confirm */
    1070           1 :         uint64_t elapsed_ns;
    1071             : 
    1072           1 :         if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t_start) != 0) { /* start of the timed region */
    1073           0 :                 if (clock_gettime(CUSTOM_CLOCK_MONOTONIC, &t_start) != 0) {
    1074           0 :                         clock_gettime(CLOCK_REALTIME, &t_start);
    1075             :                 }
    1076             :         }
    1077             : 
    1078           1 :         comp = talloc_zero_size(tmp_ctx, 2048); /* far smaller than the input; the zeros must compress well to fit */
    1079             : 
    1080           1 :         c_size = lzxpress_compress(zeros,
    1081             :                                    N_ZEROS,
    1082             :                                    comp,
    1083           1 :                                    talloc_get_size(comp));
    1084             :         /*
    1085             :          * Because our compression depends on heuristics, we don't insist on
    1086             :          * an exact size in this case, only that it falls within a range.
    1087             :          */
    1088             : 
    1089           1 :         assert_true(c_size <= expected_c_size_max);
    1090           1 :         assert_true(c_size >= expected_c_size_min);
    1091             : 
    1092           1 :         decomp = talloc_size(tmp_ctx, N_ZEROS * 2); /* output buffer is twice the original length */
    1093           1 :         c_size = lzxpress_decompress(comp,
    1094             :                                      c_size,
    1095             :                                      decomp,
    1096             :                                      N_ZEROS * 2); /* NOTE(review): this returned c_size is never checked below */
    1097             : 
    1098           1 :         if (clock_gettime(CLOCK_THREAD_CPUTIME_ID, &t_end) != 0) { /* end of the timed region, same fallback order */
    1099           0 :                 if (clock_gettime(CUSTOM_CLOCK_MONOTONIC, &t_end) != 0) {
    1100           0 :                         clock_gettime(CLOCK_REALTIME, &t_end);
    1101             :                 }
    1102             :         }
    1103           1 :         elapsed_ns = ( /* whole seconds scaled to nanoseconds, plus the nanosecond remainder */
    1104           1 :                 (t_end.tv_sec - t_start.tv_sec) * 1000U * 1000U * 1000U) +
    1105           1 :                 (t_end.tv_nsec - t_start.tv_nsec);
    1106           1 :         print_message("round-trip time: %"PRIu64" ns\n", elapsed_ns);
    1107           1 :         assert_true(elapsed_ns < 3 * 1000U * 1000U * 1000U); /* 3 seconds, in nanoseconds */
    1108           1 :         assert_memory_equal(decomp, zeros, N_ZEROS); /* only the first N_ZEROS output bytes are compared */
    1109             : 
    1110           1 :         talloc_free(tmp_ctx);
    1111           1 : }
    1112             : 
    1113             : 
    1114           1 : static void test_lzxpress_round_trip(void **state)
    1115             : {
    1116             :         /*
    1117             :          * Round-trip examples found via fuzzing, stored as base64 pairs.
    1118             :          */
    1119           1 :         TALLOC_CTX *tmp_ctx = talloc_new(NULL);
    1120           1 :         size_t i;
    1121           1 :         struct b64_pair { /* both fields are base64 text, decoded in the loop below */
    1122             :                 const char *uncompressed;
    1123             :                 const char *compressed;
    1124           1 :         } pairs[] = {
    1125             :                 {   /* this results in a trailing flags block */
    1126             :                         "AAICAmq/EKdP785YU2Ddh7d4vUtdlQyLeHV09LHpUBw=",
    1127             :                         "AAAAAAACAgJqvxCnT+/OWFNg3Ye3eL1LXZUMi3h1dPSx6VAc/////w==",
    1128             :                 },
    1129             :                 {    /* empty string compresses to empty string */
    1130             :                         "",  ""
    1131             :                 },
    1132             :         };
    1133           1 :         const size_t alloc_size = 1000; /* size of the scratch buffer shared by both directions */
    1134           1 :         uint8_t *data = talloc_array(tmp_ctx, uint8_t, alloc_size);
    1135             : 
    1136           4 :         for (i = 0; i < ARRAY_SIZE(pairs); i++) {
    1137           2 :                 ssize_t len;
    1138           2 :                 DATA_BLOB uncomp = base64_decode_data_blob_talloc(
    1139             :                         tmp_ctx,
    1140             :                         pairs[i].uncompressed);
    1141           2 :                 DATA_BLOB comp = base64_decode_data_blob_talloc(
    1142             :                         tmp_ctx,
    1143             :                         pairs[i].compressed);
    1144             : 
    1145           2 :                 len = lzxpress_compress(uncomp.data, /* forward: compress the plain bytes into data */
    1146             :                                         uncomp.length,
    1147             :                                         data,
    1148             :                                         alloc_size);
    1149             : 
    1150           2 :                 assert_int_not_equal(len, -1);
    1151           2 :                 assert_int_equal(len, comp.length);
    1152             : 
    1153           2 :                 assert_memory_equal(comp.data, data, len); /* output must match the stored compressed bytes exactly */
    1154             : 
    1155           2 :                 len = lzxpress_decompress(comp.data, /* reverse: decompress the stored bytes, overwriting data */
    1156             :                                           comp.length,
    1157             :                                           data,
    1158             :                                           alloc_size);
    1159             : 
    1160           2 :                 assert_int_not_equal(len, -1);
    1161           2 :                 assert_int_equal(len, uncomp.length);
    1162             : 
    1163           2 :                 assert_memory_equal(uncomp.data, data, len); /* output must match the stored plain bytes exactly */
    1164             :         }
    1165           1 :         talloc_free(tmp_ctx);
    1166           1 : }
    1167             : 
    1168             : 
    1169           1 : int main(void) /* registers the tests listed below and runs them as one cmocka group */
    1170             : {
    1171           1 :         const struct CMUnitTest tests[] = {
    1172             :                 cmocka_unit_test(test_lzxpress_plain_decompress_files),
    1173             :                 cmocka_unit_test(test_lzxpress_plain_decompress_more_compressed_files),
    1174             :                 cmocka_unit_test(test_lzxpress_plain_round_trip_files),
    1175             :                 cmocka_unit_test(test_lzxpress_plain_long_gpl_round_trip),
    1176             :                 cmocka_unit_test(test_lzxpress_plain_long_random_graph_round_trip),
    1177             :                 cmocka_unit_test(test_lzxpress_plain_chaos_graph_round_trip),
    1178             :                 cmocka_unit_test(test_lzxpress_plain_sparse_random_graph_round_trip),
    1179             :                 cmocka_unit_test(test_lzxpress_plain_long_random_graph_round_trip), /* NOTE(review): already listed three entries above, so it is registered twice; confirm intent */
    1180             :                 cmocka_unit_test(test_lzxpress_plain_random_noise_round_trip),
    1181             :                 cmocka_unit_test(test_lzxpress),
    1182             :                 cmocka_unit_test(test_msft_data1),
    1183             :                 cmocka_unit_test(test_msft_data2),
    1184             :                 cmocka_unit_test(test_lzxpress2),
    1185             :                 cmocka_unit_test(test_lzxpress3),
    1186             :                 cmocka_unit_test(test_lzxpress4),
    1187             :                 cmocka_unit_test(test_lzxpress_many_zeros),
    1188             :                 cmocka_unit_test(test_lzxpress_round_trip),
    1189             :         };
    1190           1 :         if (!isatty(1)) { /* stdout is not a terminal: switch cmocka to subunit output */
    1191           1 :                 cmocka_set_message_output(CM_OUTPUT_SUBUNIT);
    1192             :         }
    1193           1 :         return cmocka_run_group_tests(tests, NULL, NULL); /* NULL, NULL: no group setup or teardown function */
    1194             : }

Generated by: LCOV version 1.14