Backed out changeset 96d954eae2a0 (bug 1949947) requested by glandium.

Goloman Adrian
2025-03-12 02:25:38 +02:00
parent 687f08a875
commit 5b89d7c1ef
29 changed files with 725 additions and 741 deletions

Cargo.lock generated

@@ -3576,9 +3576,9 @@ dependencies = [
[[package]]
name = "libz-rs-sys"
version = "0.4.2"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "902bc563b5d65ad9bba616b490842ef0651066a1a1dc3ce1087113ffcb873c8d"
checksum = "a90e19106f1b2c93f1fa6cdeec2e56facbf2e403559c1e1c0ddcc6d46e979cdf"
dependencies = [
"zlib-rs",
]
@@ -7855,6 +7855,6 @@ dependencies = [
[[package]]
name = "zlib-rs"
version = "0.4.2"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b20717f0917c908dc63de2e44e97f1e6b126ca58d0e391cee86d504eb8fbd05"
checksum = "aada01553a9312bad4b9569035a1f12b05e5ec9770a1a4b323757356928944f8"


@@ -3093,8 +3093,8 @@ delta = "0.28.0 -> 0.31.0"
[[audits.libz-rs-sys]]
who = "Mike Hommey <mh+mozilla@glandium.org>"
criteria = "safe-to-deploy"
delta = "0.4.1 -> 0.4.2"
notes = "Only documentation changes."
delta = "0.2.1 -> 0.2.1@git:4aa430ccb77537d0d60dab8db993ca51bb1194c5"
importable = false
[[audits.linked-hash-map]]
who = "Aria Beingessner <a.beingessner@gmail.com>"


@@ -1940,11 +1940,6 @@ who = "Ameer Ghani <inahga@divviup.org>"
criteria = "safe-to-deploy"
delta = "0.4.0 -> 0.4.1"
[[audits.isrg.audits.zlib-rs]]
who = "Ameer Ghani <inahga@divviup.org>"
criteria = "safe-to-deploy"
delta = "0.4.1 -> 0.4.2"
[[audits.mozilla.wildcard-audits.weedle2]]
who = "Jan-Erik Rediger <jrediger@mozilla.com>"
criteria = "safe-to-deploy"


@@ -1 +1 @@
{"files":{"Cargo.lock":"4e51d6269a51ea8732fd452060a11235871dc8209a06f711e8515517b0ea47ad","Cargo.toml":"50c7e977783911c7b8c09b75c6a2d129c3c5be709dbfb78aefdad22cef6849ce","LICENSE":"e72111c52b7d96ebe25348dee19f0744f444d3c95ae6b1ecb6ccaecc5bce05ba","README.md":"e0a044d7f02db9dfe9b7462ccf527207c591d8ac226d6465e125e98b88111d9b","src/lib.rs":"1c85cd2696e769762c7c37ca7dff8109fb7491f10b838bb5e837bce39b8c9aa3"},"package":"902bc563b5d65ad9bba616b490842ef0651066a1a1dc3ce1087113ffcb873c8d"}
{"files":{"Cargo.toml":"33f49be9129ddbe0c32b7bf627a67b7e1e239cfd7aa9e1c9414844f68e7afbc7","LICENSE":"e72111c52b7d96ebe25348dee19f0744f444d3c95ae6b1ecb6ccaecc5bce05ba","README.md":"46f48b56018d0efef5738be7d930019631899dede51ee5e92f44bd53f6e26749","src/lib.rs":"eb21c2e4d653c6f4a781a81492ebc4483175c4609078d534e7f15e6e0e095a56"},"package":"a90e19106f1b2c93f1fa6cdeec2e56facbf2e403559c1e1c0ddcc6d46e979cdf"}


@@ -1,16 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "libz-rs-sys"
version = "0.4.2"
dependencies = [
"zlib-rs",
]
[[package]]
name = "zlib-rs"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b20717f0917c908dc63de2e44e97f1e6b126ca58d0e391cee86d504eb8fbd05"


@@ -13,7 +13,7 @@
edition = "2021"
rust-version = "1.75"
name = "libz-rs-sys"
version = "0.4.2"
version = "0.4.1"
build = false
publish = true
autolib = false
@@ -32,7 +32,7 @@ name = "libz_rs_sys"
path = "src/lib.rs"
[dependencies.zlib-rs]
version = "0.4.2"
version = "0.4.1"
default-features = false
[features]


@@ -3,7 +3,7 @@ This crate is a C API for [zlib-rs](https://docs.rs/zlib-rs/latest/zlib_rs/). Th
From a rust perspective, this API is not very ergonomic. Use the [`flate2`](https://crates.io/crates/flate2) crate for a more
ergonomic rust interface to zlib.
## Features
# Features
**`custom-prefix`**
@@ -29,13 +29,13 @@ Pick the default allocator implementation that is used if no `zalloc` and `zfree
- `c-allocator`: use `malloc`/`free` for the implementation of `zalloc` and `zfree`
- `rust-allocator`: the rust global allocator for the implementation of `zalloc` and `zfree`
The `rust-allocator` is the default when this crate is used as a rust dependency, and slightly more efficient because alignment is handled by the allocator. When building a dynamic library, it may make sense to use `c-allocator` instead.
The `rust-allocator` is the default when this crate is used as a rust dependency, and slightly more efficient because alignment is handled by the allocator. When building a dynamic library, it may make sense to use `c-allocator` instead.
**`std`**
Assume that `std` is available. When this feature is turned off, this crate is compatible with `#![no_std]`.
## Example
# Example
This example compresses ("deflates") the string `"Hello, World!"` and then decompresses
("inflates") it again.
@@ -88,39 +88,3 @@ let inflated = &output[..strm.total_out as usize];
assert_eq!(inflated, input.as_bytes())
```
## Compression Levels
The zlib library supports compression levels 0 up to and including 9. The level indicates a tradeoff between time spent on the compression versus the compression ratio, the factor by which the input is reduced in size:
- level 0: no compression at all
- level 1: fastest compression
- level 6: default (a good tradeoff between speed and compression ratio)
- level 9: best compression
Beyond this intuition, the exact behavior of the compression levels is not specified. The implementation of `zlib-rs` follows the implementation of [`zlib-ng`](https://github.com/zlib-ng/zlib-ng), and deviates from the one in stock zlib.
In particular, our compression level 1 is extremely fast, but also just does not compress that well. On the `silesia-small.tar` input file, we see these output sizes:
| implementation | compression level | output size (mb) |
| --- | --- | --- |
| - | 0 | `15.74` |
| stock | 1 | ` 7.05` |
| rs | 1 | ` 8.52` |
| rs | 2 | ` 6.90` |
| rs | 4 | ` 6.55` |
But, `zlib-rs` is much faster than stock zlib. In our benchmarks, it is only at level 4 that we spend roughly as much time as stock zlib on level 1:
| implementation | compression level | wall time (ms) |
| --- | --- | --- |
| stock | 1 | 185 |
| rs | 2 | 139 |
| rs | 4 | 181 |
In our example, the main options are:
- level 1: worse compression, but much faster
- level 2: equivalent compression, but significantly faster
- level 4: better compression, at the same speed
In summary, when you upgrade from stock zlib, we recommend that you benchmark on your data and target platform, and pick the right compression level for your use case.
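As a hedged illustration of that advice, the sketch below compresses the same input at the levels discussed above and prints the output sizes. It goes through the `flate2` crate this README recommends; routing it to the zlib-rs backend via flate2's `zlib-rs` feature is an assumption about the consumer's Cargo setup, not something this diff establishes.

```rust
use std::io::Write;

use flate2::{write::ZlibEncoder, Compression};

fn main() -> std::io::Result<()> {
    // Stand-in input; benchmark with your real data as the README suggests.
    let input = vec![b'a'; 1 << 20];
    for level in [1u32, 2, 4] {
        // ZlibEncoder writes a zlib-wrapped deflate stream into the Vec.
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::new(level));
        encoder.write_all(&input)?;
        let compressed = encoder.finish()?;
        println!("level {level}: {} bytes", compressed.len());
    }
    Ok(())
}
```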


@@ -712,7 +712,7 @@ pub unsafe extern "C-unwind" fn inflateReset(strm: *mut z_stream) -> i32 {
///
/// - [`Z_OK`] if success
/// - [`Z_STREAM_ERROR`] if the source stream state was inconsistent, or if the `windowBits`
/// parameter is invalid
/// parameter is invalid
///
/// # Safety
///
@@ -788,8 +788,8 @@ pub unsafe extern "C-unwind" fn inflateSetDictionary(
/// - The `text`, `time`, `xflags`, and `os` fields are filled in with the gzip header contents.
/// - `hcrc` is set to true if there is a header CRC. (The header CRC was valid if done is set to one.)
/// - If `extra` is not `NULL`, then `extra_max` contains the maximum number of bytes to write to extra.
/// Once `done` is `true`, `extra_len` contains the actual extra field length,
/// and `extra` contains the extra field, or that field truncated if `extra_max` is less than `extra_len`.
/// Once `done` is `true`, `extra_len` contains the actual extra field length,
/// and `extra` contains the extra field, or that field truncated if `extra_max` is less than `extra_len`.
/// - If `name` is not `NULL`, then up to `name_max` characters are written there, terminated with a zero unless the length is greater than `name_max`.
/// - If `comment` is not `NULL`, then up to `comm_max` characters are written there, terminated with a zero unless the length is greater than `comm_max`.
///
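To make the truncation rule in this doc comment concrete: once `done` is set, the number of bytes actually present in `extra` is the smaller of the two lengths. A minimal sketch (the helper name is hypothetical, not part of the crate's API):

```rust
// Hypothetical helper mirroring the doc comment above: `extra` holds the
// whole extra field, or its truncation to `extra_max` bytes.
fn extra_bytes_present(extra_len: u32, extra_max: u32) -> u32 {
    extra_len.min(extra_max)
}
```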


@@ -1 +1 @@
{"files":{"Cargo.lock":"8f77aba472fab186eed3ed07f3d7c0160536107dc86ea521f02917ecf930afab","Cargo.toml":"4a7d43e17e2e2f35c327c37921ec615e35849896c8b44e6a28ff759b33888380","LICENSE":"e72111c52b7d96ebe25348dee19f0744f444d3c95ae6b1ecb6ccaecc5bce05ba","README.md":"9938581c82330440be5f3b6b9125cc02c0874b250dc62093f167bf2158dbe29a","src/adler32.rs":"5077a887f8ed1b54ce436f4c50765cfb2773bde20b0ea21ca938dd6faf986fb7","src/adler32/avx2.rs":"b484a541efe367774248fb0a562ea9a8b788b48e721d68873b91e1d135f9af6b","src/adler32/generic.rs":"4ee8d80f0fdfae891e036592ca86a0ecc0817c1ece7e9057bab5f2d17669657a","src/adler32/neon.rs":"46efef19fce0fa81dc4015508781f7e686203251723c169fb71d92c07cfa58ac","src/adler32/wasm.rs":"235628f7ad37316ddfa6a62e2811ce5f90e2673247a2df46d64ad04708a548c7","src/allocate.rs":"398f6d622444cede8c828b56f0e60f1d4b97b5da258cdf6980123c3c0b6589d6","src/c_api.rs":"8328f52b477beecfc7f68240c40e775516cb773dad8c46cf24aad178c4ea0f6c","src/cpu_features.rs":"b9c7df9afd1e09787c182594ed9ce995e0cd449ec6051e3ba2d11f35574f0c78","src/crc32.rs":"8a67c0f5eee767399344a93f2a6f314127a8b6d8fd5b332cc51d42c7995ddeaf","src/crc32/acle.rs":"de881ff90d938f9ff38bfaa3f51c74ca75f9d5152a0427e145c28fe27bd40209","src/crc32/braid.rs":"e1f25477829f43b46529aded6b52c2094990d6c2dbe8d6ddcf907e80fc1ae0e0","src/crc32/combine.rs":"a1aded8c5f1886f60daad9765886299c65feb5240f24f8e9f67ebced14e267f0","src/crc32/pclmulqdq.rs":"70e4fa9d408ca804f496403d87f56e51a6a58c808750e90854aaa03af2143bc2","src/deflate.rs":"60f14c532a3dbe0cc7fca4926a5cb680011d7ad37edf6b98ef4e8b8dd247d271","src/deflate/algorithm/fast.rs":"7009bda9f552d1b158cd030a9f1db015bbde807f31808709533df5cdcb19f47c","src/deflate/algorithm/huff.rs":"2ed0a098571d4e056bb4e1d8655ec8d37e6d291ba3e2d5d7c581c2486e6abbce","src/deflate/algorithm/medium.rs":"88fd17246a8f4e9f98f3868fe4d8081894d4f5942de89a689c399976b0e25c43","src/deflate/algorithm/mod.rs":"184151cde5952a4ff0029c6647705be5f884d558bd8552a3d29f9c7a16598c93","src/deflate/algorithm/quick.rs":"7e59bcfb9994ac9c89d34781f2b175135c134fab9595c4fe00e3cb5fa7170cac","src/deflate/algorithm/rle.rs":"549427a5a8a69610afd612f89a9cbde97fe78c38c85442083b5dde10e8be4d73","src/deflate/algorithm/slow.rs":"3da90214d031f178f9bc896abadcbb67ad4c790d6fa0560f4bd63121f439e659","src/deflate/algorithm/stored.rs":"40c60591307409a5ea5f8e8505fbae70ca4a69a258b151c829db9bc5ffe02e04","src/deflate/compare256.rs":"3398a810b1cf86114523329e357684dc9cbd9f5793783d101d5994206504f2d8","src/deflate/hash_calc.rs":"9a148125444f822c80a25e8215d406b77000799ced223e229e86b0c96c69cf8b","src/deflate/longest_match.rs":"53edbca5db81df33cc0aa25527c7f24e0a32b0c09d0c1d131cb9cc492e8b28e7","src/deflate/pending.rs":"0f476c2d43429c864ce8545e65137af57369f907d9f884a66b166e40d7dbc510","src/deflate/slide_hash.rs":"6069f7d02259a6defafa67c6397b0388bf6220fec3b7d15b1deb63bce4288de5","src/deflate/test-data/inflate_buf_error.dat":"254f280f8f1e8914bd12d8bcd3813e5c08083b1b46d8d643c8e2ebe109e75cb8","src/deflate/test-data/paper-100k.pdf":"60f73a051b7ca35bfec44734b2eed7736cb5c0b7f728beb7b97ade6c5e44849b","src/deflate/trees_tbl.rs":"cbb897a638b6fa7bd007b8394dbaa5ac52c6d935e21e9593d66e8c348b6e44c7","src/deflate/window.rs":"c7f1ec7e0c3ffe38608b35e839e198cd6f3676ecb08a741b1a5265236eceb7b1","src/inflate.rs":"ceabfa271c2969a2635caa25a030f51c64e0d41e26f8351e29cd92cae8e3a694","src/inflate/bitreader.rs":"cac96b20be765bd1645219145f6a1dacdff6f34b9f0c3bb66cdae6d1a9aa574d","src/inflate/inffixed_tbl.rs":"eb1ed1927ca07b61fe30ae8461ce62e7da28c595416e687a26db57c8eac8f4a1","src/inflate/inftrees.rs":"44efb568c9cc2dbbc6c51e50f3cc38d6
c8e896b93936f47b3879396fc814abfe","src/inflate/window.rs":"f0b65ef776685c64a03b75491e9f60ecad3279eac01f549a3e859023102000d2","src/inflate/writer.rs":"4b3260eec9e8d726fc3b7107ea4d96a0c11dfb2ccea5621b87629c7130527d54","src/lib.rs":"d47f73e77dcfeb1f1a8063def826a82f491f0cbe06914aa89bf30db06acf2a00","src/read_buf.rs":"e9724f21763589cfd3e2c3bca6b6b1cbf5b3bec9445e19eb7947a1fb2a2064ff","src/weak_slice.rs":"1a2075ba9bbd7c3305282c17b7467c66c5c0c464be3346fb2d25b2c73c62792c"},"package":"8b20717f0917c908dc63de2e44e97f1e6b126ca58d0e391cee86d504eb8fbd05"}
{"files":{"Cargo.toml":"bead245e487afcf0b64a5d633761ecb8121feefaee2054e5b09f39d8e5457784","LICENSE":"e72111c52b7d96ebe25348dee19f0744f444d3c95ae6b1ecb6ccaecc5bce05ba","README.md":"9938581c82330440be5f3b6b9125cc02c0874b250dc62093f167bf2158dbe29a","src/adler32.rs":"5077a887f8ed1b54ce436f4c50765cfb2773bde20b0ea21ca938dd6faf986fb7","src/adler32/avx2.rs":"b484a541efe367774248fb0a562ea9a8b788b48e721d68873b91e1d135f9af6b","src/adler32/generic.rs":"4ee8d80f0fdfae891e036592ca86a0ecc0817c1ece7e9057bab5f2d17669657a","src/adler32/neon.rs":"46efef19fce0fa81dc4015508781f7e686203251723c169fb71d92c07cfa58ac","src/adler32/wasm.rs":"235628f7ad37316ddfa6a62e2811ce5f90e2673247a2df46d64ad04708a548c7","src/allocate.rs":"d5afe99477b7cb22b5fbbd59860e91dcfbdc3a1b0f622bb98876d7eba994ac3a","src/c_api.rs":"8328f52b477beecfc7f68240c40e775516cb773dad8c46cf24aad178c4ea0f6c","src/cpu_features.rs":"67f44433971d7ae094dbe9d67b14170bc1f5eed585cbf707a21a0425f7233afa","src/crc32.rs":"20397402d6692304354f73542315c90dd5311c62e6a21f9d7d7d20e00f9234ac","src/crc32/acle.rs":"2eebb297ca47d0ad4cc49e455c42e48a4a2f58b885b3da63a0a9f7961f2e95f3","src/crc32/braid.rs":"e1f25477829f43b46529aded6b52c2094990d6c2dbe8d6ddcf907e80fc1ae0e0","src/crc32/combine.rs":"a1aded8c5f1886f60daad9765886299c65feb5240f24f8e9f67ebced14e267f0","src/crc32/pclmulqdq.rs":"70e4fa9d408ca804f496403d87f56e51a6a58c808750e90854aaa03af2143bc2","src/deflate.rs":"b4a57cd6057d7a83bdbca8423d02fbe8eb7cf6ec6b5a6e0f97fc812ea93e5ba4","src/deflate/algorithm/fast.rs":"686c0a35c1baff2d842287354f919e166fe5eca1748ad46ed14d6127611bffa0","src/deflate/algorithm/huff.rs":"2ed0a098571d4e056bb4e1d8655ec8d37e6d291ba3e2d5d7c581c2486e6abbce","src/deflate/algorithm/medium.rs":"03237619c654ee48ce176c7a6dd685025634fa9686d1c0602066b07d13659f10","src/deflate/algorithm/mod.rs":"184151cde5952a4ff0029c6647705be5f884d558bd8552a3d29f9c7a16598c93","src/deflate/algorithm/quick.rs":"8d44e91a21de91316b6bf577f3b4318e1895a4aeae3afafad44ff5db0c7fb2f8","src/deflate/algorithm/rle.rs":"549427a5a8a69610afd612f89a9cbde97fe78c38c85442083b5dde10e8be4d73","src/deflate/algorithm/slow.rs":"2fa351c77604fad7d5e113ed3b90ba2abc83be0ff589a0e367d012aee5ce967b","src/deflate/algorithm/stored.rs":"40c60591307409a5ea5f8e8505fbae70ca4a69a258b151c829db9bc5ffe02e04","src/deflate/compare256.rs":"3398a810b1cf86114523329e357684dc9cbd9f5793783d101d5994206504f2d8","src/deflate/hash_calc.rs":"057bc4d1cde94860c1e66d675c05786ccd80c797409bf0c68d6e258756e0e30b","src/deflate/longest_match.rs":"f164f072061ad5724217d213a43207d6ba7f8df31b6dec141878ec7df9b9761b","src/deflate/pending.rs":"1212cd8b301c0ccf1eb2532a16465dd1d296b9e4ad061c8fc448d68904b22b03","src/deflate/slide_hash.rs":"6069f7d02259a6defafa67c6397b0388bf6220fec3b7d15b1deb63bce4288de5","src/deflate/test-data/inflate_buf_error.dat":"254f280f8f1e8914bd12d8bcd3813e5c08083b1b46d8d643c8e2ebe109e75cb8","src/deflate/test-data/paper-100k.pdf":"60f73a051b7ca35bfec44734b2eed7736cb5c0b7f728beb7b97ade6c5e44849b","src/deflate/trees_tbl.rs":"503c65c7648405619a95dc9f5a52ecd558e439e870c116f61ef94128c6a4c52e","src/deflate/window.rs":"f864752ef33615f73fab2e2033358e1915f55a999738289b93a478d89e107557","src/inflate.rs":"6ada7430c815e4fec0a29ba78ef53b1ae7d231465d3586fc47be722678ae22a8","src/inflate/bitreader.rs":"cac96b20be765bd1645219145f6a1dacdff6f34b9f0c3bb66cdae6d1a9aa574d","src/inflate/inffixed_tbl.rs":"eb1ed1927ca07b61fe30ae8461ce62e7da28c595416e687a26db57c8eac8f4a1","src/inflate/inftrees.rs":"44efb568c9cc2dbbc6c51e50f3cc38d6c8e896b93936f47b3879396fc814abfe","src/inflate/window.rs":"55eb946c50bc8798c9965
b44227364decb36ff89969530d78dd5f96a3e09f6b8","src/inflate/writer.rs":"bb0968dbc6f8f881ffe49bcad3fd5037eecdbdb0e04e24661c276e924f67ee31","src/lib.rs":"d47f73e77dcfeb1f1a8063def826a82f491f0cbe06914aa89bf30db06acf2a00","src/read_buf.rs":"9b79c1c3aa0454758d0a4ab8365e62b34e33fdb57590755d9f09c52f75b038ee","src/weak_slice.rs":"1a2075ba9bbd7c3305282c17b7467c66c5c0c464be3346fb2d25b2c73c62792c"},"package":"aada01553a9312bad4b9569035a1f12b05e5ec9770a1a4b323757356928944f8"}

third_party/rust/zlib-rs/Cargo.lock generated vendored

@@ -1,148 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "arbitrary"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223"
dependencies = [
"derive_arbitrary",
]
[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "crc32fast"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
dependencies = [
"cfg-if",
]
[[package]]
name = "derive_arbitrary"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "libc"
version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "memoffset"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
dependencies = [
"autocfg",
]
[[package]]
name = "proc-macro2"
version = "1.0.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quickcheck"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
dependencies = [
"rand",
]
[[package]]
name = "quote"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "syn"
version = "2.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "zlib-rs"
version = "0.4.2"
dependencies = [
"arbitrary",
"crc32fast",
"memoffset",
"quickcheck",
]


@@ -13,7 +13,7 @@
edition = "2021"
rust-version = "1.75"
name = "zlib-rs"
version = "0.4.2"
version = "0.4.1"
build = false
publish = true
autolib = false
@@ -45,9 +45,6 @@ default-features = false
[dev-dependencies.crc32fast]
version = "1.3.2"
[dev-dependencies.memoffset]
version = "0.9.1"
[dev-dependencies.quickcheck]
version = "1.0.3"
features = []


@@ -4,7 +4,6 @@ use core::{
alloc::Layout,
ffi::{c_uint, c_void},
marker::PhantomData,
ptr::NonNull,
};
#[cfg(feature = "rust-allocator")]
@@ -246,21 +245,33 @@ impl Allocator<'_> {
ptr
}
pub fn allocate_raw<T>(&self) -> Option<NonNull<T>> {
NonNull::new(self.allocate_layout(Layout::new::<T>()).cast())
pub fn allocate_raw<T>(&self) -> Option<*mut T> {
let ptr = self.allocate_layout(Layout::new::<T>());
if ptr.is_null() {
None
} else {
Some(ptr as *mut T)
}
}
pub fn allocate_slice_raw<T>(&self, len: usize) -> Option<NonNull<T>> {
NonNull::new(self.allocate_layout(Layout::array::<T>(len).ok()?).cast())
pub fn allocate_slice_raw<T>(&self, len: usize) -> Option<*mut T> {
let ptr = self.allocate_layout(Layout::array::<T>(len).ok()?);
if ptr.is_null() {
None
} else {
Some(ptr.cast())
}
}
pub fn allocate_zeroed(&self, len: usize) -> Option<NonNull<u8>> {
pub fn allocate_zeroed(&self, len: usize) -> *mut u8 {
#[cfg(feature = "rust-allocator")]
if self.zalloc == Allocator::RUST.zalloc {
// internally, we want to align allocations to 64 bytes (in part for SIMD reasons)
let layout = Layout::from_size_align(len, 64).unwrap();
return NonNull::new(unsafe { std::alloc::System.alloc_zeroed(layout) });
return unsafe { std::alloc::System.alloc_zeroed(layout) };
}
#[cfg(feature = "c-allocator")]
@@ -274,18 +285,24 @@ impl Allocator<'_> {
let ptr = alloc.allocate_layout(Layout::array::<u8>(len).ok().unwrap());
return NonNull::new(ptr.cast());
if ptr.is_null() {
return core::ptr::null_mut();
}
return ptr.cast();
}
// create the allocation (contents are uninitialized)
let ptr = self.allocate_layout(Layout::array::<u8>(len).ok().unwrap());
let ptr = NonNull::new(ptr)?;
if ptr.is_null() {
return core::ptr::null_mut();
}
// zero all contents (thus initializing the buffer)
unsafe { core::ptr::write_bytes(ptr.as_ptr(), 0, len) };
unsafe { core::ptr::write_bytes(ptr, 0, len) };
Some(ptr.cast())
ptr.cast()
}
/// # Panics
@@ -356,11 +373,11 @@ mod tests {
_marker: PhantomData,
};
let ptr = allocator.allocate_raw::<T>().unwrap().as_ptr();
let ptr = allocator.allocate_raw::<T>().unwrap();
assert_eq!(ptr as usize % core::mem::align_of::<T>(), 0);
unsafe { allocator.deallocate(ptr, 1) }
let ptr = allocator.allocate_slice_raw::<T>(10).unwrap().as_ptr();
let ptr = allocator.allocate_slice_raw::<T>(10).unwrap();
assert_eq!(ptr as usize % core::mem::align_of::<T>(), 0);
unsafe { allocator.deallocate(ptr, 10) }
}
@@ -411,15 +428,15 @@ mod tests {
fn test_allocate_zeroed_help(allocator: Allocator) {
let len = 42;
let Some(buf) = allocator.allocate_zeroed(len) else {
return;
};
let buf = allocator.allocate_zeroed(len);
let slice = unsafe { core::slice::from_raw_parts_mut(buf.as_ptr(), len) };
if !buf.is_null() {
let slice = unsafe { core::slice::from_raw_parts_mut(buf, len) };
assert_eq!(slice.iter().sum::<u8>(), 0);
assert_eq!(slice.iter().sum::<u8>(), 0);
}
unsafe { allocator.deallocate(buf.as_ptr(), len) };
unsafe { allocator.deallocate(buf, len) };
}
#[test]
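For context on the `NonNull` side of this hunk (the code being backed out), here is a hedged sketch of the pattern, using std's global allocator instead of zlib-rs's `zalloc`/`zfree` plumbing:

```rust
use core::alloc::Layout;
use core::ptr::NonNull;

// Sketch only: wrapping a possibly-null raw allocation in Option<NonNull<T>>
// moves the null check to the type system, so callers cannot forget it.
// Assumes T is not zero-sized (a zero-size Layout must not be passed to alloc).
fn allocate_raw<T>() -> Option<NonNull<T>> {
    let layout = Layout::new::<T>();
    assert_ne!(layout.size(), 0);
    // SAFETY: the layout has non-zero size, as asserted above.
    let ptr = unsafe { std::alloc::alloc(layout) };
    NonNull::new(ptr.cast::<T>())
}

fn main() {
    if let Some(p) = allocate_raw::<u64>() {
        // SAFETY: p came from the global allocator with u64's layout.
        unsafe { std::alloc::dealloc(p.as_ptr().cast(), Layout::new::<u64>()) };
    }
}
```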


@@ -1,13 +1,6 @@
#![allow(dead_code)]
#![allow(unreachable_code)]
pub struct CpuFeatures;
impl CpuFeatures {
pub const NONE: usize = 0;
pub const AVX2: usize = 1;
}
#[inline(always)]
pub fn is_enabled_sse() -> bool {
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]


@@ -52,14 +52,24 @@ impl Crc32Fold {
}
}
#[cfg_attr(not(target_arch = "x86_64"), allow(unused))]
pub(crate) fn is_pclmulqdq_enabled() -> bool {
crate::cpu_features::is_enabled_pclmulqdq()
}
#[cfg_attr(not(target_arch = "aarch64"), allow(unused))]
pub(crate) fn is_crc_enabled() -> bool {
crate::cpu_features::is_enabled_crc()
}
pub fn fold(&mut self, src: &[u8], _start: u32) {
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_pclmulqdq() {
if Self::is_pclmulqdq_enabled() {
return unsafe { self.fold.fold(src, _start) };
}
#[cfg(target_arch = "aarch64")]
if crate::cpu_features::is_enabled_crc() {
if Self::is_crc_enabled() {
self.value = unsafe { self::acle::crc32_acle_aarch64(self.value, src) };
return;
}
@@ -70,7 +80,7 @@ impl Crc32Fold {
pub fn fold_copy(&mut self, dst: &mut [u8], src: &[u8]) {
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_pclmulqdq() {
if Self::is_pclmulqdq_enabled() {
return unsafe { self.fold.fold_copy(dst, src) };
}
@@ -80,7 +90,7 @@ impl Crc32Fold {
pub fn finish(self) -> u32 {
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_pclmulqdq() {
if Self::is_pclmulqdq_enabled() {
return unsafe { self.fold.finish() };
}
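The hunks above reroute the runtime CPU-feature checks through small named helpers. As a general, hedged sketch of the dispatch pattern itself (names and the kernel body are illustrative, not zlib-rs's actual code):

```rust
// Bitwise CRC-32 (IEEE, reflected polynomial 0xEDB88320) used as the portable
// fallback and as a stand-in body for the accelerated path below.
fn crc32_scalar(mut crc: u32, data: &[u8]) -> u32 {
    crc = !crc;
    for &b in data {
        crc ^= b as u32;
        for _ in 0..8 {
            crc = (crc >> 1) ^ (0xEDB88320 & 0u32.wrapping_sub(crc & 1));
        }
    }
    !crc
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "pclmulqdq")]
unsafe fn crc32_simd(crc: u32, data: &[u8]) -> u32 {
    // Stand-in body; the real kernel folds the buffer with carry-less multiplies.
    crc32_scalar(crc, data)
}

fn crc32(crc: u32, data: &[u8]) -> u32 {
    #[cfg(target_arch = "x86_64")]
    if is_x86_feature_detected!("pclmulqdq") {
        // SAFETY: the required CPU feature was verified at runtime just above.
        return unsafe { crc32_simd(crc, data) };
    }
    crc32_scalar(crc, data)
}
```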


@@ -3,7 +3,6 @@
//! The functions in this module must only be executed on an ARM system with the CRC feature.
#[cfg_attr(not(target_arch = "aarch64"), allow(unused))]
#[target_feature(enable = "crc")]
pub unsafe fn crc32_acle_aarch64(crc: u32, buf: &[u8]) -> u32 {
let mut c = !crc;
@@ -25,9 +24,29 @@ pub unsafe fn crc32_acle_aarch64(crc: u32, buf: &[u8]) -> u32 {
!c
}
#[inline]
#[target_feature(enable = "crc")]
unsafe fn remainder(mut c: u32, mut buf: &[u8]) -> u32 {
#[cfg_attr(not(target_arch = "arm"), allow(unused))]
pub unsafe fn crc32_acle_arm(crc: u32, buf: &[u8]) -> u32 {
let mut c = !crc;
// SAFETY: [u8; 4] safely transmutes into u32.
let (before, middle, after) = unsafe { buf.align_to::<u32>() };
c = remainder(c, before);
if middle.is_empty() && after.is_empty() {
return !c;
}
for w in middle {
c = unsafe { __crc32w(c, *w) };
}
c = remainder(c, after);
!c
}
fn remainder(mut c: u32, mut buf: &[u8]) -> u32 {
if let [b0, b1, b2, b3, rest @ ..] = buf {
c = unsafe { __crc32w(c, u32::from_le_bytes([*b0, *b1, *b2, *b3])) };
buf = rest;
@@ -48,9 +67,6 @@ unsafe fn remainder(mut c: u32, mut buf: &[u8]) -> u32 {
c
}
// FIXME the intrinsics below are stable since rust 1.80.0: remove these and use the standard
// library versions once our MSRV reaches that version.
/// CRC32 single round checksum for bytes (8 bits).
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32b)
@@ -99,6 +115,18 @@ unsafe fn __crc32d(mut crc: u32, data: u64) -> u32 {
}
}
/// CRC32-C single round checksum for words (32 bits).
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cw)
#[target_feature(enable = "crc")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
pub unsafe fn __crc32cw(mut crc: u32, data: u32) -> u32 {
unsafe {
core::arch::asm!("crc32cw {crc:w}, {crc:w}, {data:w}", crc = inout(reg) crc, data = in(reg) data);
crc
}
}
#[cfg(test)]
mod tests {
use super::*;
@@ -114,11 +142,21 @@ mod tests {
a == b
}
fn crc32_acle_arm_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
let mut h = crc32fast::Hasher::new_with_initial(start);
h.update(&v);
let a = unsafe { crc32_acle_arm(start, &v) };
let b = h.finalize();
a == b
}
}
#[test]
fn test_crc32b() {
if !crate::cpu_features::is_enabled_crc() {
if !crate::crc32::Crc32Fold::is_crc_enabled() {
return;
}
@@ -130,7 +168,7 @@ mod tests {
#[test]
fn test_crc32h() {
if !crate::cpu_features::is_enabled_crc() {
if !crate::crc32::Crc32Fold::is_crc_enabled() {
return;
}
@@ -142,7 +180,7 @@ mod tests {
#[test]
fn test_crc32w() {
if !crate::cpu_features::is_enabled_crc() {
if !crate::crc32::Crc32Fold::is_crc_enabled() {
return;
}
@@ -155,7 +193,7 @@ mod tests {
#[test]
#[cfg(target_arch = "aarch64")]
fn test_crc32d() {
if !crate::cpu_features::is_enabled_crc() {
if !crate::crc32::Crc32Fold::is_crc_enabled() {
return;
}
@@ -164,4 +202,16 @@ mod tests {
assert_eq!(__crc32d(0, 18446744073709551615), 1147535477);
}
}
#[test]
fn test_crc32cw() {
if !crate::crc32::Crc32Fold::is_crc_enabled() {
return;
}
unsafe {
assert_eq!(__crc32cw(0, 0), 0);
assert_eq!(__crc32cw(0, 4294967295), 3080238136);
}
}
}


@@ -14,7 +14,7 @@ use crate::{
use self::{
algorithm::CONFIGURATION_TABLE,
hash_calc::{HashCalcVariant, RollHashCalc, StandardHashCalc},
hash_calc::{Crc32HashCalc, HashCalcVariant, RollHashCalc, StandardHashCalc},
pending::Pending,
trees_tbl::STATIC_LTREE,
window::Window,
@@ -29,9 +29,6 @@ mod slide_hash;
mod trees_tbl;
mod window;
// Position relative to the current window
pub(crate) type Pos = u16;
// SAFETY: This struct must have the same layout as [`z_stream`], so that casts and transmutations
// between the two can work without UB.
#[repr(C)]
@@ -285,16 +282,16 @@ pub fn init(stream: &mut z_stream, config: DeflateConfig) -> ReturnCode {
pending.drop_in(&alloc);
}
if let Some(head) = head {
alloc.deallocate(head.as_ptr(), 1)
alloc.deallocate(head, 1)
}
if let Some(prev) = prev {
alloc.deallocate(prev.as_ptr(), w_size)
alloc.deallocate(prev, w_size)
}
if let Some(mut window) = window {
window.drop_in(&alloc);
}
alloc.deallocate(state_allocation.as_ptr(), 1);
alloc.deallocate(state_allocation, 1);
}
return ReturnCode::MemError;
@@ -302,12 +299,10 @@ pub fn init(stream: &mut z_stream, config: DeflateConfig) -> ReturnCode {
};
// zero initialize the memory
let prev = prev.as_ptr(); // FIXME: write_bytes is stable for NonNull since 1.80.0
unsafe { prev.write_bytes(0, w_size) };
let prev = unsafe { WeakSliceMut::from_raw_parts_mut(prev, w_size) };
// zero out head's first element
let head = head.as_ptr(); // FIXME: write_bytes is stable for NonNull since 1.80.0
unsafe { head.write_bytes(0, 1) };
let head = unsafe { WeakArrayMut::<u16, HASH_SIZE>::from_ptr(head) };
@@ -315,6 +310,7 @@ pub fn init(stream: &mut z_stream, config: DeflateConfig) -> ReturnCode {
status: Status::Init,
// window
w_bits: window_bits,
w_size,
w_mask: w_size - 1,
@@ -357,6 +353,11 @@ pub fn init(stream: &mut z_stream, config: DeflateConfig) -> ReturnCode {
d_desc: TreeDesc::EMPTY,
bl_desc: TreeDesc::EMPTY,
bl_count: [0u16; MAX_BITS + 1],
//
heap: Heap::new(),
//
crc_fold: Crc32Fold::new(),
gzhead: None,
@@ -364,22 +365,17 @@ pub fn init(stream: &mut z_stream, config: DeflateConfig) -> ReturnCode {
//
match_start: 0,
match_length: 0,
prev_match: 0,
match_available: false,
prev_length: 0,
// just provide a valid default; gets set properly later
hash_calc_variant: HashCalcVariant::Standard,
_cache_line_0: (),
_cache_line_1: (),
_cache_line_2: (),
_cache_line_3: (),
_padding_0: 0,
};
unsafe { state_allocation.as_ptr().write(state) }; // FIXME: write is stable for NonNull since 1.80.0
stream.state = state_allocation.as_ptr() as *mut internal_state;
unsafe { state_allocation.write(state) };
stream.state = state_allocation as *mut internal_state;
let Some(stream) = (unsafe { DeflateStream::from_stream_mut(stream) }) else {
if cfg!(debug_assertions) {
@@ -600,16 +596,16 @@ pub fn copy<'a>(
pending.drop_in(alloc);
}
if let Some(head) = head {
alloc.deallocate(head.as_ptr(), HASH_SIZE)
alloc.deallocate(head, HASH_SIZE)
}
if let Some(prev) = prev {
alloc.deallocate(prev.as_ptr(), source_state.w_size)
alloc.deallocate(prev, source_state.w_size)
}
if let Some(mut window) = window {
window.drop_in(alloc);
}
alloc.deallocate(state_allocation.as_ptr(), 1);
alloc.deallocate(state_allocation, 1);
}
return ReturnCode::MemError;
@@ -617,14 +613,11 @@ pub fn copy<'a>(
};
let prev = unsafe {
let prev = prev.as_ptr();
prev.copy_from_nonoverlapping(source_state.prev.as_ptr(), source_state.prev.len());
WeakSliceMut::from_raw_parts_mut(prev, source_state.prev.len())
};
// FIXME: write_bytes is stable for NonNull since 1.80.0
let head = unsafe {
let head = head.as_ptr();
head.write_bytes(0, 1);
head.cast::<u16>().write(source_state.head.as_slice()[0]);
WeakArrayMut::from_ptr(head)
@@ -646,6 +639,8 @@ pub fn copy<'a>(
l_desc: source_state.l_desc.clone(),
d_desc: source_state.d_desc.clone(),
bl_desc: source_state.bl_desc.clone(),
bl_count: source_state.bl_count,
match_length: source_state.match_length,
prev_match: source_state.prev_match,
match_available: source_state.match_available,
strstart: source_state.strstart,
@@ -664,28 +659,25 @@ pub fn copy<'a>(
static_len: source_state.static_len,
insert: source_state.insert,
w_size: source_state.w_size,
w_bits: source_state.w_bits,
w_mask: source_state.w_mask,
lookahead: source_state.lookahead,
prev,
head,
ins_h: source_state.ins_h,
heap: source_state.heap.clone(),
hash_calc_variant: source_state.hash_calc_variant,
crc_fold: source_state.crc_fold,
gzhead: None,
gzindex: source_state.gzindex,
_cache_line_0: (),
_cache_line_1: (),
_cache_line_2: (),
_cache_line_3: (),
_padding_0: source_state._padding_0,
};
// write the cloned state into state_ptr
unsafe { state_allocation.as_ptr().write(dest_state) }; // FIXME: write is stable for NonNull since 1.80.0
unsafe { state_allocation.write(dest_state) };
// insert the state_ptr into `dest`
let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state) };
unsafe { core::ptr::write(field_ptr as *mut *mut State, state_allocation.as_ptr()) };
unsafe { core::ptr::write(field_ptr as *mut *mut State, state_allocation) };
// update the gzhead field (it contains a mutable reference so we need to be careful)
let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state.gzhead) };
@@ -792,10 +784,10 @@ fn lm_init(state: &mut State) {
}
fn lm_set_level(state: &mut State, level: i8) {
state.max_lazy_match = CONFIGURATION_TABLE[level as usize].max_lazy;
state.good_match = CONFIGURATION_TABLE[level as usize].good_length;
state.nice_match = CONFIGURATION_TABLE[level as usize].nice_length;
state.max_chain_length = CONFIGURATION_TABLE[level as usize].max_chain;
state.max_lazy_match = CONFIGURATION_TABLE[level as usize].max_lazy as usize;
state.good_match = CONFIGURATION_TABLE[level as usize].good_length as usize;
state.nice_match = CONFIGURATION_TABLE[level as usize].nice_length as usize;
state.max_chain_length = CONFIGURATION_TABLE[level as usize].max_chain as usize;
state.hash_calc_variant = HashCalcVariant::for_max_chain_length(state.max_chain_length);
state.level = level;
@@ -808,10 +800,10 @@ pub fn tune(
nice_length: usize,
max_chain: usize,
) -> ReturnCode {
stream.state.good_match = good_length as u16;
stream.state.max_lazy_match = max_lazy as u16;
stream.state.nice_match = nice_length as u16;
stream.state.max_chain_length = max_chain as u16;
stream.state.good_match = good_length;
stream.state.max_lazy_match = max_lazy;
stream.state.nice_match = nice_length;
stream.state.max_chain_length = max_chain;
ReturnCode::Ok
}
@@ -849,18 +841,15 @@ impl Value {
self.a
}
#[inline(always)]
pub(crate) const fn code(self) -> u16 {
pub(crate) fn code(self) -> u16 {
self.a
}
#[inline(always)]
pub(crate) const fn dad(self) -> u16 {
pub(crate) fn dad(self) -> u16 {
self.b
}
#[inline(always)]
pub(crate) const fn len(self) -> u16 {
pub(crate) fn len(self) -> u16 {
self.b
}
}
@@ -904,50 +893,6 @@ struct BitWriter<'a> {
bits_sent: usize,
}
#[inline]
const fn encode_len(ltree: &[Value], lc: u8) -> (u64, usize) {
let mut lc = lc as usize;
/* Send the length code, len is the match length - STD_MIN_MATCH */
let code = self::trees_tbl::LENGTH_CODE[lc] as usize;
let c = code + LITERALS + 1;
assert!(c < L_CODES, "bad l_code");
// send_code_trace(s, c);
let lnode = ltree[c];
let mut match_bits: u64 = lnode.code() as u64;
let mut match_bits_len = lnode.len() as usize;
let extra = StaticTreeDesc::EXTRA_LBITS[code] as usize;
if extra != 0 {
lc -= self::trees_tbl::BASE_LENGTH[code] as usize;
match_bits |= (lc as u64) << match_bits_len;
match_bits_len += extra;
}
(match_bits, match_bits_len)
}
#[inline]
const fn encode_dist(dtree: &[Value], mut dist: u16) -> (u64, usize) {
dist -= 1; /* dist is now the match distance - 1 */
let code = State::d_code(dist as usize) as usize;
assert!(code < D_CODES, "bad d_code");
// send_code_trace(s, code);
/* Send the distance code */
let dnode = dtree[code];
let mut match_bits = dnode.code() as u64;
let mut match_bits_len = dnode.len() as usize;
let extra = StaticTreeDesc::EXTRA_DBITS[code] as usize;
if extra != 0 {
dist -= self::trees_tbl::BASE_DIST[code];
match_bits |= (dist as u64) << match_bits_len;
match_bits_len += extra;
}
(match_bits, match_bits_len)
}
impl<'a> BitWriter<'a> {
pub(crate) const BIT_BUF_SIZE: u8 = 64;
@@ -1108,30 +1053,41 @@ impl<'a> BitWriter<'a> {
ltree: &[Value],
dtree: &[Value],
lc: u8,
dist: u16,
mut dist: usize,
) -> usize {
let (mut match_bits, mut match_bits_len) = encode_len(ltree, lc);
let mut lc = lc as usize;
let (dist_match_bits, dist_match_bits_len) = encode_dist(dtree, dist);
/* Send the length code, len is the match length - STD_MIN_MATCH */
let mut code = self::trees_tbl::LENGTH_CODE[lc] as usize;
let c = code + LITERALS + 1;
assert!(c < L_CODES, "bad l_code");
// send_code_trace(s, c);
match_bits |= dist_match_bits << match_bits_len;
match_bits_len += dist_match_bits_len;
let lnode = ltree[c];
let mut match_bits: u64 = lnode.code() as u64;
let mut match_bits_len = lnode.len() as usize;
let mut extra = StaticTreeDesc::EXTRA_LBITS[code] as usize;
if extra != 0 {
lc -= self::trees_tbl::BASE_LENGTH[code] as usize;
match_bits |= (lc as u64) << match_bits_len;
match_bits_len += extra;
}
self.send_bits(match_bits, match_bits_len as u8);
dist -= 1; /* dist is now the match distance - 1 */
code = State::d_code(dist) as usize;
assert!(code < D_CODES, "bad d_code");
// send_code_trace(s, code);
match_bits_len
}
pub(crate) fn emit_dist_static(&mut self, lc: u8, dist: u16) -> usize {
let precomputed_len = trees_tbl::STATIC_LTREE_ENCODINGS[lc as usize];
let mut match_bits = precomputed_len.code() as u64;
let mut match_bits_len = precomputed_len.len() as usize;
let dtree = self::trees_tbl::STATIC_DTREE.as_slice();
let (dist_match_bits, dist_match_bits_len) = encode_dist(dtree, dist);
match_bits |= dist_match_bits << match_bits_len;
match_bits_len += dist_match_bits_len;
/* Send the distance code */
let dnode = dtree[code];
match_bits |= (dnode.code() as u64) << match_bits_len;
match_bits_len += dnode.len() as usize;
extra = StaticTreeDesc::EXTRA_DBITS[code] as usize;
if extra != 0 {
dist -= self::trees_tbl::BASE_DIST[code] as usize;
match_bits |= (dist as u64) << match_bits_len;
match_bits_len += extra;
}
self.send_bits(match_bits, match_bits_len as u8);
@@ -1144,7 +1100,7 @@ impl<'a> BitWriter<'a> {
unreachable!("out of bound access on the symbol buffer");
};
match u16::from_le_bytes([dist_low, dist_high]) {
match u16::from_be_bytes([dist_high, dist_low]) as usize {
0 => self.emit_lit(ltree, lc) as usize,
dist => self.emit_dist(ltree, dtree, lc, dist),
};
@@ -1218,7 +1174,7 @@ impl<'a> BitWriter<'a> {
}
}
#[repr(C, align(64))]
#[repr(C)]
pub(crate) struct State<'a> {
status: Status,
@@ -1230,47 +1186,40 @@ pub(crate) struct State<'a> {
pub(crate) level: i8,
/// Whether or not a block is currently open for the QUICK deflation scheme.
/// 0 if the block is closed, 1 if there is an active block, or 2 if there
/// is an active block and it is the last block.
/// true if there is an active block, or false if the block was just closed
pub(crate) block_open: u8,
pub(crate) hash_calc_variant: HashCalcVariant,
pub(crate) match_available: bool, /* set if previous match exists */
bit_writer: BitWriter<'a>,
/// Use a faster search when the previous match is longer than this
pub(crate) good_match: u16,
pub(crate) good_match: usize,
/// Stop searching when current match exceeds this
pub(crate) nice_match: u16,
pub(crate) nice_match: usize,
pub(crate) match_start: Pos, /* start of matching string */
pub(crate) prev_match: Pos, /* previous match */
// part of the fields below
// dyn_ltree: [Value; ],
// dyn_dtree: [Value; ],
// bl_tree: [Value; ],
l_desc: TreeDesc<HEAP_SIZE>, /* literal and length tree */
d_desc: TreeDesc<{ 2 * D_CODES + 1 }>, /* distance tree */
bl_desc: TreeDesc<{ 2 * BL_CODES + 1 }>, /* Huffman tree for bit lengths */
pub(crate) bl_count: [u16; MAX_BITS + 1],
pub(crate) match_length: usize, /* length of best match */
pub(crate) prev_match: u16, /* previous match */
pub(crate) match_available: bool, /* set if previous match exists */
pub(crate) strstart: usize, /* start of string to insert */
pub(crate) window: Window<'a>,
pub(crate) w_size: usize, /* LZ77 window size (32K by default) */
pub(crate) w_mask: usize, /* w_size - 1 */
_cache_line_0: (),
/// prev[N], where N is an offset in the current window, contains the offset in the window
/// of the previous 4-byte sequence that hashes to the same value as the 4-byte sequence
/// starting at N. Together with head, prev forms a chained hash table that can be used
/// to find earlier strings in the window that are potential matches for new input being
/// deflated.
pub(crate) prev: WeakSliceMut<'a, u16>,
/// head[H] contains the offset of the last 4-character sequence seen so far in
/// the current window that hashes to H (as calculated using the hash_calc_variant).
pub(crate) head: WeakArrayMut<'a, u16, HASH_SIZE>,
pub(crate) match_start: usize, /* start of matching string */
/// Length of the best match at previous step. Matches not greater than this
/// are discarded. This is used in the lazy match evaluation.
pub(crate) prev_length: u16,
pub(crate) prev_length: usize,
/// To speed up deflation, hash chains are never searched beyond this length.
/// A higher limit improves compression ratio but degrades the speed.
pub(crate) max_chain_length: u16,
pub(crate) max_chain_length: usize,
// TODO untangle this mess! zlib uses the same field differently based on compression level
// we should just have 2 fields for clarity!
@@ -1281,21 +1230,15 @@ pub(crate) struct State<'a> {
// define max_insert_length max_lazy_match
/// Attempt to find a better match only when the current match is strictly smaller
/// than this value. This mechanism is used only for compression levels >= 4.
pub(crate) max_lazy_match: u16,
/// number of string matches in current block
/// NOTE: this is a saturating 8-bit counter, to help keep the struct compact. The code that
/// makes decisions based on this field only cares whether the count is greater than 2, so
/// an 8-bit counter is sufficient.
pub(crate) matches: u8,
pub(crate) max_lazy_match: usize,
/// Window position at the beginning of the current output block. Gets
/// negative when the window is moved backwards.
pub(crate) block_start: isize,
pub(crate) sym_buf: ReadBuf<'a>,
pub(crate) window: Window<'a>,
_cache_line_1: (),
pub(crate) sym_buf: ReadBuf<'a>,
/// Size of match buffer for literals/lengths. There are 4 reasons for
/// limiting lit_bufsize to 64K:
@@ -1316,12 +1259,11 @@ pub(crate) struct State<'a> {
/// - I can't count above 4
lit_bufsize: usize,
/// Actual size of window: 2*w_size, except when the user input buffer is directly used as sliding window.
/// Actual size of window: 2*wSize, except when the user input buffer is directly used as sliding window.
pub(crate) window_size: usize,
bit_writer: BitWriter<'a>,
_cache_line_2: (),
/// number of string matches in current block
pub(crate) matches: usize,
/// bit length of current block with optimal trees
opt_len: usize,
@@ -1331,23 +1273,24 @@ pub(crate) struct State<'a> {
/// bytes at end of window left to insert
pub(crate) insert: usize,
pub(crate) w_size: usize, /* LZ77 window size (32K by default) */
pub(crate) w_bits: usize, /* log2(w_size) (8..16) */
pub(crate) w_mask: usize, /* w_size - 1 */
pub(crate) lookahead: usize, /* number of valid bytes ahead in window */
pub(crate) prev: WeakSliceMut<'a, u16>,
pub(crate) head: WeakArrayMut<'a, u16, HASH_SIZE>,
/// hash index of string to be inserted
pub(crate) ins_h: u32,
pub(crate) ins_h: usize,
gzhead: Option<&'a mut gz_header>,
gzindex: usize,
heap: Heap,
_padding_0: usize,
_cache_line_3: (),
pub(crate) hash_calc_variant: HashCalcVariant,
crc_fold: crate::crc32::Crc32Fold,
l_desc: TreeDesc<HEAP_SIZE>, /* literal and length tree */
d_desc: TreeDesc<{ 2 * D_CODES + 1 }>, /* distance tree */
bl_desc: TreeDesc<{ 2 * BL_CODES + 1 }>, /* Huffman tree for bit lengths */
gzhead: Option<&'a mut gz_header>,
gzindex: usize,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
@@ -1386,11 +1329,6 @@ enum DataType {
impl<'a> State<'a> {
pub const BIT_BUF_SIZE: u8 = BitWriter::BIT_BUF_SIZE;
// log2(w_size) (in the range MIN_WBITS..=MAX_WBITS)
pub(crate) fn w_bits(&self) -> u32 {
self.w_size.trailing_zeros()
}
pub(crate) fn max_dist(&self) -> usize {
self.w_size - MIN_LOOKAHEAD
}
@@ -1398,7 +1336,7 @@ impl<'a> State<'a> {
// TODO untangle this mess! zlib uses the same field differently based on compression level
// we should just have 2 fields for clarity!
pub(crate) fn max_insert_length(&self) -> usize {
self.max_lazy_match as usize
self.max_lazy_match
}
/// Total size of the pending buf. But because `pending` shares memory with `sym_buf`, this is
@@ -1411,6 +1349,9 @@ impl<'a> State<'a> {
pub(crate) fn update_hash(&self, h: u32, val: u32) -> u32 {
match self.hash_calc_variant {
HashCalcVariant::Standard => StandardHashCalc::update_hash(h, val),
// SAFETY: self.hash_calc_variant is set by HashCalcVariant::for_max_chain_length,
// which avoids choosing Crc32 if the system doesn't have support.
HashCalcVariant::Crc32 => unsafe { Crc32HashCalc::update_hash(h, val) },
HashCalcVariant::Roll => RollHashCalc::update_hash(h, val),
}
}
@@ -1419,6 +1360,9 @@ impl<'a> State<'a> {
pub(crate) fn quick_insert_string(&mut self, string: usize) -> u16 {
match self.hash_calc_variant {
HashCalcVariant::Standard => StandardHashCalc::quick_insert_string(self, string),
// SAFETY: self.hash_calc_variant is set by HashCalcVariant::for_max_chain_length,
// which avoids choosing Crc32 if the system doesn't have support.
HashCalcVariant::Crc32 => unsafe { Crc32HashCalc::quick_insert_string(self, string) },
HashCalcVariant::Roll => RollHashCalc::quick_insert_string(self, string),
}
}
@@ -1427,6 +1371,9 @@ impl<'a> State<'a> {
pub(crate) fn insert_string(&mut self, string: usize, count: usize) {
match self.hash_calc_variant {
HashCalcVariant::Standard => StandardHashCalc::insert_string(self, string, count),
// SAFETY: self.hash_calc_variant is set by HashCalcVariant::for_max_chain_length,
// which avoids choosing Crc32 if the system doesn't have support.
HashCalcVariant::Crc32 => unsafe { Crc32HashCalc::insert_string(self, string, count) },
HashCalcVariant::Roll => RollHashCalc::insert_string(self, string, count),
}
}
@@ -1464,7 +1411,7 @@ impl<'a> State<'a> {
pub(crate) fn tally_dist(&mut self, mut dist: usize, len: usize) -> bool {
self.sym_buf.push_dist(dist as u16, len as u8);
self.matches = self.matches.saturating_add(1);
self.matches += 1;
dist -= 1;
assert!(
@@ -1511,19 +1458,11 @@ impl<'a> State<'a> {
}
fn compress_block_static_trees(&mut self) {
let ltree = self::trees_tbl::STATIC_LTREE.as_slice();
for chunk in self.sym_buf.filled().chunks_exact(3) {
let [dist_low, dist_high, lc] = *chunk else {
unreachable!("out of bound access on the symbol buffer");
};
match u16::from_le_bytes([dist_low, dist_high]) {
0 => self.bit_writer.emit_lit(ltree, lc) as usize,
dist => self.bit_writer.emit_dist_static(lc, dist),
};
}
self.bit_writer.emit_end_block(ltree, false)
self.bit_writer.compress_block_help(
self.sym_buf.filled(),
self::trees_tbl::STATIC_LTREE.as_slice(),
self::trees_tbl::STATIC_DTREE.as_slice(),
)
}
fn compress_block_dynamic_trees(&mut self) {
@@ -1547,7 +1486,7 @@ impl<'a> State<'a> {
};
let h =
(Z_DEFLATED + ((self.w_bits() as u16 - 8) << 4)) << 8 | (self.level_flags() << 6) | dict;
(Z_DEFLATED + ((self.w_bits as u16 - 8) << 4)) << 8 | (self.level_flags() << 6) | dict;
h + 31 - (h % 31)
}
@@ -1660,6 +1599,7 @@ pub(crate) fn read_buf_window(stream: &mut DeflateStream, offset: usize, size: u
// we likely cannot fuse the crc32 and the copy here because the input can be changed by
// a concurrent thread. Therefore it cannot be converted into a slice!
let window = &mut stream.state.window;
window.initialize_at_least(offset + len);
// SAFETY: len is bounded by avail_in, so this copy is in bounds.
unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) };
@@ -1669,6 +1609,7 @@ pub(crate) fn read_buf_window(stream: &mut DeflateStream, offset: usize, size: u
// we likely cannot fuse the adler32 and the copy here because the input can be changed by
// a concurrent thread. Therefore it cannot be converted into a slice!
let window = &mut stream.state.window;
window.initialize_at_least(offset + len);
// SAFETY: len is bounded by avail_in, so this copy is in bounds.
unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) };
@@ -1676,6 +1617,7 @@ pub(crate) fn read_buf_window(stream: &mut DeflateStream, offset: usize, size: u
stream.adler = adler32(stream.adler as u32, data) as _;
} else {
let window = &mut stream.state.window;
window.initialize_at_least(offset + len);
// SAFETY: len is bounded by avail_in, so this copy is in bounds.
unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) };
}
@@ -1733,31 +1675,36 @@ pub(crate) const WANT_MIN_MATCH: usize = 4;
pub(crate) const MIN_LOOKAHEAD: usize = STD_MAX_MATCH + STD_MIN_MATCH + 1;
#[inline]
pub(crate) fn fill_window(stream: &mut DeflateStream) {
debug_assert!(stream.state.lookahead < MIN_LOOKAHEAD);
let wsize = stream.state.w_size;
loop {
let state = &mut *stream.state;
let state = &mut stream.state;
let mut more = state.window_size - state.lookahead - state.strstart;
// If the window is almost full and there is insufficient lookahead,
// move the upper half to the lower one to make room in the upper half.
if state.strstart >= wsize + state.max_dist() {
// shift the window to the left
let (old, new) = state.window.filled_mut()[..2 * wsize].split_at_mut(wsize);
old.copy_from_slice(new);
// in some cases zlib-ng copies uninitialized bytes here. We cannot have that, so
// explicitly initialize them with zeros.
//
// see also the "fill_window_out_of_bounds" test.
state.window.initialize_at_least(2 * wsize);
state.window.filled_mut().copy_within(wsize..2 * wsize, 0);
state.match_start = state.match_start.saturating_sub(wsize as u16);
if state.match_start == 0 {
if state.match_start >= wsize {
state.match_start -= wsize;
} else {
state.match_start = 0;
state.prev_length = 0;
}
state.strstart -= wsize; /* we now have strstart >= MAX_DIST */
state.block_start -= wsize as isize;
state.insert = Ord::min(state.insert, state.strstart);
if state.insert > state.strstart {
state.insert = state.strstart;
}
self::slide_hash::slide_hash(state);
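A toy model of the window slide this hunk adjusts, under the assumption that the window holds at least `2 * wsize` initialized bytes (field names follow the diff; the function itself is illustrative):

```rust
// The upper half of the window is copied down to the lower half, and all
// window-relative offsets shift left by wsize.
fn slide(window: &mut [u8], wsize: usize, strstart: &mut usize, match_start: &mut usize) {
    window.copy_within(wsize..2 * wsize, 0);
    *strstart -= wsize; // we now have strstart >= max_dist
    *match_start = match_start.saturating_sub(wsize);
}
```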
@@ -1782,7 +1729,7 @@ pub(crate) fn fill_window(stream: &mut DeflateStream) {
let n = read_buf_window(stream, stream.state.strstart + stream.state.lookahead, more);
let state = &mut *stream.state;
let state = &mut stream.state;
state.lookahead += n;
// Initialize the hash value now that we have some input:
@@ -1791,7 +1738,7 @@ pub(crate) fn fill_window(stream: &mut DeflateStream) {
if state.max_chain_length > 1024 {
let v0 = state.window.filled()[string] as u32;
let v1 = state.window.filled()[string + 1] as u32;
state.ins_h = state.update_hash(v0, v1);
state.ins_h = state.update_hash(v0, v1) as usize;
} else if string >= 1 {
state.quick_insert_string(string + 2 - STD_MIN_MATCH);
}
@@ -1813,6 +1760,11 @@ pub(crate) fn fill_window(stream: &mut DeflateStream) {
}
}
// initialize some memory at the end of the (filled) window, so SIMD operations can go "out of
// bounds" without violating any requirements. The window allocation is already slightly bigger
// to allow for this.
stream.state.window.initialize_out_of_bounds();
assert!(
stream.state.strstart <= stream.state.window_size - MIN_LOOKAHEAD,
"not enough room for search"
@@ -1906,15 +1858,14 @@ fn build_tree<const N: usize>(state: &mut State, desc: &mut TreeDesc<N>) {
let stree = desc.stat_desc.static_tree;
let elements = desc.stat_desc.elems;
let mut heap = Heap::new();
let mut max_code = heap.initialize(&mut tree[..elements]);
let mut max_code = state.heap.initialize(&mut tree[..elements]);
// The pkzip format requires that at least one distance code exists,
// and that at least one bit should be sent even if there is only one
// possible code. So to avoid special checks later on we force at least
// two codes of non zero frequency.
while heap.heap_len < 2 {
heap.heap_len += 1;
while state.heap.heap_len < 2 {
state.heap.heap_len += 1;
let node = if max_code < 2 {
max_code += 1;
max_code
@@ -1925,9 +1876,9 @@ fn build_tree<const N: usize>(state: &mut State, desc: &mut TreeDesc<N>) {
debug_assert!(node >= 0);
let node = node as usize;
heap.heap[heap.heap_len] = node as u32;
state.heap.heap[state.heap.heap_len] = node as u32;
*tree[node].freq_mut() = 1;
heap.depth[node] = 0;
state.heap.depth[node] = 0;
state.opt_len -= 1;
if !stree.is_empty() {
state.static_len -= stree[node].len() as usize;
@@ -1941,27 +1892,25 @@ fn build_tree<const N: usize>(state: &mut State, desc: &mut TreeDesc<N>) {
// The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
// establish sub-heaps of increasing lengths:
let mut n = heap.heap_len / 2;
let mut n = state.heap.heap_len / 2;
while n >= 1 {
heap.pqdownheap(tree, n);
state.heap.pqdownheap(tree, n);
n -= 1;
}
heap.construct_huffman_tree(tree, elements);
state.heap.construct_huffman_tree(tree, elements);
// At this point, the fields freq and dad are set. We can now
// generate the bit lengths.
let bl_count = gen_bitlen(state, &mut heap, desc);
gen_bitlen(state, desc);
// The field len is now set, we can generate the bit codes
gen_codes(&mut desc.dyn_tree, max_code, &bl_count);
gen_codes(&mut desc.dyn_tree, max_code, &state.bl_count);
}
fn gen_bitlen<const N: usize>(
state: &mut State,
heap: &mut Heap,
desc: &mut TreeDesc<N>,
) -> [u16; MAX_BITS + 1] {
fn gen_bitlen<const N: usize>(state: &mut State, desc: &mut TreeDesc<N>) {
let heap = &mut state.heap;
let tree = &mut desc.dyn_tree;
let max_code = desc.max_code;
let stree = desc.stat_desc.static_tree;
@@ -1969,7 +1918,7 @@ fn gen_bitlen<const N: usize>(
let base = desc.stat_desc.extra_base;
let max_length = desc.stat_desc.max_length;
let mut bl_count = [0u16; MAX_BITS + 1];
state.bl_count.fill(0);
// In a first pass, compute the optimal bit lengths (which may
// overflow in the case of the bit length tree).
@@ -1995,7 +1944,7 @@ fn gen_bitlen<const N: usize>(
continue;
}
bl_count[bits as usize] += 1;
state.bl_count[bits as usize] += 1;
let mut xbits = 0;
if n >= base {
xbits = extra[n - base] as usize;
@@ -2010,18 +1959,18 @@ fn gen_bitlen<const N: usize>(
}
if overflow == 0 {
return bl_count;
return;
}
/* Find the first bit length which could increase: */
loop {
let mut bits = max_length as usize - 1;
while bl_count[bits] == 0 {
while state.bl_count[bits] == 0 {
bits -= 1;
}
bl_count[bits] -= 1; /* move one leaf down the tree */
bl_count[bits + 1] += 2; /* move one overflow item as its brother */
bl_count[max_length as usize] -= 1;
state.bl_count[bits] -= 1; /* move one leaf down the tree */
state.bl_count[bits + 1] += 2; /* move one overflow item as its brother */
state.bl_count[max_length as usize] -= 1;
/* The brother of the overflow item also moves one step up,
* but this does not affect bl_count[max_length]
*/
@@ -2038,7 +1987,7 @@ fn gen_bitlen<const N: usize>(
// from 'ar' written by Haruhiko Okumura.)
let mut h = HEAP_SIZE;
for bits in (1..=max_length).rev() {
let mut n = bl_count[bits as usize];
let mut n = state.bl_count[bits as usize];
while n != 0 {
h -= 1;
let m = heap.heap[h] as usize;
@@ -2056,7 +2005,6 @@ fn gen_bitlen<const N: usize>(
n -= 1;
}
}
bl_count
}
/// Checks that symbol is a printing character (excluding space)
@@ -2969,37 +2917,34 @@ impl Heap {
/// Index within the heap array of least frequent node in the Huffman tree
const SMALLEST: usize = 1;
fn smaller(tree: &[Value], n: u32, m: u32, depth: &[u8]) -> bool {
let (n, m) = (n as usize, m as usize);
match Ord::cmp(&tree[n].freq(), &tree[m].freq()) {
core::cmp::Ordering::Less => true,
core::cmp::Ordering::Equal => depth[n] <= depth[m],
core::cmp::Ordering::Greater => false,
}
}
fn pqdownheap(&mut self, tree: &[Value], mut k: usize) {
/* tree: the tree to restore */
/* k: node to move down */
// Given the index $i of a node in the tree, pack the node's frequency and depth
// into a single integer. The heap ordering logic uses a primary sort on frequency
// and a secondary sort on depth, so packing both into one integer makes it
// possible to sort with fewer comparison operations.
macro_rules! freq_and_depth {
($i:expr) => {
(tree[$i as usize].freq() as u32) << 8 | self.depth[$i as usize] as u32
};
}
let v = self.heap[k];
let v_val = freq_and_depth!(v);
let mut j = k << 1; /* left son of k */
while j <= self.heap_len {
/* Set j to the smallest of the two sons: */
let mut j_val = freq_and_depth!(self.heap[j]);
if j < self.heap_len {
let j1_val = freq_and_depth!(self.heap[j + 1]);
if j1_val <= j_val {
let cond = Self::smaller(tree, self.heap[j + 1], self.heap[j], &self.depth);
if cond {
j += 1;
j_val = j1_val;
}
}
/* Exit if v is smaller than both sons */
if v_val <= j_val {
if Self::smaller(tree, v, self.heap[j], &self.depth) {
break;
}
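To make the packing comment above concrete, a minimal sketch (assuming, as the macro does, that depth fits in 8 bits so freq occupies the high bits):

```rust
// Lexicographic (freq, depth) ordering collapses to a single integer
// comparison once both values are packed into one word.
fn packed(freq: u16, depth: u8) -> u32 {
    (freq as u32) << 8 | depth as u32
}

fn smaller(a: (u16, u8), b: (u16, u8)) -> bool {
    packed(a.0, a.1) <= packed(b.0, b.1)
}
```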
@@ -3195,7 +3140,7 @@ pub fn bound(stream: Option<&mut DeflateStream>, source_len: usize) -> usize {
}
};
if stream.state.w_bits() != MAX_WBITS as u32 || HASH_BITS < 15 {
if stream.state.w_bits != MAX_WBITS as usize || HASH_BITS < 15 {
if stream.state.level == 0 {
/* upper bound for stored blocks with length 127 (memLevel == 1): the 5 header bytes per block give ~4% overhead, plus a small constant */
source_len
@@ -3428,7 +3373,7 @@ mod test {
};
assert_eq!(init(&mut stream, config), ReturnCode::Ok);
let stream = unsafe { DeflateStream::from_stream_mut(&mut stream) }.unwrap();
assert_eq!(stream.state.w_bits(), 9);
assert_eq!(stream.state.w_bits, 9);
assert!(end(stream).is_ok());
}
@@ -4214,27 +4159,34 @@ mod test {
strategy: Strategy::Default,
};
let expected = [
let crc32 = [
24, 149, 99, 96, 96, 96, 96, 208, 6, 17, 112, 138, 129, 193, 128, 1, 29, 24, 50, 208,
1, 200, 146, 169, 79, 24, 74, 59, 96, 147, 52, 71, 22, 70, 246, 88, 26, 94, 80, 128,
83, 6, 162, 219, 144, 76, 183, 210, 5, 8, 67, 105, 7, 108, 146, 230, 216, 133, 145,
129, 22, 3, 3, 131, 17, 3, 0, 3, 228, 25, 128,
];
let other = [
24, 149, 99, 96, 96, 96, 96, 208, 6, 17, 112, 138, 129, 193, 128, 1, 29, 24, 50, 208,
1, 200, 146, 169, 79, 24, 74, 59, 96, 147, 52, 71, 22, 70, 246, 88, 26, 94, 80, 128,
83, 6, 162, 219, 144, 76, 183, 210, 5, 8, 67, 105, 36, 159, 35, 128, 57, 118, 97, 100,
160, 197, 192, 192, 96, 196, 0, 0, 3, 228, 25, 128,
];
fuzz_based_test(&input, config, &expected);
}
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
mod _cache_lines {
use super::State;
// FIXME: once zlib-rs Minimum Supported Rust Version >= 1.77, switch to core::mem::offset_of
// and move this _cache_lines module up a level, from tests to super::
use memoffset::offset_of;
const _: () = assert!(offset_of!(State, status) == 0);
const _: () = assert!(offset_of!(State, _cache_line_0) == 64);
const _: () = assert!(offset_of!(State, _cache_line_1) == 128);
const _: () = assert!(offset_of!(State, _cache_line_2) == 192);
const _: () = assert!(offset_of!(State, _cache_line_3) == 256);
// the output is slightly different based on what hashing algorithm is used
match HashCalcVariant::for_compression_level(config.level as usize) {
HashCalcVariant::Crc32 => {
// the aarch64 hashing algorithm is different from the standard algorithm, but in
// this case they turn out to give the same output. Beware!
if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") {
fuzz_based_test(&input, config, &crc32);
} else {
fuzz_based_test(&input, config, &other);
}
}
HashCalcVariant::Standard | HashCalcVariant::Roll => {
fuzz_based_test(&input, config, &other);
}
}
}
}


@@ -53,7 +53,7 @@ pub fn deflate_fast(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSta
// bflush = zng_tr_tally_dist(s, s->strstart - s->match_start, match_len - STD_MIN_MATCH);
bflush = state.tally_dist(
state.strstart - state.match_start as usize,
state.strstart - state.match_start,
match_len - STD_MIN_MATCH,
);


@@ -80,7 +80,7 @@ pub fn deflate_medium(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockS
crate::deflate::longest_match::longest_match(state, hash_head);
state.match_start = match_start;
current_match.match_length = match_length as u16;
current_match.match_start = match_start;
current_match.match_start = match_start as u16;
if (current_match.match_length as usize) < WANT_MIN_MATCH {
current_match.match_length = 1;
}
@@ -123,7 +123,7 @@ pub fn deflate_medium(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockS
crate::deflate::longest_match::longest_match(state, hash_head);
state.match_start = match_start;
next_match.match_length = match_length as u16;
next_match.match_start = match_start;
next_match.match_start = match_start as u16;
if next_match.match_start >= next_match.strstart {
/* this can happen due to some restarts */
@@ -229,8 +229,12 @@ fn insert_match(state: &mut State, mut m: Match) {
return;
}
// Insert new strings in the hash table
if state.lookahead >= WANT_MIN_MATCH {
/* Insert new strings in the hash table only if the match length
* is not too large. This saves time but degrades compression.
*/
if (m.match_length as usize) <= 16 * state.max_insert_length()
&& state.lookahead >= WANT_MIN_MATCH
{
m.match_length -= 1; /* string at strstart already in table */
m.strstart += 1;


@@ -102,10 +102,9 @@ pub fn deflate_quick(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSt
macro_rules! first_two_bytes {
($slice:expr, $offset:expr) => {
u16::from_le_bytes($slice[$offset..$offset + 2].try_into().unwrap())
};
u16::from_le_bytes($slice[$offset..$offset+2].try_into().unwrap())
}
}
if first_two_bytes!(str_start, 0) == first_two_bytes!(match_start, 0) {
let mut match_len = crate::deflate::compare256::compare256_slice(
&str_start[2..],
@@ -119,13 +118,12 @@ pub fn deflate_quick(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSt
// TODO do this with a debug_assert?
// check_match(s, state.strstart, hash_head, match_len);
// The `dist` value is a distance within the window,
// and MAX_WBITS == 15 (32k), hence a u16 can always represent this value.
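// (a match distance never exceeds the window size, 1 << 15 = 32768, while u16::MAX is 65535)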
let dist = u16::try_from(dist).unwrap();
state
.bit_writer
.emit_dist_static((match_len - STD_MIN_MATCH) as u8, dist);
state.bit_writer.emit_dist(
StaticTreeDesc::L.static_tree,
StaticTreeDesc::D.static_tree,
(match_len - STD_MIN_MATCH) as u8,
dist as usize,
);
state.lookahead -= match_len;
state.strstart += match_len;
continue;


@@ -49,7 +49,7 @@ pub fn deflate_slow(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSta
};
// Find the longest match, discarding those <= prev_length.
state.prev_match = state.match_start;
state.prev_match = state.match_start as u16;
match_len = STD_MIN_MATCH - 1;
dist = state.strstart as isize - hash_head as isize;
@@ -76,7 +76,7 @@ pub fn deflate_slow(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSta
// If there was a match at the previous step and the current
// match is not better, output the previous match:
if state.prev_length as usize >= STD_MIN_MATCH && match_len <= state.prev_length as usize {
if state.prev_length >= STD_MIN_MATCH && match_len <= state.prev_length {
let max_insert = state.strstart + state.lookahead - STD_MIN_MATCH;
/* Do not insert strings in hash table beyond this. */
@@ -84,7 +84,7 @@ pub fn deflate_slow(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSta
bflush = state.tally_dist(
state.strstart - 1 - state.prev_match as usize,
state.prev_length as usize - STD_MIN_MATCH,
state.prev_length - STD_MIN_MATCH,
);
/* Insert in hash table all strings up to the end of the match.
@@ -93,9 +93,9 @@ pub fn deflate_slow(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSta
* the hash table.
*/
state.prev_length -= 1;
state.lookahead -= state.prev_length as usize;
state.lookahead -= state.prev_length;
let mov_fwd = state.prev_length as usize - 1;
let mov_fwd = state.prev_length - 1;
if max_insert > state.strstart {
let insert_cnt = Ord::min(mov_fwd, max_insert - state.strstart);
state.insert_string(state.strstart + 1, insert_cnt);
@@ -118,7 +118,7 @@ pub fn deflate_slow(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSta
flush_block_only(stream, false);
}
stream.state.prev_length = match_len as u16;
stream.state.prev_length = match_len;
stream.state.strstart += 1;
stream.state.lookahead -= 1;
if stream.avail_out == 0 {
@@ -127,7 +127,7 @@ pub fn deflate_slow(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSta
} else {
// There is no previous match to compare with, wait for
// the next step to decide.
state.prev_length = match_len as u16;
state.prev_length = match_len;
state.match_available = true;
match_available = true;
state.strstart += 1;


@@ -3,15 +3,28 @@ use crate::deflate::{State, HASH_SIZE, STD_MIN_MATCH};
#[derive(Debug, Clone, Copy)]
pub enum HashCalcVariant {
Standard,
/// # Safety
///
/// This variant should only be used on supported systems, checked at runtime. See
/// [`Crc32HashCalc`].
Crc32,
Roll,
}
impl HashCalcVariant {
#[cfg(test)]
pub fn for_compression_level(level: usize) -> Self {
let max_chain_length = crate::deflate::algorithm::CONFIGURATION_TABLE[level].max_chain;
Self::for_max_chain_length(max_chain_length as usize)
}
/// Use rolling hash for deflate_slow algorithm with level 9. It allows us to
/// properly look up different hash chains to speed up the longest_match search.
pub fn for_max_chain_length(max_chain_length: u16) -> Self {
pub fn for_max_chain_length(max_chain_length: usize) -> Self {
if max_chain_length > 1024 {
HashCalcVariant::Roll
} else if Crc32HashCalc::is_supported() {
HashCalcVariant::Crc32
} else {
HashCalcVariant::Standard
}
@@ -91,10 +104,10 @@ impl RollHashCalc {
pub fn quick_insert_string(state: &mut State, string: usize) -> u16 {
let val = state.window.filled()[string + Self::HASH_CALC_OFFSET] as u32;
state.ins_h = Self::hash_calc(state.ins_h, val);
state.ins_h &= Self::HASH_CALC_MASK;
state.ins_h = Self::hash_calc(state.ins_h as u32, val) as usize;
state.ins_h &= Self::HASH_CALC_MASK as usize;
let hm = state.ins_h as usize;
let hm = state.ins_h;
let head = state.head.as_slice()[hm];
if head != string as u16 {
@@ -111,9 +124,108 @@ impl RollHashCalc {
for (i, val) in slice.iter().copied().enumerate() {
let idx = string as u16 + i as u16;
state.ins_h = Self::hash_calc(state.ins_h, val as u32);
state.ins_h &= Self::HASH_CALC_MASK;
let hm = state.ins_h as usize;
state.ins_h = Self::hash_calc(state.ins_h as u32, val as u32) as usize;
state.ins_h &= Self::HASH_CALC_MASK as usize;
let hm = state.ins_h;
let head = state.head.as_slice()[hm];
if head != idx {
state.prev.as_mut_slice()[idx as usize & state.w_mask] = head;
state.head.as_mut_slice()[hm] = idx;
}
}
}
}
/// # Safety
///
/// The methods of this struct can only be executed if the system has platform support, otherwise
/// the result is UB. Use [`Self::is_supported()`] to check at runtime whether the system has
/// support before executing any methods.
pub struct Crc32HashCalc;
impl Crc32HashCalc {
fn is_supported() -> bool {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
return crate::cpu_features::is_enabled_sse42();
// NOTE: more recent versions of zlib-ng no longer use the crc instructions on aarch64
#[cfg(target_arch = "aarch64")]
return crate::cpu_features::is_enabled_crc();
#[allow(unreachable_code)]
false
}
const HASH_CALC_OFFSET: usize = 0;
const HASH_CALC_MASK: u32 = (HASH_SIZE - 1) as u32;
#[cfg(target_arch = "x86")]
#[target_feature(enable = "sse4.2")]
unsafe fn hash_calc(h: u32, val: u32) -> u32 {
unsafe { core::arch::x86::_mm_crc32_u32(h, val) }
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn hash_calc(h: u32, val: u32) -> u32 {
unsafe { core::arch::x86_64::_mm_crc32_u32(h, val) }
}
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn hash_calc(h: u32, val: u32) -> u32 {
unsafe { crate::crc32::acle::__crc32w(h, val) }
}
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
unsafe fn hash_calc(_h: u32, _val: u32) -> u32 {
assert!(!Self::is_supported());
unimplemented!("there is no hardware support on this platform")
}
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
#[cfg_attr(target_arch = "x86", target_feature(enable = "sse4.2"))]
#[cfg_attr(target_arch = "x86_64", target_feature(enable = "sse4.2"))]
pub unsafe fn update_hash(h: u32, val: u32) -> u32 {
(unsafe { Self::hash_calc(h, val) }) & Self::HASH_CALC_MASK
}
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
#[cfg_attr(target_arch = "x86", target_feature(enable = "sse4.2"))]
#[cfg_attr(target_arch = "x86_64", target_feature(enable = "sse4.2"))]
pub unsafe fn quick_insert_string(state: &mut State, string: usize) -> u16 {
let slice = &state.window.filled()[string + Self::HASH_CALC_OFFSET..];
let val = u32::from_le_bytes(slice[..4].try_into().unwrap());
let hm = unsafe { Self::update_hash(0, val) } as usize;
let head = state.head.as_slice()[hm];
if head != string as u16 {
state.prev.as_mut_slice()[string & state.w_mask] = head;
state.head.as_mut_slice()[hm] = string as u16;
}
head
}
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
#[cfg_attr(target_arch = "x86", target_feature(enable = "sse4.2"))]
#[cfg_attr(target_arch = "x86_64", target_feature(enable = "sse4.2"))]
pub unsafe fn insert_string(state: &mut State, string: usize, count: usize) {
let slice = &state.window.filled()[string + Self::HASH_CALC_OFFSET..];
// it can happen that insufficient bytes are initialized
// .take(count) generates worse assembly
let slice = &slice[..Ord::min(slice.len(), count + 3)];
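// `count + 3` (rather than `count`) keeps just enough bytes so that the
// 4-byte `windows(4)` below yields at most `count` hash insertions.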
for (i, w) in slice.windows(4).enumerate() {
let idx = string as u16 + i as u16;
let val = u32::from_le_bytes(w.try_into().unwrap());
let hm = unsafe { Self::update_hash(0, val) } as usize;
let head = state.head.as_slice()[hm];
if head != idx {
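
The intended calling pattern for Crc32HashCalc is to gate every use on is_supported(), along these lines (a hedged sketch; portable_update_hash is a hypothetical stand-in for the non-SIMD fallback, and the real dispatch goes through HashCalcVariant):

fn update(h: u32, val: u32) -> u32 {
    if Crc32HashCalc::is_supported() {
        // SAFETY: hardware CRC support was verified at runtime just above.
        unsafe { Crc32HashCalc::update_hash(h, val) }
    } else {
        portable_update_hash(h, val) // hypothetical software fallback
    }
}
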
@@ -128,6 +240,48 @@ impl RollHashCalc {
mod tests {
use super::*;
#[test]
#[cfg_attr(
not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")),
ignore = "no crc32 hardware support on this platform"
)]
fn crc32_hash_calc() {
if !Crc32HashCalc::is_supported() {
return;
}
unsafe {
if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") {
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 170926112), 500028708);
assert_eq!(Crc32HashCalc::hash_calc(0, 537538592), 3694129053);
assert_eq!(Crc32HashCalc::hash_calc(0, 538970672), 373925026);
assert_eq!(Crc32HashCalc::hash_calc(0, 538976266), 4149335727);
assert_eq!(Crc32HashCalc::hash_calc(0, 538976288), 1767342659);
assert_eq!(Crc32HashCalc::hash_calc(0, 941629472), 4090502627);
assert_eq!(Crc32HashCalc::hash_calc(0, 775430176), 1744703325);
} else {
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2067507791);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 2086141925);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 716394180);
assert_eq!(Crc32HashCalc::hash_calc(0, 775430176), 1396070634);
assert_eq!(Crc32HashCalc::hash_calc(0, 941629472), 637105634);
}
}
}
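
The x86 and aarch64 vector sets above differ because _mm_crc32_u32 implements CRC-32C (reflected polynomial 0x82F63B78) while __crc32w implements plain CRC-32 (reflected polynomial 0xEDB88320). A bitwise software model of the x86 instruction, as a hedged cross-check (the function name is made up):

fn crc32c_u32(mut h: u32, val: u32) -> u32 {
    h ^= val;
    for _ in 0..32 {
        // one bit of reflected polynomial division (CRC-32C)
        h = (h >> 1) ^ (0x82F6_3B78 & 0u32.wrapping_sub(h & 1));
    }
    h
}

#[test]
fn software_model_matches_hardware_vectors() {
    assert_eq!(crc32c_u32(0, 807411760), 2423125009);
    assert_eq!(crc32c_u32(0, 540024864), 1452438466);
}
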
#[test]
fn roll_hash_calc() {
assert_eq!(RollHashCalc::hash_calc(2565, 93), 82173);


@@ -1,21 +1,37 @@
use crate::deflate::{Pos, State, MIN_LOOKAHEAD, STD_MAX_MATCH, STD_MIN_MATCH};
use crate::deflate::{State, MIN_LOOKAHEAD, STD_MAX_MATCH, STD_MIN_MATCH};
type Pos = u16;
const EARLY_EXIT_TRIGGER_LEVEL: i8 = 5;
/// Find the (length, offset) in the window of the longest match for the string
/// at offset cur_match
pub fn longest_match(state: &crate::deflate::State, cur_match: u16) -> (usize, u16) {
const UNALIGNED_OK: bool = cfg!(any(
target_arch = "wasm32",
target_arch = "x86",
target_arch = "x86_64",
target_arch = "arm",
target_arch = "aarch64",
target_arch = "powerpc64",
));
const UNALIGNED64_OK: bool = cfg!(any(
target_arch = "wasm32",
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "powerpc64",
));
pub fn longest_match(state: &crate::deflate::State, cur_match: u16) -> (usize, usize) {
longest_match_help::<false>(state, cur_match)
}
pub fn longest_match_slow(state: &crate::deflate::State, cur_match: u16) -> (usize, u16) {
pub fn longest_match_slow(state: &crate::deflate::State, cur_match: u16) -> (usize, usize) {
longest_match_help::<true>(state, cur_match)
}
fn longest_match_help<const SLOW: bool>(
state: &crate::deflate::State,
mut cur_match: u16,
) -> (usize, u16) {
) -> (usize, usize) {
let mut match_start = state.match_start;
let strstart = state.strstart;
@@ -26,12 +42,15 @@ fn longest_match_help<const SLOW: bool>(
let limit_base: Pos;
let early_exit: bool;
let mut chain_length: u16;
let mut chain_length: usize;
let mut best_len: usize;
let lookahead = state.lookahead;
let mut match_offset = 0;
let mut scan_start = [0u8; 8];
let mut scan_end = [0u8; 8];
macro_rules! goto_next_in_chain {
() => {
chain_length -= 1;
@@ -50,28 +69,38 @@ fn longest_match_help<const SLOW: bool>(
// The code is optimized for STD_MAX_MATCH-2 being a multiple of 16.
assert_eq!(STD_MAX_MATCH, 258, "Code too clever");
// length of the previous match (if any), hence <= STD_MAX_MATCH
best_len = if state.prev_length > 0 {
state.prev_length as usize
state.prev_length
} else {
STD_MIN_MATCH - 1
};
// Calculate the read offset, which should only extend by an extra byte to find the next best match length.
let mut offset = best_len - 1;
if best_len >= core::mem::size_of::<u32>() {
if best_len >= core::mem::size_of::<u32>() && UNALIGNED_OK {
offset -= 2;
if best_len >= core::mem::size_of::<u64>() {
if best_len >= core::mem::size_of::<u64>() && UNALIGNED64_OK {
offset -= 4;
}
}
if UNALIGNED64_OK {
scan_start.copy_from_slice(&scan[..core::mem::size_of::<u64>()]);
scan_end.copy_from_slice(&scan[offset..][..core::mem::size_of::<u64>()]);
} else if UNALIGNED_OK {
scan_start[..4].copy_from_slice(&scan[..core::mem::size_of::<u32>()]);
scan_end[..4].copy_from_slice(&scan[offset..][..core::mem::size_of::<u32>()]);
} else {
scan_start[..2].copy_from_slice(&scan[..core::mem::size_of::<u16>()]);
scan_end[..2].copy_from_slice(&scan[offset..][..core::mem::size_of::<u16>()]);
}
let mut mbase_start = window.as_ptr();
let mut mbase_end = window[offset..].as_ptr();
// Don't waste too much time by following a chain if we already have a good match
chain_length = state.max_chain_length;
if best_len >= state.good_match as usize {
if best_len >= state.good_match {
chain_length >>= 2;
}
let nice_match = state.nice_match;
@@ -127,9 +156,6 @@ fn longest_match_help<const SLOW: bool>(
early_exit = state.level < EARLY_EXIT_TRIGGER_LEVEL;
}
let scan_start = window[strstart..].as_ptr();
let mut scan_end = window[strstart + offset..].as_ptr();
assert!(
strstart <= state.window_size.saturating_sub(MIN_LOOKAHEAD),
"need lookahead"
@@ -178,47 +204,47 @@ fn longest_match_help<const SLOW: bool>(
// first, do a quick check on the start and end bytes. Go to the next item in the chain if
// these bytes don't match.
// SAFETY: we read up to 8 bytes in this block.
// Note that scan_start >= mbase_start and scan_end >= mbase_end.
// The surrounding loop breaks before cur_match gets past strstart, which is bounded by
// `window_size - (258 + 3 + 1)` (`window_size - MIN_LOOKAHEAD`).
//
// With 262 bytes of space at the end, an 8-byte read from scan_start is always in-bounds.
//
// scan_end is a bit trickier: it reads at a bounded offset from scan_start:
//
// - >= 8: scan_end is bounded by `258 - (4 + 2 + 1)`, so an 8-byte read is in-bounds
// - >= 4: scan_end is bounded by `258 - (2 + 1)`, so a 4-byte read is in-bounds
// - >= 2: scan_end is bounded by `258 - 1`, so a 2-byte read is in-bounds
let mut len = 0;
// SAFETY: we read up to 8 bytes in this block. scan_start and scan_end are 8-byte arrays.
// this loop also breaks before cur_match gets past strstart, which is bounded by
// window_size - MIN_LOOKAHEAD, so 8 byte reads of mbase_end/start are in-bounds.
unsafe {
if best_len < core::mem::size_of::<u64>() {
let scan_val = u64::from_ne_bytes(
core::slice::from_raw_parts(scan_start, 8).try_into().unwrap());
loop {
let bs = mbase_start.wrapping_add(cur_match as usize);
let match_val = u64::from_ne_bytes(
core::slice::from_raw_parts(bs, 8).try_into().unwrap());
let cmp = scan_val ^ match_val;
if cmp == 0 {
// The first 8 bytes all matched. Additional scanning will be needed
// (the compare256 call below) to determine the full match length.
break;
let scan_start = scan_start.as_ptr();
let scan_end = scan_end.as_ptr();
if UNALIGNED_OK {
if best_len < core::mem::size_of::<u32>() {
loop {
if is_match::<2>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
break;
}
goto_next_in_chain!();
}
// Compute the number of leading bytes that match.
let cmp_len = cmp.to_le().trailing_zeros() as usize / 8;
if cmp_len > best_len {
// The match is fully contained within the 8 bytes just compared,
// so we know the match length without needing to do the more
// expensive compare256 operation.
len = cmp_len;
break;
} else if best_len >= core::mem::size_of::<u64>() && UNALIGNED64_OK {
loop {
if is_match::<8>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
break;
}
goto_next_in_chain!();
}
} else {
loop {
if is_match::<4>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
break;
}
goto_next_in_chain!();
}
goto_next_in_chain!();
}
} else {
loop {
if is_match::<8>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
if memcmp_n_ptr::<2>(mbase_end.wrapping_add(cur_match as usize), scan_end)
&& memcmp_n_ptr::<2>(
mbase_start.wrapping_add(cur_match as usize),
scan.as_ptr(),
)
{
break;
}
@@ -228,17 +254,15 @@ fn longest_match_help<const SLOW: bool>(
}
// we know that there is at least some match. Now count how many bytes really match
if len == 0 {
len = {
// SAFETY: cur_match is bounded by window_size - MIN_LOOKAHEAD, where MIN_LOOKAHEAD
// is 258 + 3 + 1, so 258-byte reads of mbase_start are in-bounds.
let src1 = unsafe {
core::slice::from_raw_parts(mbase_start.wrapping_add(cur_match as usize + 2), 256)
};
crate::deflate::compare256::compare256_slice(&scan[2..], src1) + 2
let len = {
// SAFETY: cur_match is bounded by window_size - MIN_LOOKAHEAD, where MIN_LOOKAHEAD
// is 256 + 2, so 258-byte reads of mbase_start are in-bounds.
let src1 = unsafe {
core::slice::from_raw_parts(mbase_start.wrapping_add(cur_match as usize + 2), 256)
};
}
crate::deflate::compare256::compare256_slice(&scan[2..], src1) + 2
};
assert!(
scan.as_ptr() as usize + len <= window.as_ptr() as usize + (state.window_size - 1),
@@ -246,29 +270,35 @@ fn longest_match_help<const SLOW: bool>(
);
if len > best_len {
match_start = cur_match - match_offset;
match_start = (cur_match - match_offset) as usize;
/* Do not look for matches beyond the end of the input. */
if len > lookahead {
return (lookahead, match_start);
}
best_len = len;
if best_len >= nice_match as usize {
if best_len >= nice_match {
return (best_len, match_start);
}
offset = best_len - 1;
if best_len >= core::mem::size_of::<u32>() {
if best_len >= core::mem::size_of::<u32>() && UNALIGNED_OK {
offset -= 2;
if best_len >= core::mem::size_of::<u64>() {
if best_len >= core::mem::size_of::<u64>() && UNALIGNED64_OK {
offset -= 4;
}
}
scan_end = window[strstart + offset..].as_ptr();
if UNALIGNED64_OK {
scan_end.copy_from_slice(&scan[offset..][..core::mem::size_of::<u64>()]);
} else if UNALIGNED_OK {
scan_end[..4].copy_from_slice(&scan[offset..][..core::mem::size_of::<u32>()]);
} else {
scan_end[..2].copy_from_slice(&scan[offset..][..core::mem::size_of::<u16>()]);
}
// Look for a better string offset
if SLOW && len > STD_MIN_MATCH && match_start as usize + len < strstart {
if SLOW && len > STD_MIN_MATCH && match_start + len < strstart {
let mut pos: Pos;
// uint32_t i, hash;
// unsigned char *scan_endstr;
@@ -335,6 +365,6 @@ fn longest_match_help<const SLOW: bool>(
(best_len, match_start)
}
fn break_matching(state: &State, best_len: usize, match_start: u16) -> (usize, u16) {
fn break_matching(state: &State, best_len: usize, match_start: usize) -> (usize, usize) {
(Ord::min(best_len, state.lookahead), match_start)
}
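
The byte-counting trick in the best_len fast path earlier in this file can be checked in isolation: after normalizing to little-endian (what to_le() does in that loop), the number of equal leading bytes of two 8-byte loads is trailing_zeros of their XOR divided by 8. A minimal sketch:

fn matching_prefix_len(a: u64, b: u64) -> usize {
    let cmp = a ^ b;
    if cmp == 0 {
        8 // all eight bytes match; a longer compare (compare256) takes over
    } else {
        cmp.trailing_zeros() as usize / 8
    }
}

fn main() {
    let a = u64::from_le_bytes(*b"deflate!");
    let b = u64::from_le_bytes(*b"deflect!");
    assert_eq!(matching_prefix_len(a, b), 4); // "defl" matches
}
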


@@ -80,7 +80,7 @@ impl<'a> Pending<'a> {
pub(crate) fn new_in(alloc: &Allocator<'a>, len: usize) -> Option<Self> {
let ptr = alloc.allocate_slice_raw::<MaybeUninit<u8>>(len)?;
// SAFETY: freshly allocated buffer
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr.as_ptr(), len) };
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr, len) };
Some(Self {
buf,


@@ -70,27 +70,6 @@ pub const STATIC_LTREE: [Value; L_CODES + 2] = [
h(163,8), h( 99,8), h(227,8)
];
/// Precomputes the `Values` generated by the `encode_len` function for all `u8` input values
#[rustfmt::skip]
pub const STATIC_LTREE_ENCODINGS: [Value; 256] = {
let mut table = [Value::new(0, 0); 256];
let mut lc = 0;
while lc < table.len() {
let (code, len) = super::encode_len(&STATIC_LTREE, lc as u8);
// assert that there is no precision loss
assert!(code as u16 as u64 == code);
assert!(len as u16 as usize == len);
table[lc] = Value::new(code as u16, len as u16);
lc += 1;
}
table
};
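
Each entry should, by construction, agree with an on-the-fly call to encode_len; a hedged sanity test (assuming Value implements PartialEq, which this diff does not show):

#[test]
fn precomputed_ltree_encodings_agree() {
    for lc in 0u16..256 {
        let (code, len) = super::encode_len(&STATIC_LTREE, lc as u8);
        assert!(STATIC_LTREE_ENCODINGS[lc as usize] == Value::new(code as u16, len as u16));
    }
}
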
#[rustfmt::skip]
pub const STATIC_DTREE: [Value; D_CODES] = [
h( 0,5), h(16,5), h( 8,5), h(24,5), h( 4,5),


@@ -1,22 +1,33 @@
use crate::{allocate::Allocator, weak_slice::WeakSliceMut};
use core::mem::MaybeUninit;
#[derive(Debug)]
pub struct Window<'a> {
// the full window allocation. This is longer than w_size so that operations don't need to
// perform bounds checks.
buf: WeakSliceMut<'a, u8>,
buf: WeakSliceMut<'a, MaybeUninit<u8>>,
// number of initialized bytes
filled: usize,
window_bits: usize,
high_water: usize,
}
impl<'a> Window<'a> {
pub fn new_in(alloc: &Allocator<'a>, window_bits: usize) -> Option<Self> {
let len = 2 * ((1 << window_bits) + Self::padding());
let ptr = alloc.allocate_zeroed(len)?;
let ptr = alloc.allocate_slice_raw::<MaybeUninit<u8>>(len)?;
// SAFETY: freshly allocated buffer
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr.as_ptr(), len) };
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr, len) };
Some(Self { buf, window_bits })
Some(Self {
buf,
filled: 0,
window_bits,
high_water: 0,
})
}
pub fn clone_in(&self, alloc: &Allocator<'a>) -> Option<Self> {
@@ -26,6 +37,8 @@ impl<'a> Window<'a> {
.buf
.as_mut_slice()
.copy_from_slice(self.buf.as_slice());
clone.filled = self.filled;
clone.high_water = self.high_water;
Some(clone)
}
@@ -48,14 +61,14 @@ impl<'a> Window<'a> {
#[inline]
pub fn filled(&self) -> &[u8] {
// SAFETY: `self.buf` has been initialized for at least `filled` elements
unsafe { core::slice::from_raw_parts(self.buf.as_ptr().cast(), self.buf.len()) }
unsafe { core::slice::from_raw_parts(self.buf.as_ptr().cast(), self.filled) }
}
/// Returns a mutable reference to the filled portion of the buffer.
#[inline]
pub fn filled_mut(&mut self) -> &mut [u8] {
// SAFETY: `self.buf` has been initialized for at least `filled` elements
unsafe { core::slice::from_raw_parts_mut(self.buf.as_mut_ptr().cast(), self.buf.len()) }
unsafe { core::slice::from_raw_parts_mut(self.buf.as_mut_ptr().cast(), self.filled) }
}
/// # Safety
@@ -64,8 +77,66 @@ impl<'a> Window<'a> {
pub unsafe fn copy_and_initialize(&mut self, range: core::ops::Range<usize>, src: *const u8) {
let (start, end) = (range.start, range.end);
let dst = self.buf.as_mut_slice()[range].as_mut_ptr();
let dst = self.buf.as_mut_slice()[range].as_mut_ptr() as *mut u8;
unsafe { core::ptr::copy_nonoverlapping(src, dst, end - start) };
if start >= self.filled {
self.filled = Ord::max(self.filled, end);
}
self.high_water = Ord::max(self.high_water, self.filled);
}
// This library has many functions that operate in a chunked fashion on memory. For
// performance, we want to minimize bounds checks. Therefore we reserve and initialize some
// extra memory at the end of the window so that chunked operations can use the whole buffer.
// If they go slightly over `self.capacity` that's okay, we account for that here by making
// sure the memory there is initialized!
pub fn initialize_out_of_bounds(&mut self) {
const WIN_INIT: usize = crate::deflate::STD_MAX_MATCH;
// If the WIN_INIT bytes after the end of the current data have never been
// written, then zero those bytes in order to avoid memory check reports of
// the use of uninitialized (or uninitialised as Julian writes) bytes by
// the longest match routines. Update the high water mark for the next
// time through here. WIN_INIT is set to STD_MAX_MATCH since the longest match
// routines allow scanning to strstart + STD_MAX_MATCH, ignoring lookahead.
if self.high_water < self.capacity() {
let curr = self.filled().len();
if self.high_water < curr {
// Previous high water mark below current data -- zero WIN_INIT
// bytes or up to end of window, whichever is less.
let init = Ord::min(self.capacity() - curr, WIN_INIT);
self.buf.as_mut_slice()[curr..][..init].fill(MaybeUninit::new(0));
self.high_water = curr + init;
self.filled += init;
} else if self.high_water < curr + WIN_INIT {
// High water mark at or above current data, but below current data
// plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
// to end of window, whichever is less.
let init = Ord::min(
curr + WIN_INIT - self.high_water,
self.capacity() - self.high_water,
);
self.buf.as_mut_slice()[self.high_water..][..init].fill(MaybeUninit::new(0));
self.high_water += init;
self.filled += init;
}
}
}
pub fn initialize_at_least(&mut self, at_least: usize) {
let end = at_least.clamp(self.high_water, self.buf.len());
self.buf.as_mut_slice()[self.high_water..end].fill(MaybeUninit::new(0));
self.high_water = end;
self.filled = end;
}
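
The filled/high_water bookkeeping above can be illustrated on a plain Vec (a toy model under simplified assumptions: WIN_INIT shortened from STD_MAX_MATCH to 4, and no MaybeUninit):

const WIN_INIT: usize = 4; // stands in for STD_MAX_MATCH (258)

struct Toy { buf: Vec<u8>, filled: usize, high_water: usize }

impl Toy {
    fn initialize_out_of_bounds(&mut self) {
        let capacity = self.buf.len();
        if self.high_water < capacity {
            let curr = self.filled;
            if self.high_water < curr {
                // zero WIN_INIT bytes past the data, or up to the end of the window
                let init = Ord::min(capacity - curr, WIN_INIT);
                self.buf[curr..curr + init].fill(0);
                self.high_water = curr + init;
                self.filled += init;
            } else if self.high_water < curr + WIN_INIT {
                // zero out to curr + WIN_INIT, or up to the end of the window
                let init = Ord::min(curr + WIN_INIT - self.high_water, capacity - self.high_water);
                self.buf[self.high_water..self.high_water + init].fill(0);
                self.high_water += init;
                self.filled += init;
            }
        }
    }
}

fn main() {
    let mut w = Toy { buf: vec![0xAA; 10], filled: 3, high_water: 0 };
    w.initialize_out_of_bounds();
    // WIN_INIT bytes past the data are now zeroed and accounted for:
    assert_eq!((w.filled, w.high_water), (7, 7));
}
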
// padding required so that SIMD operations going out-of-bounds are not a problem


@@ -14,7 +14,6 @@ mod writer;
use crate::allocate::Allocator;
use crate::c_api::internal_state;
use crate::cpu_features::CpuFeatures;
use crate::{
adler32::adler32,
c_api::{gz_header, z_checksum, z_size, z_stream, Z_DEFLATED},
@@ -1857,28 +1856,7 @@ impl State<'_> {
}
}
fn inflate_fast_help(state: &mut State, start: usize) {
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
if crate::cpu_features::is_enabled_avx2() {
// SAFETY: we've verified the target features
return unsafe { inflate_fast_help_avx2(state, start) };
}
inflate_fast_help_vanilla(state, start);
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2")]
unsafe fn inflate_fast_help_avx2(state: &mut State, start: usize) {
inflate_fast_help_impl::<{ CpuFeatures::AVX2 }>(state, start);
}
fn inflate_fast_help_vanilla(state: &mut State, start: usize) {
inflate_fast_help_impl::<{ CpuFeatures::NONE }>(state, start);
}
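
The shape being removed here is the usual runtime-feature-detection pattern. In isolation it looks like this (a hedged sketch with hypothetical names; it uses std's detection macro, whereas this crate keeps its own cpu_features checks):

fn entry(data: &mut [u8]) {
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    if is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 support was just verified at runtime.
        return unsafe { entry_avx2(data) };
    }
    entry_scalar(data);
}

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2")]
unsafe fn entry_avx2(data: &mut [u8]) {
    // the #[target_feature] attribute lets the compiler use AVX2 in this body
    entry_impl(data);
}

fn entry_scalar(data: &mut [u8]) {
    entry_impl(data);
}

#[inline(always)]
fn entry_impl(data: &mut [u8]) {
    for b in data.iter_mut() {
        *b = b.wrapping_add(1);
    }
}
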
#[inline(always)]
fn inflate_fast_help_impl<const FEATURES: usize>(state: &mut State, _start: usize) {
fn inflate_fast_help(state: &mut State, _start: usize) {
let mut bit_reader = BitReader::new(&[]);
core::mem::swap(&mut bit_reader, &mut state.bit_reader);
@@ -2010,32 +1988,23 @@ fn inflate_fast_help_impl<const FEATURES: usize>(state: &mut State, _start: usiz
// window, and part of it has wrapped around to the start. Copy
// the end section here, the start section will be copied below.
len -= op as u16;
writer.extend_from_window_with_features::<FEATURES>(
&state.window,
from..from + op,
);
writer.extend_from_window(&state.window, from..from + op);
from = 0;
op = window_next;
}
}
let copy = Ord::min(op, len as usize);
writer.extend_from_window_with_features::<FEATURES>(
&state.window,
from..from + copy,
);
writer.extend_from_window(&state.window, from..from + copy);
if op < len as usize {
// here we need some bytes from the output itself
writer.copy_match_with_features::<FEATURES>(
dist as usize,
len as usize - op,
);
writer.copy_match(dist as usize, len as usize - op);
}
} else if extra_safe {
todo!()
} else {
writer.copy_match_with_features::<FEATURES>(dist as usize, len as usize)
writer.copy_match(dist as usize, len as usize)
}
} else if (op & 64) == 0 {
// 2nd level distance code
@@ -2152,9 +2121,8 @@ pub fn init(stream: &mut z_stream, config: InflateConfig) -> ReturnCode {
return ReturnCode::MemError;
};
// FIXME: write is stable for NonNull since 1.80.0
unsafe { state_allocation.as_ptr().write(state) };
stream.state = state_allocation.as_ptr() as *mut internal_state;
unsafe { state_allocation.write(state) };
stream.state = state_allocation as *mut internal_state;
// SAFETY: we've correctly initialized the stream to be an InflateStream
let ret = if let Some(stream) = unsafe { InflateStream::from_stream_mut(stream) } {
@@ -2502,7 +2470,7 @@ pub unsafe fn copy<'a>(
if !state.window.is_empty() {
let Some(window) = state.window.clone_in(&source.alloc) else {
// SAFETY: state_allocation is not used again.
source.alloc.deallocate(state_allocation.as_ptr(), 1);
source.alloc.deallocate(state_allocation, 1);
return ReturnCode::MemError;
};
@@ -2510,11 +2478,11 @@ pub unsafe fn copy<'a>(
}
// write the cloned state into state_ptr
unsafe { state_allocation.as_ptr().write(copy) }; // FIXME: write is stable for NonNull since 1.80.0
unsafe { state_allocation.write(copy) };
// insert the state_ptr into `dest`
let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state) };
unsafe { core::ptr::write(field_ptr as *mut *mut State, state_allocation.as_ptr()) };
unsafe { core::ptr::write(field_ptr as *mut *mut State, state_allocation) };
// update the writer; it cannot be cloned so we need to use some shenanigans
let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state.writer) };


@@ -147,10 +147,14 @@ impl<'a> Window<'a> {
pub fn new_in(alloc: &Allocator<'a>, window_bits: usize) -> Option<Self> {
let len = (1 << window_bits) + Self::padding();
let ptr = alloc.allocate_zeroed(len)?;
let ptr = alloc.allocate_zeroed(len);
if ptr.is_null() {
return None;
}
Some(Self {
buf: unsafe { WeakSliceMut::from_raw_parts_mut(ptr.as_ptr(), len) },
buf: unsafe { WeakSliceMut::from_raw_parts_mut(ptr, len) },
have: 0,
next: 0,
})
@@ -158,10 +162,14 @@ impl<'a> Window<'a> {
pub fn clone_in(&self, alloc: &Allocator<'a>) -> Option<Self> {
let len = self.buf.len();
let ptr = alloc.allocate_zeroed(len)?;
let ptr = alloc.allocate_zeroed(len);
if ptr.is_null() {
return None;
}
Some(Self {
buf: unsafe { WeakSliceMut::from_raw_parts_mut(ptr.as_ptr(), len) },
buf: unsafe { WeakSliceMut::from_raw_parts_mut(ptr, len) },
have: self.have,
next: self.next,
})


@@ -2,7 +2,6 @@ use core::fmt;
use core::mem::MaybeUninit;
use core::ops::Range;
use crate::cpu_features::CpuFeatures;
use crate::weak_slice::WeakSliceMut;
pub struct Writer<'a> {
@@ -78,34 +77,10 @@ impl<'a> Writer<'a> {
#[inline(always)]
pub fn extend_from_window(&mut self, window: &super::window::Window, range: Range<usize>) {
self.extend_from_window_with_features::<{ CpuFeatures::NONE }>(window, range)
}
pub fn extend_from_window_with_features<const FEATURES: usize>(
&mut self,
window: &super::window::Window,
range: Range<usize>,
) {
match FEATURES {
#[cfg(target_arch = "x86_64")]
CpuFeatures::AVX2 => {
self.extend_from_window_help::<core::arch::x86_64::__m256i>(window, range)
}
_ => self.extend_from_window_runtime_dispatch(window, range),
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_avx512() {
return self.extend_from_window_help::<core::arch::x86_64::__m512i>(window, range);
}
}
fn extend_from_window_runtime_dispatch(
&mut self,
window: &super::window::Window,
range: Range<usize>,
) {
// NOTE: the dynamic check for avx512 makes avx2 slower. Measure this carefully before re-enabling
//
// #[cfg(target_arch = "x86_64")]
// if crate::cpu_features::is_enabled_avx512() {
// return self.extend_from_window_help::<core::arch::x86_64::__m512i>(window, range);
// }
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_avx2() {
@@ -163,31 +138,10 @@ impl<'a> Writer<'a> {
#[inline(always)]
pub fn copy_match(&mut self, offset_from_end: usize, length: usize) {
self.copy_match_with_features::<{ CpuFeatures::NONE }>(offset_from_end, length)
}
#[inline(always)]
pub fn copy_match_with_features<const FEATURES: usize>(
&mut self,
offset_from_end: usize,
length: usize,
) {
match FEATURES {
#[cfg(target_arch = "x86_64")]
CpuFeatures::AVX2 => {
self.copy_match_help::<core::arch::x86_64::__m256i>(offset_from_end, length)
}
_ => self.copy_match_runtime_dispatch(offset_from_end, length),
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_avx512() {
return self.copy_match_help::<core::arch::x86_64::__m512i>(offset_from_end, length);
}
}
fn copy_match_runtime_dispatch(&mut self, offset_from_end: usize, length: usize) {
// NOTE: the dynamic check for avx512 makes avx2 slower. Measure this carefully before re-enabling
//
// #[cfg(target_arch = "x86_64")]
// if crate::cpu_features::is_enabled_avx512() {
// return self.copy_match_help::<core::arch::x86_64::__m512i>(offset_from_end, length);
// }
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_avx2() {


@@ -71,10 +71,14 @@ impl<'a> ReadBuf<'a> {
}
pub(crate) fn new_in(alloc: &Allocator<'a>, len: usize) -> Option<Self> {
let ptr = alloc.allocate_zeroed(len)?;
let ptr = alloc.allocate_zeroed(len);
if ptr.is_null() {
return None;
}
// safety: all elements are now initialized
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr.as_ptr(), len) };
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr, len) };
Some(Self { buf, filled: 0 })
}