Backed out changeset 96d954eae2a0 (bug 1949947) requested by glandium.

Goloman Adrian
2025-03-12 02:25:38 +02:00
parent 687f08a875
commit 5b89d7c1ef
29 changed files with 725 additions and 741 deletions

Cargo.lock generated

@@ -3576,9 +3576,9 @@ dependencies = [
[[package]]
name = "libz-rs-sys"
version = "0.4.2"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "902bc563b5d65ad9bba616b490842ef0651066a1a1dc3ce1087113ffcb873c8d"
checksum = "a90e19106f1b2c93f1fa6cdeec2e56facbf2e403559c1e1c0ddcc6d46e979cdf"
dependencies = [
"zlib-rs",
]
@@ -7855,6 +7855,6 @@ dependencies = [
[[package]]
name = "zlib-rs"
version = "0.4.2"
version = "0.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b20717f0917c908dc63de2e44e97f1e6b126ca58d0e391cee86d504eb8fbd05"
checksum = "aada01553a9312bad4b9569035a1f12b05e5ec9770a1a4b323757356928944f8"


@@ -3093,8 +3093,8 @@ delta = "0.28.0 -> 0.31.0"
[[audits.libz-rs-sys]]
who = "Mike Hommey <mh+mozilla@glandium.org>"
criteria = "safe-to-deploy"
delta = "0.4.1 -> 0.4.2"
notes = "Only documentation changes."
delta = "0.2.1 -> 0.2.1@git:4aa430ccb77537d0d60dab8db993ca51bb1194c5"
importable = false
[[audits.linked-hash-map]]
who = "Aria Beingessner <a.beingessner@gmail.com>"


@@ -1940,11 +1940,6 @@ who = "Ameer Ghani <inahga@divviup.org>"
criteria = "safe-to-deploy"
delta = "0.4.0 -> 0.4.1"
[[audits.isrg.audits.zlib-rs]]
who = "Ameer Ghani <inahga@divviup.org>"
criteria = "safe-to-deploy"
delta = "0.4.1 -> 0.4.2"
[[audits.mozilla.wildcard-audits.weedle2]]
who = "Jan-Erik Rediger <jrediger@mozilla.com>"
criteria = "safe-to-deploy"


@@ -1 +1 @@
{"files":{"Cargo.lock":"4e51d6269a51ea8732fd452060a11235871dc8209a06f711e8515517b0ea47ad","Cargo.toml":"50c7e977783911c7b8c09b75c6a2d129c3c5be709dbfb78aefdad22cef6849ce","LICENSE":"e72111c52b7d96ebe25348dee19f0744f444d3c95ae6b1ecb6ccaecc5bce05ba","README.md":"e0a044d7f02db9dfe9b7462ccf527207c591d8ac226d6465e125e98b88111d9b","src/lib.rs":"1c85cd2696e769762c7c37ca7dff8109fb7491f10b838bb5e837bce39b8c9aa3"},"package":"902bc563b5d65ad9bba616b490842ef0651066a1a1dc3ce1087113ffcb873c8d"}
{"files":{"Cargo.toml":"33f49be9129ddbe0c32b7bf627a67b7e1e239cfd7aa9e1c9414844f68e7afbc7","LICENSE":"e72111c52b7d96ebe25348dee19f0744f444d3c95ae6b1ecb6ccaecc5bce05ba","README.md":"46f48b56018d0efef5738be7d930019631899dede51ee5e92f44bd53f6e26749","src/lib.rs":"eb21c2e4d653c6f4a781a81492ebc4483175c4609078d534e7f15e6e0e095a56"},"package":"a90e19106f1b2c93f1fa6cdeec2e56facbf2e403559c1e1c0ddcc6d46e979cdf"}


@@ -1,16 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "libz-rs-sys"
version = "0.4.2"
dependencies = [
"zlib-rs",
]
[[package]]
name = "zlib-rs"
version = "0.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8b20717f0917c908dc63de2e44e97f1e6b126ca58d0e391cee86d504eb8fbd05"


@@ -13,7 +13,7 @@
edition = "2021"
rust-version = "1.75"
name = "libz-rs-sys"
version = "0.4.2"
version = "0.4.1"
build = false
publish = true
autolib = false
@@ -32,7 +32,7 @@ name = "libz_rs_sys"
path = "src/lib.rs"
[dependencies.zlib-rs]
version = "0.4.2"
version = "0.4.1"
default-features = false
[features]


@@ -3,7 +3,7 @@ This crate is a C API for [zlib-rs](https://docs.rs/zlib-rs/latest/zlib_rs/). Th
From a rust perspective, this API is not very ergonomic. Use the [`flate2`](https://crates.io/crates/flate2) crate for a more
ergonomic rust interface to zlib.
## Features
# Features
**`custom-prefix`**
@@ -29,13 +29,13 @@ Pick the default allocator implementation that is used if no `zalloc` and `zfree
- `c-allocator`: use `malloc`/`free` for the implementation of `zalloc` and `zfree`
- `rust-allocator`: the rust global allocator for the implementation of `zalloc` and `zfree`
The `rust-allocator` is the default when this crate is used as a rust dependency, and slightly more efficient because alignment is handled by the allocator. When building a dynamic library, it may make sense to use `c-allocator` instead.
The `rust-allocator` is the default when this crate is used as a rust dependency, and slightly more efficient because alignment is handled by the allocator. When building a dynamic library, it may make sense to use `c-allocator` instead.
**`std`**
Assume that `std` is available. When this feature is turned off, this crate is compatible with `#![no_std]`.
## Example
# Example
This example compresses ("deflates") the string `"Hello, World!"` and then decompresses
("inflates") it again.
@@ -88,39 +88,3 @@ let inflated = &output[..strm.total_out as usize];
assert_eq!(inflated, input.as_bytes())
```
## Compression Levels
The zlib library supports compression levels 0 up to and including 9. The level indicates a tradeoff between time spent on the compression versus the compression ratio, the factor by which the input is reduced in size:
- level 0: no compression at all
- level 1: fastest compression
- level 6: default (a good tradeoff between speed and compression ratio)
- level 9: best compression
Beyond this intuition, the exact behavior of the compression levels is not specified. The implementation of `zlib-rs` follows the implementation of [`zlib-ng`](https://github.com/zlib-ng/zlib-ng), and deviates from the one in stock zlib.
In particular, our compression level 1 is extremely fast, but also just does not compress that well. On the `silesia-small.tar` input file, we see these output sizes:
| implementation | compression level | output size (mb) |
| --- | --- | --- |
| - | 0 | `15.74` |
| stock | 1 | ` 7.05` |
| rs | 1 | ` 8.52` |
| rs | 2 | ` 6.90` |
| rs | 4 | ` 6.55` |
But, `zlib-rs` is much faster than stock zlib. In our benchmarks, it is only at level 4 that we spend roughly as much time as stock zlib on level 1:
| implementation | compression level | wall time (ms) |
| --- | --- | --- |
| stock | 1 | 185 |
| rs | 2 | 139 |
| rs | 4 | 181 |
In our example, the main options are:
- level 1: worse compression, but much faster
- level 2: equivalent compression, but significantly faster
- level 4: better compression, at the same speed
In summary, when you upgrade from stock zlib, we recommend that you benchmark on your data and target platform, and pick the right compression level for your use case.
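As a hedged illustration of that advice, the sketch below compresses the same input at the levels discussed above and prints the output sizes. It goes through the `flate2` crate this README recommends; routing it to the zlib-rs backend via flate2's `zlib-rs` feature is an assumption about the consumer's Cargo setup, not something this diff establishes.

```rust
use std::io::Write;

use flate2::{write::ZlibEncoder, Compression};

fn main() -> std::io::Result<()> {
    // Stand-in input; benchmark with your real data as the README suggests.
    let input = vec![b'a'; 1 << 20];
    for level in [1u32, 2, 4] {
        // ZlibEncoder writes a zlib-wrapped deflate stream into the Vec.
        let mut encoder = ZlibEncoder::new(Vec::new(), Compression::new(level));
        encoder.write_all(&input)?;
        let compressed = encoder.finish()?;
        println!("level {level}: {} bytes", compressed.len());
    }
    Ok(())
}
```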


@@ -712,7 +712,7 @@ pub unsafe extern "C-unwind" fn inflateReset(strm: *mut z_stream) -> i32 {
///
/// - [`Z_OK`] if success
/// - [`Z_STREAM_ERROR`] if the source stream state was inconsistent, or if the `windowBits`
/// parameter is invalid
/// parameter is invalid
///
/// # Safety
///
@@ -788,8 +788,8 @@ pub unsafe extern "C-unwind" fn inflateSetDictionary(
/// - The `text`, `time`, `xflags`, and `os` fields are filled in with the gzip header contents.
/// - `hcrc` is set to true if there is a header CRC. (The header CRC was valid if done is set to one.)
/// - If `extra` is not `NULL`, then `extra_max` contains the maximum number of bytes to write to extra.
/// Once `done` is `true`, `extra_len` contains the actual extra field length,
/// and `extra` contains the extra field, or that field truncated if `extra_max` is less than `extra_len`.
/// Once `done` is `true`, `extra_len` contains the actual extra field length,
/// and `extra` contains the extra field, or that field truncated if `extra_max` is less than `extra_len`.
/// - If `name` is not `NULL`, then up to `name_max` characters are written there, terminated with a zero unless the length is greater than `name_max`.
/// - If `comment` is not `NULL`, then up to `comm_max` characters are written there, terminated with a zero unless the length is greater than `comm_max`.
///
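To make the truncation rule in this doc comment concrete: once `done` is set, the number of bytes actually present in `extra` is the smaller of the two lengths. A minimal sketch (the helper name is hypothetical, not part of the crate's API):

```rust
// Hypothetical helper mirroring the doc comment above: `extra` holds the
// whole extra field, or its truncation to `extra_max` bytes.
fn extra_bytes_present(extra_len: u32, extra_max: u32) -> u32 {
    extra_len.min(extra_max)
}
```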


@@ -1 +1 @@
{"files":{"Cargo.lock":"8f77aba472fab186eed3ed07f3d7c0160536107dc86ea521f02917ecf930afab","Cargo.toml":"4a7d43e17e2e2f35c327c37921ec615e35849896c8b44e6a28ff759b33888380","LICENSE":"e72111c52b7d96ebe25348dee19f0744f444d3c95ae6b1ecb6ccaecc5bce05ba","README.md":"9938581c82330440be5f3b6b9125cc02c0874b250dc62093f167bf2158dbe29a","src/adler32.rs":"5077a887f8ed1b54ce436f4c50765cfb2773bde20b0ea21ca938dd6faf986fb7","src/adler32/avx2.rs":"b484a541efe367774248fb0a562ea9a8b788b48e721d68873b91e1d135f9af6b","src/adler32/generic.rs":"4ee8d80f0fdfae891e036592ca86a0ecc0817c1ece7e9057bab5f2d17669657a","src/adler32/neon.rs":"46efef19fce0fa81dc4015508781f7e686203251723c169fb71d92c07cfa58ac","src/adler32/wasm.rs":"235628f7ad37316ddfa6a62e2811ce5f90e2673247a2df46d64ad04708a548c7","src/allocate.rs":"398f6d622444cede8c828b56f0e60f1d4b97b5da258cdf6980123c3c0b6589d6","src/c_api.rs":"8328f52b477beecfc7f68240c40e775516cb773dad8c46cf24aad178c4ea0f6c","src/cpu_features.rs":"b9c7df9afd1e09787c182594ed9ce995e0cd449ec6051e3ba2d11f35574f0c78","src/crc32.rs":"8a67c0f5eee767399344a93f2a6f314127a8b6d8fd5b332cc51d42c7995ddeaf","src/crc32/acle.rs":"de881ff90d938f9ff38bfaa3f51c74ca75f9d5152a0427e145c28fe27bd40209","src/crc32/braid.rs":"e1f25477829f43b46529aded6b52c2094990d6c2dbe8d6ddcf907e80fc1ae0e0","src/crc32/combine.rs":"a1aded8c5f1886f60daad9765886299c65feb5240f24f8e9f67ebced14e267f0","src/crc32/pclmulqdq.rs":"70e4fa9d408ca804f496403d87f56e51a6a58c808750e90854aaa03af2143bc2","src/deflate.rs":"60f14c532a3dbe0cc7fca4926a5cb680011d7ad37edf6b98ef4e8b8dd247d271","src/deflate/algorithm/fast.rs":"7009bda9f552d1b158cd030a9f1db015bbde807f31808709533df5cdcb19f47c","src/deflate/algorithm/huff.rs":"2ed0a098571d4e056bb4e1d8655ec8d37e6d291ba3e2d5d7c581c2486e6abbce","src/deflate/algorithm/medium.rs":"88fd17246a8f4e9f98f3868fe4d8081894d4f5942de89a689c399976b0e25c43","src/deflate/algorithm/mod.rs":"184151cde5952a4ff0029c6647705be5f884d558bd8552a3d29f9c7a16598c93","src/deflate/algorithm/quick.rs":"7e59bcfb9994ac9c89d34781f2b175135c134fab9595c4fe00e3cb5fa7170cac","src/deflate/algorithm/rle.rs":"549427a5a8a69610afd612f89a9cbde97fe78c38c85442083b5dde10e8be4d73","src/deflate/algorithm/slow.rs":"3da90214d031f178f9bc896abadcbb67ad4c790d6fa0560f4bd63121f439e659","src/deflate/algorithm/stored.rs":"40c60591307409a5ea5f8e8505fbae70ca4a69a258b151c829db9bc5ffe02e04","src/deflate/compare256.rs":"3398a810b1cf86114523329e357684dc9cbd9f5793783d101d5994206504f2d8","src/deflate/hash_calc.rs":"9a148125444f822c80a25e8215d406b77000799ced223e229e86b0c96c69cf8b","src/deflate/longest_match.rs":"53edbca5db81df33cc0aa25527c7f24e0a32b0c09d0c1d131cb9cc492e8b28e7","src/deflate/pending.rs":"0f476c2d43429c864ce8545e65137af57369f907d9f884a66b166e40d7dbc510","src/deflate/slide_hash.rs":"6069f7d02259a6defafa67c6397b0388bf6220fec3b7d15b1deb63bce4288de5","src/deflate/test-data/inflate_buf_error.dat":"254f280f8f1e8914bd12d8bcd3813e5c08083b1b46d8d643c8e2ebe109e75cb8","src/deflate/test-data/paper-100k.pdf":"60f73a051b7ca35bfec44734b2eed7736cb5c0b7f728beb7b97ade6c5e44849b","src/deflate/trees_tbl.rs":"cbb897a638b6fa7bd007b8394dbaa5ac52c6d935e21e9593d66e8c348b6e44c7","src/deflate/window.rs":"c7f1ec7e0c3ffe38608b35e839e198cd6f3676ecb08a741b1a5265236eceb7b1","src/inflate.rs":"ceabfa271c2969a2635caa25a030f51c64e0d41e26f8351e29cd92cae8e3a694","src/inflate/bitreader.rs":"cac96b20be765bd1645219145f6a1dacdff6f34b9f0c3bb66cdae6d1a9aa574d","src/inflate/inffixed_tbl.rs":"eb1ed1927ca07b61fe30ae8461ce62e7da28c595416e687a26db57c8eac8f4a1","src/inflate/inftrees.rs":"44efb568c9cc2dbbc6c51e50f3cc38d6
c8e896b93936f47b3879396fc814abfe","src/inflate/window.rs":"f0b65ef776685c64a03b75491e9f60ecad3279eac01f549a3e859023102000d2","src/inflate/writer.rs":"4b3260eec9e8d726fc3b7107ea4d96a0c11dfb2ccea5621b87629c7130527d54","src/lib.rs":"d47f73e77dcfeb1f1a8063def826a82f491f0cbe06914aa89bf30db06acf2a00","src/read_buf.rs":"e9724f21763589cfd3e2c3bca6b6b1cbf5b3bec9445e19eb7947a1fb2a2064ff","src/weak_slice.rs":"1a2075ba9bbd7c3305282c17b7467c66c5c0c464be3346fb2d25b2c73c62792c"},"package":"8b20717f0917c908dc63de2e44e97f1e6b126ca58d0e391cee86d504eb8fbd05"}
{"files":{"Cargo.toml":"bead245e487afcf0b64a5d633761ecb8121feefaee2054e5b09f39d8e5457784","LICENSE":"e72111c52b7d96ebe25348dee19f0744f444d3c95ae6b1ecb6ccaecc5bce05ba","README.md":"9938581c82330440be5f3b6b9125cc02c0874b250dc62093f167bf2158dbe29a","src/adler32.rs":"5077a887f8ed1b54ce436f4c50765cfb2773bde20b0ea21ca938dd6faf986fb7","src/adler32/avx2.rs":"b484a541efe367774248fb0a562ea9a8b788b48e721d68873b91e1d135f9af6b","src/adler32/generic.rs":"4ee8d80f0fdfae891e036592ca86a0ecc0817c1ece7e9057bab5f2d17669657a","src/adler32/neon.rs":"46efef19fce0fa81dc4015508781f7e686203251723c169fb71d92c07cfa58ac","src/adler32/wasm.rs":"235628f7ad37316ddfa6a62e2811ce5f90e2673247a2df46d64ad04708a548c7","src/allocate.rs":"d5afe99477b7cb22b5fbbd59860e91dcfbdc3a1b0f622bb98876d7eba994ac3a","src/c_api.rs":"8328f52b477beecfc7f68240c40e775516cb773dad8c46cf24aad178c4ea0f6c","src/cpu_features.rs":"67f44433971d7ae094dbe9d67b14170bc1f5eed585cbf707a21a0425f7233afa","src/crc32.rs":"20397402d6692304354f73542315c90dd5311c62e6a21f9d7d7d20e00f9234ac","src/crc32/acle.rs":"2eebb297ca47d0ad4cc49e455c42e48a4a2f58b885b3da63a0a9f7961f2e95f3","src/crc32/braid.rs":"e1f25477829f43b46529aded6b52c2094990d6c2dbe8d6ddcf907e80fc1ae0e0","src/crc32/combine.rs":"a1aded8c5f1886f60daad9765886299c65feb5240f24f8e9f67ebced14e267f0","src/crc32/pclmulqdq.rs":"70e4fa9d408ca804f496403d87f56e51a6a58c808750e90854aaa03af2143bc2","src/deflate.rs":"b4a57cd6057d7a83bdbca8423d02fbe8eb7cf6ec6b5a6e0f97fc812ea93e5ba4","src/deflate/algorithm/fast.rs":"686c0a35c1baff2d842287354f919e166fe5eca1748ad46ed14d6127611bffa0","src/deflate/algorithm/huff.rs":"2ed0a098571d4e056bb4e1d8655ec8d37e6d291ba3e2d5d7c581c2486e6abbce","src/deflate/algorithm/medium.rs":"03237619c654ee48ce176c7a6dd685025634fa9686d1c0602066b07d13659f10","src/deflate/algorithm/mod.rs":"184151cde5952a4ff0029c6647705be5f884d558bd8552a3d29f9c7a16598c93","src/deflate/algorithm/quick.rs":"8d44e91a21de91316b6bf577f3b4318e1895a4aeae3afafad44ff5db0c7fb2f8","src/deflate/algorithm/rle.rs":"549427a5a8a69610afd612f89a9cbde97fe78c38c85442083b5dde10e8be4d73","src/deflate/algorithm/slow.rs":"2fa351c77604fad7d5e113ed3b90ba2abc83be0ff589a0e367d012aee5ce967b","src/deflate/algorithm/stored.rs":"40c60591307409a5ea5f8e8505fbae70ca4a69a258b151c829db9bc5ffe02e04","src/deflate/compare256.rs":"3398a810b1cf86114523329e357684dc9cbd9f5793783d101d5994206504f2d8","src/deflate/hash_calc.rs":"057bc4d1cde94860c1e66d675c05786ccd80c797409bf0c68d6e258756e0e30b","src/deflate/longest_match.rs":"f164f072061ad5724217d213a43207d6ba7f8df31b6dec141878ec7df9b9761b","src/deflate/pending.rs":"1212cd8b301c0ccf1eb2532a16465dd1d296b9e4ad061c8fc448d68904b22b03","src/deflate/slide_hash.rs":"6069f7d02259a6defafa67c6397b0388bf6220fec3b7d15b1deb63bce4288de5","src/deflate/test-data/inflate_buf_error.dat":"254f280f8f1e8914bd12d8bcd3813e5c08083b1b46d8d643c8e2ebe109e75cb8","src/deflate/test-data/paper-100k.pdf":"60f73a051b7ca35bfec44734b2eed7736cb5c0b7f728beb7b97ade6c5e44849b","src/deflate/trees_tbl.rs":"503c65c7648405619a95dc9f5a52ecd558e439e870c116f61ef94128c6a4c52e","src/deflate/window.rs":"f864752ef33615f73fab2e2033358e1915f55a999738289b93a478d89e107557","src/inflate.rs":"6ada7430c815e4fec0a29ba78ef53b1ae7d231465d3586fc47be722678ae22a8","src/inflate/bitreader.rs":"cac96b20be765bd1645219145f6a1dacdff6f34b9f0c3bb66cdae6d1a9aa574d","src/inflate/inffixed_tbl.rs":"eb1ed1927ca07b61fe30ae8461ce62e7da28c595416e687a26db57c8eac8f4a1","src/inflate/inftrees.rs":"44efb568c9cc2dbbc6c51e50f3cc38d6c8e896b93936f47b3879396fc814abfe","src/inflate/window.rs":"55eb946c50bc8798c9965
b44227364decb36ff89969530d78dd5f96a3e09f6b8","src/inflate/writer.rs":"bb0968dbc6f8f881ffe49bcad3fd5037eecdbdb0e04e24661c276e924f67ee31","src/lib.rs":"d47f73e77dcfeb1f1a8063def826a82f491f0cbe06914aa89bf30db06acf2a00","src/read_buf.rs":"9b79c1c3aa0454758d0a4ab8365e62b34e33fdb57590755d9f09c52f75b038ee","src/weak_slice.rs":"1a2075ba9bbd7c3305282c17b7467c66c5c0c464be3346fb2d25b2c73c62792c"},"package":"aada01553a9312bad4b9569035a1f12b05e5ec9770a1a4b323757356928944f8"}

third_party/rust/zlib-rs/Cargo.lock generated vendored

@@ -1,148 +0,0 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 3
[[package]]
name = "arbitrary"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dde20b3d026af13f561bdd0f15edf01fc734f0dafcedbaf42bba506a9517f223"
dependencies = [
"derive_arbitrary",
]
[[package]]
name = "autocfg"
version = "1.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26"
[[package]]
name = "cfg-if"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
[[package]]
name = "crc32fast"
version = "1.4.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
dependencies = [
"cfg-if",
]
[[package]]
name = "derive_arbitrary"
version = "1.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "30542c1ad912e0e3d22a1935c290e12e8a29d704a420177a31faad4a601a0800"
dependencies = [
"proc-macro2",
"quote",
"syn",
]
[[package]]
name = "getrandom"
version = "0.2.15"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4567c8db10ae91089c99af84c68c38da3ec2f087c3f82960bcdbf3656b6f4d7"
dependencies = [
"cfg-if",
"libc",
"wasi",
]
[[package]]
name = "libc"
version = "0.2.169"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5aba8db14291edd000dfcc4d620c7ebfb122c613afb886ca8803fa4e128a20a"
[[package]]
name = "memoffset"
version = "0.9.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "488016bfae457b036d996092f6cb448677611ce4449e970ceaf42695203f218a"
dependencies = [
"autocfg",
]
[[package]]
name = "proc-macro2"
version = "1.0.92"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quickcheck"
version = "1.0.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "588f6378e4dd99458b60ec275b4477add41ce4fa9f64dcba6f15adccb19b50d6"
dependencies = [
"rand",
]
[[package]]
name = "quote"
version = "1.0.38"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e4dccaaaf89514f546c693ddc140f729f958c247918a13380cccc6078391acc"
dependencies = [
"proc-macro2",
]
[[package]]
name = "rand"
version = "0.8.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "34af8d1a0e25924bc5b7c43c079c942339d8f0a8b57c39049bef581b46327404"
dependencies = [
"rand_core",
]
[[package]]
name = "rand_core"
version = "0.6.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
dependencies = [
"getrandom",
]
[[package]]
name = "syn"
version = "2.0.95"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "46f71c0377baf4ef1cc3e3402ded576dccc315800fbc62dfc7fe04b009773b4a"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "unicode-ident"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
[[package]]
name = "wasi"
version = "0.11.0+wasi-snapshot-preview1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
[[package]]
name = "zlib-rs"
version = "0.4.2"
dependencies = [
"arbitrary",
"crc32fast",
"memoffset",
"quickcheck",
]


@@ -13,7 +13,7 @@
edition = "2021"
rust-version = "1.75"
name = "zlib-rs"
version = "0.4.2"
version = "0.4.1"
build = false
publish = true
autolib = false
@@ -45,9 +45,6 @@ default-features = false
[dev-dependencies.crc32fast]
version = "1.3.2"
[dev-dependencies.memoffset]
version = "0.9.1"
[dev-dependencies.quickcheck]
version = "1.0.3"
features = []


@@ -4,7 +4,6 @@ use core::{
alloc::Layout,
ffi::{c_uint, c_void},
marker::PhantomData,
ptr::NonNull,
};
#[cfg(feature = "rust-allocator")]
@@ -246,21 +245,33 @@ impl Allocator<'_> {
ptr
}
pub fn allocate_raw<T>(&self) -> Option<NonNull<T>> {
NonNull::new(self.allocate_layout(Layout::new::<T>()).cast())
pub fn allocate_raw<T>(&self) -> Option<*mut T> {
let ptr = self.allocate_layout(Layout::new::<T>());
if ptr.is_null() {
None
} else {
Some(ptr as *mut T)
}
}
pub fn allocate_slice_raw<T>(&self, len: usize) -> Option<NonNull<T>> {
NonNull::new(self.allocate_layout(Layout::array::<T>(len).ok()?).cast())
pub fn allocate_slice_raw<T>(&self, len: usize) -> Option<*mut T> {
let ptr = self.allocate_layout(Layout::array::<T>(len).ok()?);
if ptr.is_null() {
None
} else {
Some(ptr.cast())
}
}
pub fn allocate_zeroed(&self, len: usize) -> Option<NonNull<u8>> {
pub fn allocate_zeroed(&self, len: usize) -> *mut u8 {
#[cfg(feature = "rust-allocator")]
if self.zalloc == Allocator::RUST.zalloc {
// internally, we want to align allocations to 64 bytes (in part for SIMD reasons)
let layout = Layout::from_size_align(len, 64).unwrap();
return NonNull::new(unsafe { std::alloc::System.alloc_zeroed(layout) });
return unsafe { std::alloc::System.alloc_zeroed(layout) };
}
#[cfg(feature = "c-allocator")]
@@ -274,18 +285,24 @@ impl Allocator<'_> {
let ptr = alloc.allocate_layout(Layout::array::<u8>(len).ok().unwrap());
return NonNull::new(ptr.cast());
if ptr.is_null() {
return core::ptr::null_mut();
}
return ptr.cast();
}
// create the allocation (contents are uninitialized)
let ptr = self.allocate_layout(Layout::array::<u8>(len).ok().unwrap());
let ptr = NonNull::new(ptr)?;
if ptr.is_null() {
return core::ptr::null_mut();
}
// zero all contents (thus initializing the buffer)
unsafe { core::ptr::write_bytes(ptr.as_ptr(), 0, len) };
unsafe { core::ptr::write_bytes(ptr, 0, len) };
Some(ptr.cast())
ptr.cast()
}
/// # Panics
@@ -356,11 +373,11 @@ mod tests {
_marker: PhantomData,
};
let ptr = allocator.allocate_raw::<T>().unwrap().as_ptr();
let ptr = allocator.allocate_raw::<T>().unwrap();
assert_eq!(ptr as usize % core::mem::align_of::<T>(), 0);
unsafe { allocator.deallocate(ptr, 1) }
let ptr = allocator.allocate_slice_raw::<T>(10).unwrap().as_ptr();
let ptr = allocator.allocate_slice_raw::<T>(10).unwrap();
assert_eq!(ptr as usize % core::mem::align_of::<T>(), 0);
unsafe { allocator.deallocate(ptr, 10) }
}
@@ -411,15 +428,15 @@ mod tests {
fn test_allocate_zeroed_help(allocator: Allocator) {
let len = 42;
let Some(buf) = allocator.allocate_zeroed(len) else {
return;
};
let buf = allocator.allocate_zeroed(len);
let slice = unsafe { core::slice::from_raw_parts_mut(buf.as_ptr(), len) };
if !buf.is_null() {
let slice = unsafe { core::slice::from_raw_parts_mut(buf, len) };
assert_eq!(slice.iter().sum::<u8>(), 0);
assert_eq!(slice.iter().sum::<u8>(), 0);
}
unsafe { allocator.deallocate(buf.as_ptr(), len) };
unsafe { allocator.deallocate(buf, len) };
}
#[test]
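For context on the `NonNull` side of this hunk (the code being backed out), here is a hedged sketch of the pattern, using std's global allocator instead of zlib-rs's `zalloc`/`zfree` plumbing:

```rust
use core::alloc::Layout;
use core::ptr::NonNull;

// Sketch only: wrapping a possibly-null raw allocation in Option<NonNull<T>>
// moves the null check to the type system, so callers cannot forget it.
// Assumes T is not zero-sized (a zero-size Layout must not be passed to alloc).
fn allocate_raw<T>() -> Option<NonNull<T>> {
    let layout = Layout::new::<T>();
    assert_ne!(layout.size(), 0);
    // SAFETY: the layout has non-zero size, as asserted above.
    let ptr = unsafe { std::alloc::alloc(layout) };
    NonNull::new(ptr.cast::<T>())
}

fn main() {
    if let Some(p) = allocate_raw::<u64>() {
        // SAFETY: p came from the global allocator with u64's layout.
        unsafe { std::alloc::dealloc(p.as_ptr().cast(), Layout::new::<u64>()) };
    }
}
```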


@@ -1,13 +1,6 @@
#![allow(dead_code)]
#![allow(unreachable_code)]
pub struct CpuFeatures;
impl CpuFeatures {
pub const NONE: usize = 0;
pub const AVX2: usize = 1;
}
#[inline(always)]
pub fn is_enabled_sse() -> bool {
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]


@@ -52,14 +52,24 @@ impl Crc32Fold {
}
}
#[cfg_attr(not(target_arch = "x86_64"), allow(unused))]
pub(crate) fn is_pclmulqdq_enabled() -> bool {
crate::cpu_features::is_enabled_pclmulqdq()
}
#[cfg_attr(not(target_arch = "aarch64"), allow(unused))]
pub(crate) fn is_crc_enabled() -> bool {
crate::cpu_features::is_enabled_crc()
}
pub fn fold(&mut self, src: &[u8], _start: u32) {
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_pclmulqdq() {
if Self::is_pclmulqdq_enabled() {
return unsafe { self.fold.fold(src, _start) };
}
#[cfg(target_arch = "aarch64")]
if crate::cpu_features::is_enabled_crc() {
if Self::is_crc_enabled() {
self.value = unsafe { self::acle::crc32_acle_aarch64(self.value, src) };
return;
}
@@ -70,7 +80,7 @@ impl Crc32Fold {
pub fn fold_copy(&mut self, dst: &mut [u8], src: &[u8]) {
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_pclmulqdq() {
if Self::is_pclmulqdq_enabled() {
return unsafe { self.fold.fold_copy(dst, src) };
}
@@ -80,7 +90,7 @@ impl Crc32Fold {
pub fn finish(self) -> u32 {
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_pclmulqdq() {
if Self::is_pclmulqdq_enabled() {
return unsafe { self.fold.finish() };
}
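The hunks above reroute the runtime CPU-feature checks through small named helpers. As a general, hedged sketch of the dispatch pattern itself (names and the kernel body are illustrative, not zlib-rs's actual code):

```rust
// Bitwise CRC-32 (IEEE, reflected polynomial 0xEDB88320) used as the portable
// fallback and as a stand-in body for the accelerated path below.
fn crc32_scalar(mut crc: u32, data: &[u8]) -> u32 {
    crc = !crc;
    for &b in data {
        crc ^= b as u32;
        for _ in 0..8 {
            crc = (crc >> 1) ^ (0xEDB88320 & 0u32.wrapping_sub(crc & 1));
        }
    }
    !crc
}

#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "pclmulqdq")]
unsafe fn crc32_simd(crc: u32, data: &[u8]) -> u32 {
    // Stand-in body; the real kernel folds the buffer with carry-less multiplies.
    crc32_scalar(crc, data)
}

fn crc32(crc: u32, data: &[u8]) -> u32 {
    #[cfg(target_arch = "x86_64")]
    if is_x86_feature_detected!("pclmulqdq") {
        // SAFETY: the required CPU feature was verified at runtime just above.
        return unsafe { crc32_simd(crc, data) };
    }
    crc32_scalar(crc, data)
}
```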


@@ -3,7 +3,6 @@
//! The functions in this module must only be executed on an ARM system with the CRC feature.
#[cfg_attr(not(target_arch = "aarch64"), allow(unused))]
#[target_feature(enable = "crc")]
pub unsafe fn crc32_acle_aarch64(crc: u32, buf: &[u8]) -> u32 {
let mut c = !crc;
@@ -25,9 +24,29 @@ pub unsafe fn crc32_acle_aarch64(crc: u32, buf: &[u8]) -> u32 {
!c
}
#[inline]
#[target_feature(enable = "crc")]
unsafe fn remainder(mut c: u32, mut buf: &[u8]) -> u32 {
#[cfg_attr(not(target_arch = "arm"), allow(unused))]
pub unsafe fn crc32_acle_arm(crc: u32, buf: &[u8]) -> u32 {
let mut c = !crc;
// SAFETY: [u8; 4] safely transmutes into u32.
let (before, middle, after) = unsafe { buf.align_to::<u32>() };
c = remainder(c, before);
if middle.is_empty() && after.is_empty() {
return !c;
}
for w in middle {
c = unsafe { __crc32w(c, *w) };
}
c = remainder(c, after);
!c
}
fn remainder(mut c: u32, mut buf: &[u8]) -> u32 {
if let [b0, b1, b2, b3, rest @ ..] = buf {
c = unsafe { __crc32w(c, u32::from_le_bytes([*b0, *b1, *b2, *b3])) };
buf = rest;
@@ -48,9 +67,6 @@ unsafe fn remainder(mut c: u32, mut buf: &[u8]) -> u32 {
c
}
// FIXME the intrinsics below are stable since rust 1.80.0: remove these and use the standard
// library versions once our MSRV reaches that version.
/// CRC32 single round checksum for bytes (8 bits).
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32b)
@@ -99,6 +115,18 @@ unsafe fn __crc32d(mut crc: u32, data: u64) -> u32 {
}
}
/// CRC32-C single round checksum for words (32 bits).
///
/// [Arm's documentation](https://developer.arm.com/architectures/instruction-sets/intrinsics/__crc32cw)
#[target_feature(enable = "crc")]
#[cfg_attr(target_arch = "arm", target_feature(enable = "v8"))]
pub unsafe fn __crc32cw(mut crc: u32, data: u32) -> u32 {
unsafe {
core::arch::asm!("crc32cw {crc:w}, {crc:w}, {data:w}", crc = inout(reg) crc, data = in(reg) data);
crc
}
}
#[cfg(test)]
mod tests {
use super::*;
@@ -114,11 +142,21 @@ mod tests {
a == b
}
fn crc32_acle_arm_is_crc32fast(v: Vec<u8>, start: u32) -> bool {
let mut h = crc32fast::Hasher::new_with_initial(start);
h.update(&v);
let a = unsafe { crc32_acle_arm(start, &v) };
let b = h.finalize();
a == b
}
}
#[test]
fn test_crc32b() {
if !crate::cpu_features::is_enabled_crc() {
if !crate::crc32::Crc32Fold::is_crc_enabled() {
return;
}
@@ -130,7 +168,7 @@ mod tests {
#[test]
fn test_crc32h() {
if !crate::cpu_features::is_enabled_crc() {
if !crate::crc32::Crc32Fold::is_crc_enabled() {
return;
}
@@ -142,7 +180,7 @@ mod tests {
#[test]
fn test_crc32w() {
if !crate::cpu_features::is_enabled_crc() {
if !crate::crc32::Crc32Fold::is_crc_enabled() {
return;
}
@@ -155,7 +193,7 @@ mod tests {
#[test]
#[cfg(target_arch = "aarch64")]
fn test_crc32d() {
if !crate::cpu_features::is_enabled_crc() {
if !crate::crc32::Crc32Fold::is_crc_enabled() {
return;
}
@@ -164,4 +202,16 @@ mod tests {
assert_eq!(__crc32d(0, 18446744073709551615), 1147535477);
}
}
#[test]
fn test_crc32cw() {
if !crate::crc32::Crc32Fold::is_crc_enabled() {
return;
}
unsafe {
assert_eq!(__crc32cw(0, 0), 0);
assert_eq!(__crc32cw(0, 4294967295), 3080238136);
}
}
}


@@ -14,7 +14,7 @@ use crate::{
use self::{
algorithm::CONFIGURATION_TABLE,
hash_calc::{HashCalcVariant, RollHashCalc, StandardHashCalc},
hash_calc::{Crc32HashCalc, HashCalcVariant, RollHashCalc, StandardHashCalc},
pending::Pending,
trees_tbl::STATIC_LTREE,
window::Window,
@@ -29,9 +29,6 @@ mod slide_hash;
mod trees_tbl;
mod window;
// Position relative to the current window
pub(crate) type Pos = u16;
// SAFETY: This struct must have the same layout as [`z_stream`], so that casts and transmutations
// between the two can work without UB.
#[repr(C)]
@@ -285,16 +282,16 @@ pub fn init(stream: &mut z_stream, config: DeflateConfig) -> ReturnCode {
pending.drop_in(&alloc);
}
if let Some(head) = head {
alloc.deallocate(head.as_ptr(), 1)
alloc.deallocate(head, 1)
}
if let Some(prev) = prev {
alloc.deallocate(prev.as_ptr(), w_size)
alloc.deallocate(prev, w_size)
}
if let Some(mut window) = window {
window.drop_in(&alloc);
}
alloc.deallocate(state_allocation.as_ptr(), 1);
alloc.deallocate(state_allocation, 1);
}
return ReturnCode::MemError;
@@ -302,12 +299,10 @@ pub fn init(stream: &mut z_stream, config: DeflateConfig) -> ReturnCode {
};
// zero initialize the memory
let prev = prev.as_ptr(); // FIXME: write_bytes is stable for NonNull since 1.80.0
unsafe { prev.write_bytes(0, w_size) };
let prev = unsafe { WeakSliceMut::from_raw_parts_mut(prev, w_size) };
// zero out head's first element
let head = head.as_ptr(); // FIXME: write_bytes is stable for NonNull since 1.80.0
unsafe { head.write_bytes(0, 1) };
let head = unsafe { WeakArrayMut::<u16, HASH_SIZE>::from_ptr(head) };
@@ -315,6 +310,7 @@ pub fn init(stream: &mut z_stream, config: DeflateConfig) -> ReturnCode {
status: Status::Init,
// window
w_bits: window_bits,
w_size,
w_mask: w_size - 1,
@@ -357,6 +353,11 @@ pub fn init(stream: &mut z_stream, config: DeflateConfig) -> ReturnCode {
d_desc: TreeDesc::EMPTY,
bl_desc: TreeDesc::EMPTY,
bl_count: [0u16; MAX_BITS + 1],
//
heap: Heap::new(),
//
crc_fold: Crc32Fold::new(),
gzhead: None,
@@ -364,22 +365,17 @@ pub fn init(stream: &mut z_stream, config: DeflateConfig) -> ReturnCode {
//
match_start: 0,
match_length: 0,
prev_match: 0,
match_available: false,
prev_length: 0,
// just provide a valid default; gets set properly later
hash_calc_variant: HashCalcVariant::Standard,
_cache_line_0: (),
_cache_line_1: (),
_cache_line_2: (),
_cache_line_3: (),
_padding_0: 0,
};
unsafe { state_allocation.as_ptr().write(state) }; // FIXME: write is stable for NonNull since 1.80.0
stream.state = state_allocation.as_ptr() as *mut internal_state;
unsafe { state_allocation.write(state) };
stream.state = state_allocation as *mut internal_state;
let Some(stream) = (unsafe { DeflateStream::from_stream_mut(stream) }) else {
if cfg!(debug_assertions) {
@@ -600,16 +596,16 @@ pub fn copy<'a>(
pending.drop_in(alloc);
}
if let Some(head) = head {
alloc.deallocate(head.as_ptr(), HASH_SIZE)
alloc.deallocate(head, HASH_SIZE)
}
if let Some(prev) = prev {
alloc.deallocate(prev.as_ptr(), source_state.w_size)
alloc.deallocate(prev, source_state.w_size)
}
if let Some(mut window) = window {
window.drop_in(alloc);
}
alloc.deallocate(state_allocation.as_ptr(), 1);
alloc.deallocate(state_allocation, 1);
}
return ReturnCode::MemError;
@@ -617,14 +613,11 @@ pub fn copy<'a>(
};
let prev = unsafe {
let prev = prev.as_ptr();
prev.copy_from_nonoverlapping(source_state.prev.as_ptr(), source_state.prev.len());
WeakSliceMut::from_raw_parts_mut(prev, source_state.prev.len())
};
// FIXME: write_bytes is stable for NonNull since 1.80.0
let head = unsafe {
let head = head.as_ptr();
head.write_bytes(0, 1);
head.cast::<u16>().write(source_state.head.as_slice()[0]);
WeakArrayMut::from_ptr(head)
@@ -646,6 +639,8 @@ pub fn copy<'a>(
l_desc: source_state.l_desc.clone(),
d_desc: source_state.d_desc.clone(),
bl_desc: source_state.bl_desc.clone(),
bl_count: source_state.bl_count,
match_length: source_state.match_length,
prev_match: source_state.prev_match,
match_available: source_state.match_available,
strstart: source_state.strstart,
@@ -664,28 +659,25 @@ pub fn copy<'a>(
static_len: source_state.static_len,
insert: source_state.insert,
w_size: source_state.w_size,
w_bits: source_state.w_bits,
w_mask: source_state.w_mask,
lookahead: source_state.lookahead,
prev,
head,
ins_h: source_state.ins_h,
heap: source_state.heap.clone(),
hash_calc_variant: source_state.hash_calc_variant,
crc_fold: source_state.crc_fold,
gzhead: None,
gzindex: source_state.gzindex,
_cache_line_0: (),
_cache_line_1: (),
_cache_line_2: (),
_cache_line_3: (),
_padding_0: source_state._padding_0,
};
// write the cloned state into state_ptr
unsafe { state_allocation.as_ptr().write(dest_state) }; // FIXME: write is stable for NonNull since 1.80.0
unsafe { state_allocation.write(dest_state) };
// insert the state_ptr into `dest`
let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state) };
unsafe { core::ptr::write(field_ptr as *mut *mut State, state_allocation.as_ptr()) };
unsafe { core::ptr::write(field_ptr as *mut *mut State, state_allocation) };
// update the gzhead field (it contains a mutable reference so we need to be careful)
let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state.gzhead) };
@@ -792,10 +784,10 @@ fn lm_init(state: &mut State) {
}
fn lm_set_level(state: &mut State, level: i8) {
state.max_lazy_match = CONFIGURATION_TABLE[level as usize].max_lazy;
state.good_match = CONFIGURATION_TABLE[level as usize].good_length;
state.nice_match = CONFIGURATION_TABLE[level as usize].nice_length;
state.max_chain_length = CONFIGURATION_TABLE[level as usize].max_chain;
state.max_lazy_match = CONFIGURATION_TABLE[level as usize].max_lazy as usize;
state.good_match = CONFIGURATION_TABLE[level as usize].good_length as usize;
state.nice_match = CONFIGURATION_TABLE[level as usize].nice_length as usize;
state.max_chain_length = CONFIGURATION_TABLE[level as usize].max_chain as usize;
state.hash_calc_variant = HashCalcVariant::for_max_chain_length(state.max_chain_length);
state.level = level;
@@ -808,10 +800,10 @@ pub fn tune(
nice_length: usize,
max_chain: usize,
) -> ReturnCode {
stream.state.good_match = good_length as u16;
stream.state.max_lazy_match = max_lazy as u16;
stream.state.nice_match = nice_length as u16;
stream.state.max_chain_length = max_chain as u16;
stream.state.good_match = good_length;
stream.state.max_lazy_match = max_lazy;
stream.state.nice_match = nice_length;
stream.state.max_chain_length = max_chain;
ReturnCode::Ok
}
@@ -849,18 +841,15 @@ impl Value {
self.a
}
#[inline(always)]
pub(crate) const fn code(self) -> u16 {
pub(crate) fn code(self) -> u16 {
self.a
}
#[inline(always)]
pub(crate) const fn dad(self) -> u16 {
pub(crate) fn dad(self) -> u16 {
self.b
}
#[inline(always)]
pub(crate) const fn len(self) -> u16 {
pub(crate) fn len(self) -> u16 {
self.b
}
}
@@ -904,50 +893,6 @@ struct BitWriter<'a> {
bits_sent: usize,
}
#[inline]
const fn encode_len(ltree: &[Value], lc: u8) -> (u64, usize) {
let mut lc = lc as usize;
/* Send the length code, len is the match length - STD_MIN_MATCH */
let code = self::trees_tbl::LENGTH_CODE[lc] as usize;
let c = code + LITERALS + 1;
assert!(c < L_CODES, "bad l_code");
// send_code_trace(s, c);
let lnode = ltree[c];
let mut match_bits: u64 = lnode.code() as u64;
let mut match_bits_len = lnode.len() as usize;
let extra = StaticTreeDesc::EXTRA_LBITS[code] as usize;
if extra != 0 {
lc -= self::trees_tbl::BASE_LENGTH[code] as usize;
match_bits |= (lc as u64) << match_bits_len;
match_bits_len += extra;
}
(match_bits, match_bits_len)
}
#[inline]
const fn encode_dist(dtree: &[Value], mut dist: u16) -> (u64, usize) {
dist -= 1; /* dist is now the match distance - 1 */
let code = State::d_code(dist as usize) as usize;
assert!(code < D_CODES, "bad d_code");
// send_code_trace(s, code);
/* Send the distance code */
let dnode = dtree[code];
let mut match_bits = dnode.code() as u64;
let mut match_bits_len = dnode.len() as usize;
let extra = StaticTreeDesc::EXTRA_DBITS[code] as usize;
if extra != 0 {
dist -= self::trees_tbl::BASE_DIST[code];
match_bits |= (dist as u64) << match_bits_len;
match_bits_len += extra;
}
(match_bits, match_bits_len)
}
impl<'a> BitWriter<'a> {
pub(crate) const BIT_BUF_SIZE: u8 = 64;
@@ -1108,30 +1053,41 @@ impl<'a> BitWriter<'a> {
ltree: &[Value],
dtree: &[Value],
lc: u8,
dist: u16,
mut dist: usize,
) -> usize {
let (mut match_bits, mut match_bits_len) = encode_len(ltree, lc);
let mut lc = lc as usize;
let (dist_match_bits, dist_match_bits_len) = encode_dist(dtree, dist);
/* Send the length code, len is the match length - STD_MIN_MATCH */
let mut code = self::trees_tbl::LENGTH_CODE[lc] as usize;
let c = code + LITERALS + 1;
assert!(c < L_CODES, "bad l_code");
// send_code_trace(s, c);
match_bits |= dist_match_bits << match_bits_len;
match_bits_len += dist_match_bits_len;
let lnode = ltree[c];
let mut match_bits: u64 = lnode.code() as u64;
let mut match_bits_len = lnode.len() as usize;
let mut extra = StaticTreeDesc::EXTRA_LBITS[code] as usize;
if extra != 0 {
lc -= self::trees_tbl::BASE_LENGTH[code] as usize;
match_bits |= (lc as u64) << match_bits_len;
match_bits_len += extra;
}
self.send_bits(match_bits, match_bits_len as u8);
dist -= 1; /* dist is now the match distance - 1 */
code = State::d_code(dist) as usize;
assert!(code < D_CODES, "bad d_code");
// send_code_trace(s, code);
match_bits_len
}
pub(crate) fn emit_dist_static(&mut self, lc: u8, dist: u16) -> usize {
let precomputed_len = trees_tbl::STATIC_LTREE_ENCODINGS[lc as usize];
let mut match_bits = precomputed_len.code() as u64;
let mut match_bits_len = precomputed_len.len() as usize;
let dtree = self::trees_tbl::STATIC_DTREE.as_slice();
let (dist_match_bits, dist_match_bits_len) = encode_dist(dtree, dist);
match_bits |= dist_match_bits << match_bits_len;
match_bits_len += dist_match_bits_len;
/* Send the distance code */
let dnode = dtree[code];
match_bits |= (dnode.code() as u64) << match_bits_len;
match_bits_len += dnode.len() as usize;
extra = StaticTreeDesc::EXTRA_DBITS[code] as usize;
if extra != 0 {
dist -= self::trees_tbl::BASE_DIST[code] as usize;
match_bits |= (dist as u64) << match_bits_len;
match_bits_len += extra;
}
self.send_bits(match_bits, match_bits_len as u8);
@@ -1144,7 +1100,7 @@ impl<'a> BitWriter<'a> {
unreachable!("out of bound access on the symbol buffer");
};
match u16::from_le_bytes([dist_low, dist_high]) {
match u16::from_be_bytes([dist_high, dist_low]) as usize {
0 => self.emit_lit(ltree, lc) as usize,
dist => self.emit_dist(ltree, dtree, lc, dist),
};
@@ -1218,7 +1174,7 @@ impl<'a> BitWriter<'a> {
}
}
#[repr(C, align(64))]
#[repr(C)]
pub(crate) struct State<'a> {
status: Status,
@@ -1230,47 +1186,40 @@ pub(crate) struct State<'a> {
pub(crate) level: i8,
/// Whether or not a block is currently open for the QUICK deflation scheme.
/// 0 if the block is closed, 1 if there is an active block, or 2 if there
/// is an active block and it is the last block.
/// true if there is an active block, or false if the block was just closed
pub(crate) block_open: u8,
pub(crate) hash_calc_variant: HashCalcVariant,
pub(crate) match_available: bool, /* set if previous match exists */
bit_writer: BitWriter<'a>,
/// Use a faster search when the previous match is longer than this
pub(crate) good_match: u16,
pub(crate) good_match: usize,
/// Stop searching when current match exceeds this
pub(crate) nice_match: u16,
pub(crate) nice_match: usize,
pub(crate) match_start: Pos, /* start of matching string */
pub(crate) prev_match: Pos, /* previous match */
// part of the fields below
// dyn_ltree: [Value; ],
// dyn_dtree: [Value; ],
// bl_tree: [Value; ],
l_desc: TreeDesc<HEAP_SIZE>, /* literal and length tree */
d_desc: TreeDesc<{ 2 * D_CODES + 1 }>, /* distance tree */
bl_desc: TreeDesc<{ 2 * BL_CODES + 1 }>, /* Huffman tree for bit lengths */
pub(crate) bl_count: [u16; MAX_BITS + 1],
pub(crate) match_length: usize, /* length of best match */
pub(crate) prev_match: u16, /* previous match */
pub(crate) match_available: bool, /* set if previous match exists */
pub(crate) strstart: usize, /* start of string to insert */
pub(crate) window: Window<'a>,
pub(crate) w_size: usize, /* LZ77 window size (32K by default) */
pub(crate) w_mask: usize, /* w_size - 1 */
_cache_line_0: (),
/// prev[N], where N is an offset in the current window, contains the offset in the window
/// of the previous 4-byte sequence that hashes to the same value as the 4-byte sequence
/// starting at N. Together with head, prev forms a chained hash table that can be used
/// to find earlier strings in the window that are potential matches for new input being
/// deflated.
pub(crate) prev: WeakSliceMut<'a, u16>,
/// head[H] contains the offset of the last 4-character sequence seen so far in
/// the current window that hashes to H (as calculated using the hash_calc_variant).
pub(crate) head: WeakArrayMut<'a, u16, HASH_SIZE>,
pub(crate) match_start: usize, /* start of matching string */
/// Length of the best match at previous step. Matches not greater than this
/// are discarded. This is used in the lazy match evaluation.
pub(crate) prev_length: u16,
pub(crate) prev_length: usize,
/// To speed up deflation, hash chains are never searched beyond this length.
/// A higher limit improves compression ratio but degrades the speed.
pub(crate) max_chain_length: u16,
pub(crate) max_chain_length: usize,
// TODO untangle this mess! zlib uses the same field differently based on compression level
// we should just have 2 fields for clarity!
@@ -1281,21 +1230,15 @@ pub(crate) struct State<'a> {
// define max_insert_length max_lazy_match
/// Attempt to find a better match only when the current match is strictly smaller
/// than this value. This mechanism is used only for compression levels >= 4.
pub(crate) max_lazy_match: u16,
/// number of string matches in current block
/// NOTE: this is a saturating 8-bit counter, to help keep the struct compact. The code that
/// makes decisions based on this field only cares whether the count is greater than 2, so
/// an 8-bit counter is sufficient.
pub(crate) matches: u8,
pub(crate) max_lazy_match: usize,
/// Window position at the beginning of the current output block. Gets
/// negative when the window is moved backwards.
pub(crate) block_start: isize,
pub(crate) sym_buf: ReadBuf<'a>,
pub(crate) window: Window<'a>,
_cache_line_1: (),
pub(crate) sym_buf: ReadBuf<'a>,
/// Size of match buffer for literals/lengths. There are 4 reasons for
/// limiting lit_bufsize to 64K:
@@ -1316,12 +1259,11 @@ pub(crate) struct State<'a> {
/// - I can't count above 4
lit_bufsize: usize,
/// Actual size of window: 2*w_size, except when the user input buffer is directly used as sliding window.
/// Actual size of window: 2*wSize, except when the user input buffer is directly used as sliding window.
pub(crate) window_size: usize,
bit_writer: BitWriter<'a>,
_cache_line_2: (),
/// number of string matches in current block
pub(crate) matches: usize,
/// bit length of current block with optimal trees
opt_len: usize,
@@ -1331,23 +1273,24 @@ pub(crate) struct State<'a> {
/// bytes at end of window left to insert
pub(crate) insert: usize,
pub(crate) w_size: usize, /* LZ77 window size (32K by default) */
pub(crate) w_bits: usize, /* log2(w_size) (8..16) */
pub(crate) w_mask: usize, /* w_size - 1 */
pub(crate) lookahead: usize, /* number of valid bytes ahead in window */
pub(crate) prev: WeakSliceMut<'a, u16>,
pub(crate) head: WeakArrayMut<'a, u16, HASH_SIZE>,
/// hash index of string to be inserted
pub(crate) ins_h: u32,
pub(crate) ins_h: usize,
gzhead: Option<&'a mut gz_header>,
gzindex: usize,
heap: Heap,
_padding_0: usize,
_cache_line_3: (),
pub(crate) hash_calc_variant: HashCalcVariant,
crc_fold: crate::crc32::Crc32Fold,
l_desc: TreeDesc<HEAP_SIZE>, /* literal and length tree */
d_desc: TreeDesc<{ 2 * D_CODES + 1 }>, /* distance tree */
bl_desc: TreeDesc<{ 2 * BL_CODES + 1 }>, /* Huffman tree for bit lengths */
gzhead: Option<&'a mut gz_header>,
gzindex: usize,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash, PartialOrd, Ord, Default)]
@@ -1386,11 +1329,6 @@ enum DataType {
impl<'a> State<'a> {
pub const BIT_BUF_SIZE: u8 = BitWriter::BIT_BUF_SIZE;
// log2(w_size) (in the range MIN_WBITS..=MAX_WBITS)
pub(crate) fn w_bits(&self) -> u32 {
self.w_size.trailing_zeros()
}
pub(crate) fn max_dist(&self) -> usize {
self.w_size - MIN_LOOKAHEAD
}
@@ -1398,7 +1336,7 @@ impl<'a> State<'a> {
// TODO untangle this mess! zlib uses the same field differently based on compression level
// we should just have 2 fields for clarity!
pub(crate) fn max_insert_length(&self) -> usize {
self.max_lazy_match as usize
self.max_lazy_match
}
/// Total size of the pending buf. But because `pending` shares memory with `sym_buf`, this is
@@ -1411,6 +1349,9 @@ impl<'a> State<'a> {
pub(crate) fn update_hash(&self, h: u32, val: u32) -> u32 {
match self.hash_calc_variant {
HashCalcVariant::Standard => StandardHashCalc::update_hash(h, val),
// SAFETY: self.hash_calc_variant is set by HashCalcVariant::for_max_chain_length,
// which avoids choosing Crc32 if the system doesn't have support.
HashCalcVariant::Crc32 => unsafe { Crc32HashCalc::update_hash(h, val) },
HashCalcVariant::Roll => RollHashCalc::update_hash(h, val),
}
}
@@ -1419,6 +1360,9 @@ impl<'a> State<'a> {
pub(crate) fn quick_insert_string(&mut self, string: usize) -> u16 {
match self.hash_calc_variant {
HashCalcVariant::Standard => StandardHashCalc::quick_insert_string(self, string),
// SAFETY: self.hash_calc_variant is set by HashCalcVariant::for_max_chain_length,
// which avoids choosing Crc32 if the system doesn't have support.
HashCalcVariant::Crc32 => unsafe { Crc32HashCalc::quick_insert_string(self, string) },
HashCalcVariant::Roll => RollHashCalc::quick_insert_string(self, string),
}
}
@@ -1427,6 +1371,9 @@ impl<'a> State<'a> {
pub(crate) fn insert_string(&mut self, string: usize, count: usize) {
match self.hash_calc_variant {
HashCalcVariant::Standard => StandardHashCalc::insert_string(self, string, count),
// SAFETY: self.hash_calc_variant is set by HashCalcVariant::for_max_chain_length,
// which avoids choosing Crc32 if the system doesn't have support.
HashCalcVariant::Crc32 => unsafe { Crc32HashCalc::insert_string(self, string, count) },
HashCalcVariant::Roll => RollHashCalc::insert_string(self, string, count),
}
}
@@ -1464,7 +1411,7 @@ impl<'a> State<'a> {
pub(crate) fn tally_dist(&mut self, mut dist: usize, len: usize) -> bool {
self.sym_buf.push_dist(dist as u16, len as u8);
self.matches = self.matches.saturating_add(1);
self.matches += 1;
dist -= 1;
assert!(
@@ -1511,19 +1458,11 @@ impl<'a> State<'a> {
}
fn compress_block_static_trees(&mut self) {
let ltree = self::trees_tbl::STATIC_LTREE.as_slice();
for chunk in self.sym_buf.filled().chunks_exact(3) {
let [dist_low, dist_high, lc] = *chunk else {
unreachable!("out of bound access on the symbol buffer");
};
match u16::from_le_bytes([dist_low, dist_high]) {
0 => self.bit_writer.emit_lit(ltree, lc) as usize,
dist => self.bit_writer.emit_dist_static(lc, dist),
};
}
self.bit_writer.emit_end_block(ltree, false)
self.bit_writer.compress_block_help(
self.sym_buf.filled(),
self::trees_tbl::STATIC_LTREE.as_slice(),
self::trees_tbl::STATIC_DTREE.as_slice(),
)
}
fn compress_block_dynamic_trees(&mut self) {
@@ -1547,7 +1486,7 @@ impl<'a> State<'a> {
};
let h =
(Z_DEFLATED + ((self.w_bits() as u16 - 8) << 4)) << 8 | (self.level_flags() << 6) | dict;
(Z_DEFLATED + ((self.w_bits as u16 - 8) << 4)) << 8 | (self.level_flags() << 6) | dict;
h + 31 - (h % 31)
}
@@ -1660,6 +1599,7 @@ pub(crate) fn read_buf_window(stream: &mut DeflateStream, offset: usize, size: u
// we likely cannot fuse the crc32 and the copy here because the input can be changed by
// a concurrent thread. Therefore it cannot be converted into a slice!
let window = &mut stream.state.window;
window.initialize_at_least(offset + len);
// SAFETY: len is bounded by avail_in, so this copy is in bounds.
unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) };
@@ -1669,6 +1609,7 @@ pub(crate) fn read_buf_window(stream: &mut DeflateStream, offset: usize, size: u
// we likely cannot fuse the adler32 and the copy here because the input can be changed by
// a concurrent thread. Therefore it cannot be converted into a slice!
let window = &mut stream.state.window;
window.initialize_at_least(offset + len);
// SAFETY: len is bounded by avail_in, so this copy is in bounds.
unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) };
@@ -1676,6 +1617,7 @@ pub(crate) fn read_buf_window(stream: &mut DeflateStream, offset: usize, size: u
stream.adler = adler32(stream.adler as u32, data) as _;
} else {
let window = &mut stream.state.window;
window.initialize_at_least(offset + len);
// SAFETY: len is bounded by avail_in, so this copy is in bounds.
unsafe { window.copy_and_initialize(offset..offset + len, stream.next_in) };
}
@@ -1733,31 +1675,36 @@ pub(crate) const WANT_MIN_MATCH: usize = 4;
pub(crate) const MIN_LOOKAHEAD: usize = STD_MAX_MATCH + STD_MIN_MATCH + 1;
#[inline]
pub(crate) fn fill_window(stream: &mut DeflateStream) {
debug_assert!(stream.state.lookahead < MIN_LOOKAHEAD);
let wsize = stream.state.w_size;
loop {
let state = &mut *stream.state;
let state = &mut stream.state;
let mut more = state.window_size - state.lookahead - state.strstart;
// If the window is almost full and there is insufficient lookahead,
// move the upper half to the lower one to make room in the upper half.
if state.strstart >= wsize + state.max_dist() {
// shift the window to the left
let (old, new) = state.window.filled_mut()[..2 * wsize].split_at_mut(wsize);
old.copy_from_slice(new);
// in some cases zlib-ng copies uninitialized bytes here. We cannot have that, so
// explicitly initialize them with zeros.
//
// see also the "fill_window_out_of_bounds" test.
state.window.initialize_at_least(2 * wsize);
state.window.filled_mut().copy_within(wsize..2 * wsize, 0);
state.match_start = state.match_start.saturating_sub(wsize as u16);
if state.match_start == 0 {
if state.match_start >= wsize {
state.match_start -= wsize;
} else {
state.match_start = 0;
state.prev_length = 0;
}
state.strstart -= wsize; /* we now have strstart >= MAX_DIST */
state.block_start -= wsize as isize;
state.insert = Ord::min(state.insert, state.strstart);
if state.insert > state.strstart {
state.insert = state.strstart;
}
self::slide_hash::slide_hash(state);
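A toy model of the window slide this hunk adjusts, under the assumption that the window holds at least `2 * wsize` initialized bytes (field names follow the diff; the function itself is illustrative):

```rust
// The upper half of the window is copied down to the lower half, and all
// window-relative offsets shift left by wsize.
fn slide(window: &mut [u8], wsize: usize, strstart: &mut usize, match_start: &mut usize) {
    window.copy_within(wsize..2 * wsize, 0);
    *strstart -= wsize; // we now have strstart >= max_dist
    *match_start = match_start.saturating_sub(wsize);
}
```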
@@ -1782,7 +1729,7 @@ pub(crate) fn fill_window(stream: &mut DeflateStream) {
let n = read_buf_window(stream, stream.state.strstart + stream.state.lookahead, more);
let state = &mut *stream.state;
let state = &mut stream.state;
state.lookahead += n;
// Initialize the hash value now that we have some input:
@@ -1791,7 +1738,7 @@ pub(crate) fn fill_window(stream: &mut DeflateStream) {
if state.max_chain_length > 1024 {
let v0 = state.window.filled()[string] as u32;
let v1 = state.window.filled()[string + 1] as u32;
state.ins_h = state.update_hash(v0, v1);
state.ins_h = state.update_hash(v0, v1) as usize;
} else if string >= 1 {
state.quick_insert_string(string + 2 - STD_MIN_MATCH);
}
@@ -1813,6 +1760,11 @@ pub(crate) fn fill_window(stream: &mut DeflateStream) {
}
}
// initialize some memory at the end of the (filled) window, so SIMD operations can go "out of
// bounds" without violating any requirements. The window allocation is already slightly bigger
// to allow for this.
stream.state.window.initialize_out_of_bounds();
assert!(
stream.state.strstart <= stream.state.window_size - MIN_LOOKAHEAD,
"not enough room for search"
@@ -1906,15 +1858,14 @@ fn build_tree<const N: usize>(state: &mut State, desc: &mut TreeDesc<N>) {
let stree = desc.stat_desc.static_tree;
let elements = desc.stat_desc.elems;
let mut heap = Heap::new();
let mut max_code = heap.initialize(&mut tree[..elements]);
let mut max_code = state.heap.initialize(&mut tree[..elements]);
// The pkzip format requires that at least one distance code exists,
// and that at least one bit should be sent even if there is only one
// possible code. So to avoid special checks later on we force at least
// two codes of non zero frequency.
while heap.heap_len < 2 {
heap.heap_len += 1;
while state.heap.heap_len < 2 {
state.heap.heap_len += 1;
let node = if max_code < 2 {
max_code += 1;
max_code
@@ -1925,9 +1876,9 @@ fn build_tree<const N: usize>(state: &mut State, desc: &mut TreeDesc<N>) {
debug_assert!(node >= 0);
let node = node as usize;
heap.heap[heap.heap_len] = node as u32;
state.heap.heap[state.heap.heap_len] = node as u32;
*tree[node].freq_mut() = 1;
heap.depth[node] = 0;
state.heap.depth[node] = 0;
state.opt_len -= 1;
if !stree.is_empty() {
state.static_len -= stree[node].len() as usize;
@@ -1941,27 +1892,25 @@ fn build_tree<const N: usize>(state: &mut State, desc: &mut TreeDesc<N>) {
// The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
// establish sub-heaps of increasing lengths:
let mut n = heap.heap_len / 2;
let mut n = state.heap.heap_len / 2;
while n >= 1 {
heap.pqdownheap(tree, n);
state.heap.pqdownheap(tree, n);
n -= 1;
}
heap.construct_huffman_tree(tree, elements);
state.heap.construct_huffman_tree(tree, elements);
// At this point, the fields freq and dad are set. We can now
// generate the bit lengths.
let bl_count = gen_bitlen(state, &mut heap, desc);
gen_bitlen(state, desc);
// The field len is now set, we can generate the bit codes
gen_codes(&mut desc.dyn_tree, max_code, &bl_count);
gen_codes(&mut desc.dyn_tree, max_code, &state.bl_count);
}
fn gen_bitlen<const N: usize>(
state: &mut State,
heap: &mut Heap,
desc: &mut TreeDesc<N>,
) -> [u16; MAX_BITS + 1] {
fn gen_bitlen<const N: usize>(state: &mut State, desc: &mut TreeDesc<N>) {
let heap = &mut state.heap;
let tree = &mut desc.dyn_tree;
let max_code = desc.max_code;
let stree = desc.stat_desc.static_tree;
@@ -1969,7 +1918,7 @@ fn gen_bitlen<const N: usize>(
let base = desc.stat_desc.extra_base;
let max_length = desc.stat_desc.max_length;
let mut bl_count = [0u16; MAX_BITS + 1];
state.bl_count.fill(0);
// In a first pass, compute the optimal bit lengths (which may
// overflow in the case of the bit length tree).
@@ -1995,7 +1944,7 @@ fn gen_bitlen<const N: usize>(
continue;
}
bl_count[bits as usize] += 1;
state.bl_count[bits as usize] += 1;
let mut xbits = 0;
if n >= base {
xbits = extra[n - base] as usize;
@@ -2010,18 +1959,18 @@ fn gen_bitlen<const N: usize>(
}
if overflow == 0 {
return bl_count;
return;
}
/* Find the first bit length which could increase: */
loop {
let mut bits = max_length as usize - 1;
while bl_count[bits] == 0 {
while state.bl_count[bits] == 0 {
bits -= 1;
}
bl_count[bits] -= 1; /* move one leaf down the tree */
bl_count[bits + 1] += 2; /* move one overflow item as its brother */
bl_count[max_length as usize] -= 1;
state.bl_count[bits] -= 1; /* move one leaf down the tree */
state.bl_count[bits + 1] += 2; /* move one overflow item as its brother */
state.bl_count[max_length as usize] -= 1;
/* The brother of the overflow item also moves one step up,
* but this does not affect bl_count[max_length]
*/
@@ -2038,7 +1987,7 @@ fn gen_bitlen<const N: usize>(
// from 'ar' written by Haruhiko Okumura.)
let mut h = HEAP_SIZE;
for bits in (1..=max_length).rev() {
let mut n = bl_count[bits as usize];
let mut n = state.bl_count[bits as usize];
while n != 0 {
h -= 1;
let m = heap.heap[h] as usize;
@@ -2056,7 +2005,6 @@ fn gen_bitlen<const N: usize>(
n -= 1;
}
}
bl_count
}
/// Checks that symbol is a printing character (excluding space)
@@ -2969,37 +2917,34 @@ impl Heap {
/// Index within the heap array of least frequent node in the Huffman tree
const SMALLEST: usize = 1;
fn smaller(tree: &[Value], n: u32, m: u32, depth: &[u8]) -> bool {
let (n, m) = (n as usize, m as usize);
match Ord::cmp(&tree[n].freq(), &tree[m].freq()) {
core::cmp::Ordering::Less => true,
core::cmp::Ordering::Equal => depth[n] <= depth[m],
core::cmp::Ordering::Greater => false,
}
}
fn pqdownheap(&mut self, tree: &[Value], mut k: usize) {
/* tree: the tree to restore */
/* k: node to move down */
// Given the index $i of a node in the tree, pack the node's frequency and depth
// into a single integer. The heap ordering logic uses a primary sort on frequency
// and a secondary sort on depth, so packing both into one integer makes it
// possible to sort with fewer comparison operations.
macro_rules! freq_and_depth {
($i:expr) => {
(tree[$i as usize].freq() as u32) << 8 | self.depth[$i as usize] as u32
};
}
let v = self.heap[k];
let v_val = freq_and_depth!(v);
let mut j = k << 1; /* left son of k */
while j <= self.heap_len {
/* Set j to the smallest of the two sons: */
let mut j_val = freq_and_depth!(self.heap[j]);
if j < self.heap_len {
let j1_val = freq_and_depth!(self.heap[j + 1]);
if j1_val <= j_val {
let cond = Self::smaller(tree, self.heap[j + 1], self.heap[j], &self.depth);
if cond {
j += 1;
j_val = j1_val;
}
}
/* Exit if v is smaller than both sons */
if v_val <= j_val {
if Self::smaller(tree, v, self.heap[j], &self.depth) {
break;
}
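To make the packing comment above concrete, a minimal sketch (assuming, as the macro does, that depth fits in 8 bits so freq occupies the high bits):

```rust
// Lexicographic (freq, depth) ordering collapses to a single integer
// comparison once both values are packed into one word.
fn packed(freq: u16, depth: u8) -> u32 {
    (freq as u32) << 8 | depth as u32
}

fn smaller(a: (u16, u8), b: (u16, u8)) -> bool {
    packed(a.0, a.1) <= packed(b.0, b.1)
}
```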
@@ -3195,7 +3140,7 @@ pub fn bound(stream: Option<&mut DeflateStream>, source_len: usize) -> usize {
}
};
if stream.state.w_bits() != MAX_WBITS as u32 || HASH_BITS < 15 {
if stream.state.w_bits != MAX_WBITS as usize || HASH_BITS < 15 {
if stream.state.level == 0 {
/* upper bound for stored blocks with length 127 (memLevel == 1): the 5 header bytes per block give ~4% overhead, plus a small constant */
source_len
@@ -3428,7 +3373,7 @@ mod test {
};
assert_eq!(init(&mut stream, config), ReturnCode::Ok);
let stream = unsafe { DeflateStream::from_stream_mut(&mut stream) }.unwrap();
assert_eq!(stream.state.w_bits(), 9);
assert_eq!(stream.state.w_bits, 9);
assert!(end(stream).is_ok());
}
@@ -4214,27 +4159,34 @@ mod test {
strategy: Strategy::Default,
};
let expected = [
let crc32 = [
24, 149, 99, 96, 96, 96, 96, 208, 6, 17, 112, 138, 129, 193, 128, 1, 29, 24, 50, 208,
1, 200, 146, 169, 79, 24, 74, 59, 96, 147, 52, 71, 22, 70, 246, 88, 26, 94, 80, 128,
83, 6, 162, 219, 144, 76, 183, 210, 5, 8, 67, 105, 7, 108, 146, 230, 216, 133, 145,
129, 22, 3, 3, 131, 17, 3, 0, 3, 228, 25, 128,
];
let other = [
24, 149, 99, 96, 96, 96, 96, 208, 6, 17, 112, 138, 129, 193, 128, 1, 29, 24, 50, 208,
1, 200, 146, 169, 79, 24, 74, 59, 96, 147, 52, 71, 22, 70, 246, 88, 26, 94, 80, 128,
83, 6, 162, 219, 144, 76, 183, 210, 5, 8, 67, 105, 36, 159, 35, 128, 57, 118, 97, 100,
160, 197, 192, 192, 96, 196, 0, 0, 3, 228, 25, 128,
];
fuzz_based_test(&input, config, &expected);
}
#[cfg(any(target_arch = "x86_64", target_arch = "aarch64"))]
mod _cache_lines {
use super::State;
// FIXME: once zlib-rs Minimum Supported Rust Version >= 1.77, switch to core::mem::offset_of
// and move this _cache_lines module up a level, from tests to super::
use memoffset::offset_of;
const _: () = assert!(offset_of!(State, status) == 0);
const _: () = assert!(offset_of!(State, _cache_line_0) == 64);
const _: () = assert!(offset_of!(State, _cache_line_1) == 128);
const _: () = assert!(offset_of!(State, _cache_line_2) == 192);
const _: () = assert!(offset_of!(State, _cache_line_3) == 256);
// the output is slightly different based on what hashing algorithm is used
match HashCalcVariant::for_compression_level(config.level as usize) {
HashCalcVariant::Crc32 => {
// the aarch64 hashing algorithm is different from the standard algorithm, but in
// this case they turn out to give the same output. Beware!
if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") {
fuzz_based_test(&input, config, &crc32);
} else {
fuzz_based_test(&input, config, &other);
}
}
HashCalcVariant::Standard | HashCalcVariant::Roll => {
fuzz_based_test(&input, config, &other);
}
}
}
}


@@ -53,7 +53,7 @@ pub fn deflate_fast(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSta
// bflush = zng_tr_tally_dist(s, s->strstart - s->match_start, match_len - STD_MIN_MATCH);
bflush = state.tally_dist(
state.strstart - state.match_start as usize,
state.strstart - state.match_start,
match_len - STD_MIN_MATCH,
);


@@ -80,7 +80,7 @@ pub fn deflate_medium(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockS
crate::deflate::longest_match::longest_match(state, hash_head);
state.match_start = match_start;
current_match.match_length = match_length as u16;
current_match.match_start = match_start;
current_match.match_start = match_start as u16;
if (current_match.match_length as usize) < WANT_MIN_MATCH {
current_match.match_length = 1;
}
@@ -123,7 +123,7 @@ pub fn deflate_medium(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockS
crate::deflate::longest_match::longest_match(state, hash_head);
state.match_start = match_start;
next_match.match_length = match_length as u16;
next_match.match_start = match_start;
next_match.match_start = match_start as u16;
if next_match.match_start >= next_match.strstart {
/* this can happen due to some restarts */
@@ -229,8 +229,12 @@ fn insert_match(state: &mut State, mut m: Match) {
return;
}
// Insert new strings in the hash table
if state.lookahead >= WANT_MIN_MATCH {
/* Insert new strings in the hash table only if the match length
* is not too large. This saves time but degrades compression.
*/
if (m.match_length as usize) <= 16 * state.max_insert_length()
&& state.lookahead >= WANT_MIN_MATCH
{
m.match_length -= 1; /* string at strstart already in table */
m.strstart += 1;


@@ -102,10 +102,9 @@ pub fn deflate_quick(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSt
macro_rules! first_two_bytes {
($slice:expr, $offset:expr) => {
u16::from_le_bytes($slice[$offset..$offset + 2].try_into().unwrap())
};
u16::from_le_bytes($slice[$offset..$offset+2].try_into().unwrap())
}
}
if first_two_bytes!(str_start, 0) == first_two_bytes!(match_start, 0) {
let mut match_len = crate::deflate::compare256::compare256_slice(
&str_start[2..],
@@ -119,13 +118,12 @@ pub fn deflate_quick(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSt
// TODO do this with a debug_assert?
// check_match(s, state.strstart, hash_head, match_len);
// The `dist` value is a distance within the window,
// and MAX_WBITS == 15 (32k), hence a u16 can always represent this value.
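// (a match distance never exceeds the window size, 1 << 15 = 32768, while u16::MAX is 65535)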
let dist = u16::try_from(dist).unwrap();
state
.bit_writer
.emit_dist_static((match_len - STD_MIN_MATCH) as u8, dist);
state.bit_writer.emit_dist(
StaticTreeDesc::L.static_tree,
StaticTreeDesc::D.static_tree,
(match_len - STD_MIN_MATCH) as u8,
dist as usize,
);
state.lookahead -= match_len;
state.strstart += match_len;
continue;


@@ -49,7 +49,7 @@ pub fn deflate_slow(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSta
};
// Find the longest match, discarding those <= prev_length.
state.prev_match = state.match_start;
state.prev_match = state.match_start as u16;
match_len = STD_MIN_MATCH - 1;
dist = state.strstart as isize - hash_head as isize;
@@ -76,7 +76,7 @@ pub fn deflate_slow(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSta
// If there was a match at the previous step and the current
// match is not better, output the previous match:
if state.prev_length as usize >= STD_MIN_MATCH && match_len <= state.prev_length as usize {
if state.prev_length >= STD_MIN_MATCH && match_len <= state.prev_length {
let max_insert = state.strstart + state.lookahead - STD_MIN_MATCH;
/* Do not insert strings in hash table beyond this. */
@@ -84,7 +84,7 @@ pub fn deflate_slow(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSta
bflush = state.tally_dist(
state.strstart - 1 - state.prev_match as usize,
state.prev_length as usize - STD_MIN_MATCH,
state.prev_length - STD_MIN_MATCH,
);
/* Insert in hash table all strings up to the end of the match.
@@ -93,9 +93,9 @@ pub fn deflate_slow(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSta
* the hash table.
*/
state.prev_length -= 1;
state.lookahead -= state.prev_length as usize;
state.lookahead -= state.prev_length;
let mov_fwd = state.prev_length as usize - 1;
let mov_fwd = state.prev_length - 1;
if max_insert > state.strstart {
let insert_cnt = Ord::min(mov_fwd, max_insert - state.strstart);
state.insert_string(state.strstart + 1, insert_cnt);
@@ -118,7 +118,7 @@ pub fn deflate_slow(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSta
flush_block_only(stream, false);
}
stream.state.prev_length = match_len as u16;
stream.state.prev_length = match_len;
stream.state.strstart += 1;
stream.state.lookahead -= 1;
if stream.avail_out == 0 {
@@ -127,7 +127,7 @@ pub fn deflate_slow(stream: &mut DeflateStream, flush: DeflateFlush) -> BlockSta
} else {
// There is no previous match to compare with, wait for
// the next step to decide.
state.prev_length = match_len as u16;
state.prev_length = match_len;
state.match_available = true;
match_available = true;
state.strstart += 1;


@@ -3,15 +3,28 @@ use crate::deflate::{State, HASH_SIZE, STD_MIN_MATCH};
#[derive(Debug, Clone, Copy)]
pub enum HashCalcVariant {
Standard,
/// # Safety
///
/// This variant should only be used on supported systems, checked at runtime. See
/// [`Crc32HashCalc`].
Crc32,
Roll,
}
impl HashCalcVariant {
#[cfg(test)]
pub fn for_compression_level(level: usize) -> Self {
let max_chain_length = crate::deflate::algorithm::CONFIGURATION_TABLE[level].max_chain;
Self::for_max_chain_length(max_chain_length as usize)
}
/// Use rolling hash for deflate_slow algorithm with level 9. It allows us to
/// properly look up different hash chains to speed up the longest_match search.
pub fn for_max_chain_length(max_chain_length: u16) -> Self {
pub fn for_max_chain_length(max_chain_length: usize) -> Self {
if max_chain_length > 1024 {
HashCalcVariant::Roll
} else if Crc32HashCalc::is_supported() {
HashCalcVariant::Crc32
} else {
HashCalcVariant::Standard
}
@@ -91,10 +104,10 @@ impl RollHashCalc {
pub fn quick_insert_string(state: &mut State, string: usize) -> u16 {
let val = state.window.filled()[string + Self::HASH_CALC_OFFSET] as u32;
state.ins_h = Self::hash_calc(state.ins_h, val);
state.ins_h &= Self::HASH_CALC_MASK;
state.ins_h = Self::hash_calc(state.ins_h as u32, val) as usize;
state.ins_h &= Self::HASH_CALC_MASK as usize;
let hm = state.ins_h as usize;
let hm = state.ins_h;
let head = state.head.as_slice()[hm];
if head != string as u16 {
@@ -111,9 +124,108 @@ impl RollHashCalc {
for (i, val) in slice.iter().copied().enumerate() {
let idx = string as u16 + i as u16;
state.ins_h = Self::hash_calc(state.ins_h, val as u32);
state.ins_h &= Self::HASH_CALC_MASK;
let hm = state.ins_h as usize;
state.ins_h = Self::hash_calc(state.ins_h as u32, val as u32) as usize;
state.ins_h &= Self::HASH_CALC_MASK as usize;
let hm = state.ins_h;
let head = state.head.as_slice()[hm];
if head != idx {
state.prev.as_mut_slice()[idx as usize & state.w_mask] = head;
state.head.as_mut_slice()[hm] = idx;
}
}
}
}
/// # Safety
///
/// The methods of this struct can only be executed if the system has platform support, otherwise
/// the result is UB. Use [`Self::is_supported()`] to check at runtime whether the system has
/// support before executing any methods.
pub struct Crc32HashCalc;
impl Crc32HashCalc {
fn is_supported() -> bool {
#[cfg(any(target_arch = "x86", target_arch = "x86_64"))]
return crate::cpu_features::is_enabled_sse42();
// NOTE: more recent versions of zlib-ng no longer use the crc instructions on aarch64
#[cfg(target_arch = "aarch64")]
return crate::cpu_features::is_enabled_crc();
#[allow(unreachable_code)]
false
}
const HASH_CALC_OFFSET: usize = 0;
const HASH_CALC_MASK: u32 = (HASH_SIZE - 1) as u32;
#[cfg(target_arch = "x86")]
#[target_feature(enable = "sse4.2")]
unsafe fn hash_calc(h: u32, val: u32) -> u32 {
unsafe { core::arch::x86::_mm_crc32_u32(h, val) }
}
#[cfg(target_arch = "x86_64")]
#[target_feature(enable = "sse4.2")]
unsafe fn hash_calc(h: u32, val: u32) -> u32 {
unsafe { core::arch::x86_64::_mm_crc32_u32(h, val) }
}
#[cfg(target_arch = "aarch64")]
#[target_feature(enable = "neon")]
unsafe fn hash_calc(h: u32, val: u32) -> u32 {
unsafe { crate::crc32::acle::__crc32w(h, val) }
}
#[cfg(not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")))]
unsafe fn hash_calc(_h: u32, _val: u32) -> u32 {
assert!(!Self::is_supported());
unimplemented!("there is no hardware support on this platform")
}
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
#[cfg_attr(target_arch = "x86", target_feature(enable = "sse4.2"))]
#[cfg_attr(target_arch = "x86_64", target_feature(enable = "sse4.2"))]
pub unsafe fn update_hash(h: u32, val: u32) -> u32 {
(unsafe { Self::hash_calc(h, val) }) & Self::HASH_CALC_MASK
}
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
#[cfg_attr(target_arch = "x86", target_feature(enable = "sse4.2"))]
#[cfg_attr(target_arch = "x86_64", target_feature(enable = "sse4.2"))]
pub unsafe fn quick_insert_string(state: &mut State, string: usize) -> u16 {
let slice = &state.window.filled()[string + Self::HASH_CALC_OFFSET..];
let val = u32::from_le_bytes(slice[..4].try_into().unwrap());
let hm = unsafe { Self::update_hash(0, val) } as usize;
let head = state.head.as_slice()[hm];
if head != string as u16 {
state.prev.as_mut_slice()[string & state.w_mask] = head;
state.head.as_mut_slice()[hm] = string as u16;
}
head
}
#[cfg_attr(target_arch = "aarch64", target_feature(enable = "neon"))]
#[cfg_attr(target_arch = "x86", target_feature(enable = "sse4.2"))]
#[cfg_attr(target_arch = "x86_64", target_feature(enable = "sse4.2"))]
pub unsafe fn insert_string(state: &mut State, string: usize, count: usize) {
let slice = &state.window.filled()[string + Self::HASH_CALC_OFFSET..];
// it can happen that insufficient bytes are initialized
// .take(count) generates worse assembly
let slice = &slice[..Ord::min(slice.len(), count + 3)];
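// `count + 3` (rather than `count`) keeps just enough bytes so that the
// 4-byte `windows(4)` below yields at most `count` hash insertions.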
for (i, w) in slice.windows(4).enumerate() {
let idx = string as u16 + i as u16;
let val = u32::from_le_bytes(w.try_into().unwrap());
let hm = unsafe { Self::update_hash(0, val) } as usize;
let head = state.head.as_slice()[hm];
if head != idx {
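
The intended calling pattern for Crc32HashCalc is to gate every use on is_supported(), along these lines (a hedged sketch; portable_update_hash is a hypothetical stand-in for the non-SIMD fallback, and the real dispatch goes through HashCalcVariant):

fn update(h: u32, val: u32) -> u32 {
    if Crc32HashCalc::is_supported() {
        // SAFETY: hardware CRC support was verified at runtime just above.
        unsafe { Crc32HashCalc::update_hash(h, val) }
    } else {
        portable_update_hash(h, val) // hypothetical software fallback
    }
}
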
@@ -128,6 +240,48 @@ impl RollHashCalc {
mod tests {
use super::*;
#[test]
#[cfg_attr(
not(any(target_arch = "x86", target_arch = "x86_64", target_arch = "aarch64")),
ignore = "no crc32 hardware support on this platform"
)]
fn crc32_hash_calc() {
if !Crc32HashCalc::is_supported() {
return;
}
unsafe {
if cfg!(target_arch = "x86") || cfg!(target_arch = "x86_64") {
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 1452438466);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 435552201);
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2423125009);
assert_eq!(Crc32HashCalc::hash_calc(0, 170926112), 500028708);
assert_eq!(Crc32HashCalc::hash_calc(0, 537538592), 3694129053);
assert_eq!(Crc32HashCalc::hash_calc(0, 538970672), 373925026);
assert_eq!(Crc32HashCalc::hash_calc(0, 538976266), 4149335727);
assert_eq!(Crc32HashCalc::hash_calc(0, 538976288), 1767342659);
assert_eq!(Crc32HashCalc::hash_calc(0, 941629472), 4090502627);
assert_eq!(Crc32HashCalc::hash_calc(0, 775430176), 1744703325);
} else {
assert_eq!(Crc32HashCalc::hash_calc(0, 807411760), 2067507791);
assert_eq!(Crc32HashCalc::hash_calc(0, 540024864), 2086141925);
assert_eq!(Crc32HashCalc::hash_calc(0, 538980384), 716394180);
assert_eq!(Crc32HashCalc::hash_calc(0, 775430176), 1396070634);
assert_eq!(Crc32HashCalc::hash_calc(0, 941629472), 637105634);
}
}
}
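
The x86 and aarch64 vector sets above differ because _mm_crc32_u32 implements CRC-32C (reflected polynomial 0x82F63B78) while __crc32w implements plain CRC-32 (reflected polynomial 0xEDB88320). A bitwise software model of the x86 instruction, as a hedged cross-check (the function name is made up):

fn crc32c_u32(mut h: u32, val: u32) -> u32 {
    h ^= val;
    for _ in 0..32 {
        // one bit of reflected polynomial division (CRC-32C)
        h = (h >> 1) ^ (0x82F6_3B78 & 0u32.wrapping_sub(h & 1));
    }
    h
}

#[test]
fn software_model_matches_hardware_vectors() {
    assert_eq!(crc32c_u32(0, 807411760), 2423125009);
    assert_eq!(crc32c_u32(0, 540024864), 1452438466);
}
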
#[test]
fn roll_hash_calc() {
assert_eq!(RollHashCalc::hash_calc(2565, 93), 82173);


@@ -1,21 +1,37 @@
use crate::deflate::{Pos, State, MIN_LOOKAHEAD, STD_MAX_MATCH, STD_MIN_MATCH};
use crate::deflate::{State, MIN_LOOKAHEAD, STD_MAX_MATCH, STD_MIN_MATCH};
type Pos = u16;
const EARLY_EXIT_TRIGGER_LEVEL: i8 = 5;
/// Find the (length, offset) in the window of the longest match for the string
/// at offset cur_match
pub fn longest_match(state: &crate::deflate::State, cur_match: u16) -> (usize, u16) {
const UNALIGNED_OK: bool = cfg!(any(
target_arch = "wasm32",
target_arch = "x86",
target_arch = "x86_64",
target_arch = "arm",
target_arch = "aarch64",
target_arch = "powerpc64",
));
const UNALIGNED64_OK: bool = cfg!(any(
target_arch = "wasm32",
target_arch = "x86_64",
target_arch = "aarch64",
target_arch = "powerpc64",
));
pub fn longest_match(state: &crate::deflate::State, cur_match: u16) -> (usize, usize) {
longest_match_help::<false>(state, cur_match)
}
pub fn longest_match_slow(state: &crate::deflate::State, cur_match: u16) -> (usize, u16) {
pub fn longest_match_slow(state: &crate::deflate::State, cur_match: u16) -> (usize, usize) {
longest_match_help::<true>(state, cur_match)
}
fn longest_match_help<const SLOW: bool>(
state: &crate::deflate::State,
mut cur_match: u16,
) -> (usize, u16) {
) -> (usize, usize) {
let mut match_start = state.match_start;
let strstart = state.strstart;
@@ -26,12 +42,15 @@ fn longest_match_help<const SLOW: bool>(
let limit_base: Pos;
let early_exit: bool;
let mut chain_length: u16;
let mut chain_length: usize;
let mut best_len: usize;
let lookahead = state.lookahead;
let mut match_offset = 0;
let mut scan_start = [0u8; 8];
let mut scan_end = [0u8; 8];
macro_rules! goto_next_in_chain {
() => {
chain_length -= 1;
@@ -50,28 +69,38 @@ fn longest_match_help<const SLOW: bool>(
// The code is optimized for STD_MAX_MATCH-2 being a multiple of 16.
assert_eq!(STD_MAX_MATCH, 258, "Code too clever");
// length of the previous match (if any), hence <= STD_MAX_MATCH
best_len = if state.prev_length > 0 {
state.prev_length as usize
state.prev_length
} else {
STD_MIN_MATCH - 1
};
// Calculate the read offset, which should only extend by an extra byte to find the next best match length.
let mut offset = best_len - 1;
if best_len >= core::mem::size_of::<u32>() {
if best_len >= core::mem::size_of::<u32>() && UNALIGNED_OK {
offset -= 2;
if best_len >= core::mem::size_of::<u64>() {
if best_len >= core::mem::size_of::<u64>() && UNALIGNED64_OK {
offset -= 4;
}
}
if UNALIGNED64_OK {
scan_start.copy_from_slice(&scan[..core::mem::size_of::<u64>()]);
scan_end.copy_from_slice(&scan[offset..][..core::mem::size_of::<u64>()]);
} else if UNALIGNED_OK {
scan_start[..4].copy_from_slice(&scan[..core::mem::size_of::<u32>()]);
scan_end[..4].copy_from_slice(&scan[offset..][..core::mem::size_of::<u32>()]);
} else {
scan_start[..2].copy_from_slice(&scan[..core::mem::size_of::<u16>()]);
scan_end[..2].copy_from_slice(&scan[offset..][..core::mem::size_of::<u16>()]);
}
let mut mbase_start = window.as_ptr();
let mut mbase_end = window[offset..].as_ptr();
// Don't waste too much time by following a chain if we already have a good match
chain_length = state.max_chain_length;
if best_len >= state.good_match as usize {
if best_len >= state.good_match {
chain_length >>= 2;
}
let nice_match = state.nice_match;
@@ -127,9 +156,6 @@ fn longest_match_help<const SLOW: bool>(
early_exit = state.level < EARLY_EXIT_TRIGGER_LEVEL;
}
let scan_start = window[strstart..].as_ptr();
let mut scan_end = window[strstart + offset..].as_ptr();
assert!(
strstart <= state.window_size.saturating_sub(MIN_LOOKAHEAD),
"need lookahead"
@@ -178,47 +204,47 @@ fn longest_match_help<const SLOW: bool>(
// first, do a quick check on the start and end bytes. Go to the next item in the chain if
// these bytes don't match.
// SAFETY: we read up to 8 bytes in this block.
// Note that scan_start >= mbase_start and scan_end >= mbase_end.
// The surrounding loop breaks before cur_match gets past strstart, which is bounded by
// `window_size - (258 + 3 + 1)` (`window_size - MIN_LOOKAHEAD`).
//
// With 262 bytes of space at the end, an 8-byte read from scan_start is always in-bounds.
//
// scan_end is a bit trickier: it reads at a bounded offset from scan_start:
//
// - >= 8: scan_end is bounded by `258 - (4 + 2 + 1)`, so an 8-byte read is in-bounds
// - >= 4: scan_end is bounded by `258 - (2 + 1)`, so a 4-byte read is in-bounds
// - >= 2: scan_end is bounded by `258 - 1`, so a 2-byte read is in-bounds
let mut len = 0;
// SAFETY: we read up to 8 bytes in this block. scan_start and scan_end are 8-byte arrays.
// this loop also breaks before cur_match gets past strstart, which is bounded by
// window_size - MIN_LOOKAHEAD, so 8 byte reads of mbase_end/start are in-bounds.
unsafe {
if best_len < core::mem::size_of::<u64>() {
let scan_val = u64::from_ne_bytes(
core::slice::from_raw_parts(scan_start, 8).try_into().unwrap());
loop {
let bs = mbase_start.wrapping_add(cur_match as usize);
let match_val = u64::from_ne_bytes(
core::slice::from_raw_parts(bs, 8).try_into().unwrap());
let cmp = scan_val ^ match_val;
if cmp == 0 {
// The first 8 bytes all matched. Additional scanning will be needed
// (the compare256 call below) to determine the full match length.
break;
let scan_start = scan_start.as_ptr();
let scan_end = scan_end.as_ptr();
if UNALIGNED_OK {
if best_len < core::mem::size_of::<u32>() {
loop {
if is_match::<2>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
break;
}
goto_next_in_chain!();
}
// Compute the number of leading bytes that match.
let cmp_len = cmp.to_le().trailing_zeros() as usize / 8;
if cmp_len > best_len {
// The match is fully contained within the 8 bytes just compared,
// so we know the match length without needing to do the more
// expensive compare256 operation.
len = cmp_len;
break;
} else if best_len >= core::mem::size_of::<u64>() && UNALIGNED64_OK {
loop {
if is_match::<8>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
break;
}
goto_next_in_chain!();
}
} else {
loop {
if is_match::<4>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
break;
}
goto_next_in_chain!();
}
goto_next_in_chain!();
}
} else {
loop {
if is_match::<8>(cur_match, mbase_start, mbase_end, scan_start, scan_end) {
if memcmp_n_ptr::<2>(mbase_end.wrapping_add(cur_match as usize), scan_end)
&& memcmp_n_ptr::<2>(
mbase_start.wrapping_add(cur_match as usize),
scan.as_ptr(),
)
{
break;
}
@@ -228,17 +254,15 @@ fn longest_match_help<const SLOW: bool>(
}
// we know that there is at least some match. Now count how many bytes really match
if len == 0 {
len = {
// SAFETY: cur_match is bounded by window_size - MIN_LOOKAHEAD, where MIN_LOOKAHEAD
// is 258 + 3 + 1, so 258-byte reads of mbase_start are in-bounds.
let src1 = unsafe {
core::slice::from_raw_parts(mbase_start.wrapping_add(cur_match as usize + 2), 256)
};
crate::deflate::compare256::compare256_slice(&scan[2..], src1) + 2
let len = {
// SAFETY: cur_match is bounded by window_size - MIN_LOOKAHEAD, where MIN_LOOKAHEAD
// is 256 + 2, so 258-byte reads of mbase_start are in-bounds.
let src1 = unsafe {
core::slice::from_raw_parts(mbase_start.wrapping_add(cur_match as usize + 2), 256)
};
}
crate::deflate::compare256::compare256_slice(&scan[2..], src1) + 2
};
assert!(
scan.as_ptr() as usize + len <= window.as_ptr() as usize + (state.window_size - 1),
@@ -246,29 +270,35 @@ fn longest_match_help<const SLOW: bool>(
);
if len > best_len {
match_start = cur_match - match_offset;
match_start = (cur_match - match_offset) as usize;
/* Do not look for matches beyond the end of the input. */
if len > lookahead {
return (lookahead, match_start);
}
best_len = len;
if best_len >= nice_match as usize {
if best_len >= nice_match {
return (best_len, match_start);
}
offset = best_len - 1;
if best_len >= core::mem::size_of::<u32>() {
if best_len >= core::mem::size_of::<u32>() && UNALIGNED_OK {
offset -= 2;
if best_len >= core::mem::size_of::<u64>() {
if best_len >= core::mem::size_of::<u64>() && UNALIGNED64_OK {
offset -= 4;
}
}
scan_end = window[strstart + offset..].as_ptr();
if UNALIGNED64_OK {
scan_end.copy_from_slice(&scan[offset..][..core::mem::size_of::<u64>()]);
} else if UNALIGNED_OK {
scan_end[..4].copy_from_slice(&scan[offset..][..core::mem::size_of::<u32>()]);
} else {
scan_end[..2].copy_from_slice(&scan[offset..][..core::mem::size_of::<u16>()]);
}
// Look for a better string offset
if SLOW && len > STD_MIN_MATCH && match_start as usize + len < strstart {
if SLOW && len > STD_MIN_MATCH && match_start + len < strstart {
let mut pos: Pos;
// uint32_t i, hash;
// unsigned char *scan_endstr;
@@ -335,6 +365,6 @@ fn longest_match_help<const SLOW: bool>(
(best_len, match_start)
}
fn break_matching(state: &State, best_len: usize, match_start: u16) -> (usize, u16) {
fn break_matching(state: &State, best_len: usize, match_start: usize) -> (usize, usize) {
(Ord::min(best_len, state.lookahead), match_start)
}
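
The byte-counting trick in the best_len fast path earlier in this file can be checked in isolation: after normalizing to little-endian (what to_le() does in that loop), the number of equal leading bytes of two 8-byte loads is trailing_zeros of their XOR divided by 8. A minimal sketch:

fn matching_prefix_len(a: u64, b: u64) -> usize {
    let cmp = a ^ b;
    if cmp == 0 {
        8 // all eight bytes match; a longer compare (compare256) takes over
    } else {
        cmp.trailing_zeros() as usize / 8
    }
}

fn main() {
    let a = u64::from_le_bytes(*b"deflate!");
    let b = u64::from_le_bytes(*b"deflect!");
    assert_eq!(matching_prefix_len(a, b), 4); // "defl" matches
}
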


@@ -80,7 +80,7 @@ impl<'a> Pending<'a> {
pub(crate) fn new_in(alloc: &Allocator<'a>, len: usize) -> Option<Self> {
let ptr = alloc.allocate_slice_raw::<MaybeUninit<u8>>(len)?;
// SAFETY: freshly allocated buffer
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr.as_ptr(), len) };
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr, len) };
Some(Self {
buf,


@@ -70,27 +70,6 @@ pub const STATIC_LTREE: [Value; L_CODES + 2] = [
h(163,8), h( 99,8), h(227,8)
];
/// Precomputes the `Values` generated by the `encode_len` function for all `u8` input values
#[rustfmt::skip]
pub const STATIC_LTREE_ENCODINGS: [Value; 256] = {
let mut table = [Value::new(0, 0); 256];
let mut lc = 0;
while lc < table.len() {
let (code, len) = super::encode_len(&STATIC_LTREE, lc as u8);
// assert that there is no precision loss
assert!(code as u16 as u64 == code);
assert!(len as u16 as usize == len);
table[lc] = Value::new(code as u16, len as u16);
lc += 1;
}
table
};
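
Each entry should, by construction, agree with an on-the-fly call to encode_len; a hedged sanity test (assuming Value implements PartialEq, which this diff does not show):

#[test]
fn precomputed_ltree_encodings_agree() {
    for lc in 0u16..256 {
        let (code, len) = super::encode_len(&STATIC_LTREE, lc as u8);
        assert!(STATIC_LTREE_ENCODINGS[lc as usize] == Value::new(code as u16, len as u16));
    }
}
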
#[rustfmt::skip]
pub const STATIC_DTREE: [Value; D_CODES] = [
h( 0,5), h(16,5), h( 8,5), h(24,5), h( 4,5),


@@ -1,22 +1,33 @@
use crate::{allocate::Allocator, weak_slice::WeakSliceMut};
use core::mem::MaybeUninit;
#[derive(Debug)]
pub struct Window<'a> {
// the full window allocation. This is longer than w_size so that operations don't need to
// perform bounds checks.
buf: WeakSliceMut<'a, u8>,
buf: WeakSliceMut<'a, MaybeUninit<u8>>,
// number of initialized bytes
filled: usize,
window_bits: usize,
high_water: usize,
}
impl<'a> Window<'a> {
pub fn new_in(alloc: &Allocator<'a>, window_bits: usize) -> Option<Self> {
let len = 2 * ((1 << window_bits) + Self::padding());
let ptr = alloc.allocate_zeroed(len)?;
let ptr = alloc.allocate_slice_raw::<MaybeUninit<u8>>(len)?;
// SAFETY: freshly allocated buffer
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr.as_ptr(), len) };
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr, len) };
Some(Self { buf, window_bits })
Some(Self {
buf,
filled: 0,
window_bits,
high_water: 0,
})
}
pub fn clone_in(&self, alloc: &Allocator<'a>) -> Option<Self> {
@@ -26,6 +37,8 @@ impl<'a> Window<'a> {
.buf
.as_mut_slice()
.copy_from_slice(self.buf.as_slice());
clone.filled = self.filled;
clone.high_water = self.high_water;
Some(clone)
}
@@ -48,14 +61,14 @@ impl<'a> Window<'a> {
#[inline]
pub fn filled(&self) -> &[u8] {
// SAFETY: `self.buf` has been initialized for at least `filled` elements
unsafe { core::slice::from_raw_parts(self.buf.as_ptr().cast(), self.buf.len()) }
unsafe { core::slice::from_raw_parts(self.buf.as_ptr().cast(), self.filled) }
}
/// Returns a mutable reference to the filled portion of the buffer.
#[inline]
pub fn filled_mut(&mut self) -> &mut [u8] {
// SAFETY: `self.buf` has been initialized for at least `filled` elements
unsafe { core::slice::from_raw_parts_mut(self.buf.as_mut_ptr().cast(), self.buf.len()) }
unsafe { core::slice::from_raw_parts_mut(self.buf.as_mut_ptr().cast(), self.filled) }
}
/// # Safety
@@ -64,8 +77,66 @@ impl<'a> Window<'a> {
pub unsafe fn copy_and_initialize(&mut self, range: core::ops::Range<usize>, src: *const u8) {
let (start, end) = (range.start, range.end);
let dst = self.buf.as_mut_slice()[range].as_mut_ptr();
let dst = self.buf.as_mut_slice()[range].as_mut_ptr() as *mut u8;
unsafe { core::ptr::copy_nonoverlapping(src, dst, end - start) };
if start >= self.filled {
self.filled = Ord::max(self.filled, end);
}
self.high_water = Ord::max(self.high_water, self.filled);
}
// This library has many functions that operate in a chunked fashion on memory. For
// performance, we want to minimize bounds checks. Therefore we reserve and initialize some
// extra memory at the end of the window so that chunked operations can use the whole buffer.
// If they go slightly over `self.capacity` that's okay, we account for that here by making
// sure the memory there is initialized!
pub fn initialize_out_of_bounds(&mut self) {
const WIN_INIT: usize = crate::deflate::STD_MAX_MATCH;
// If the WIN_INIT bytes after the end of the current data have never been
// written, then zero those bytes in order to avoid memory check reports of
// the use of uninitialized (or uninitialised as Julian writes) bytes by
// the longest match routines. Update the high water mark for the next
// time through here. WIN_INIT is set to STD_MAX_MATCH since the longest match
// routines allow scanning to strstart + STD_MAX_MATCH, ignoring lookahead.
if self.high_water < self.capacity() {
let curr = self.filled().len();
if self.high_water < curr {
// Previous high water mark below current data -- zero WIN_INIT
// bytes or up to end of window, whichever is less.
let init = Ord::min(self.capacity() - curr, WIN_INIT);
self.buf.as_mut_slice()[curr..][..init].fill(MaybeUninit::new(0));
self.high_water = curr + init;
self.filled += init;
} else if self.high_water < curr + WIN_INIT {
// High water mark at or above current data, but below current data
// plus WIN_INIT -- zero out to current data plus WIN_INIT, or up
// to end of window, whichever is less.
let init = Ord::min(
curr + WIN_INIT - self.high_water,
self.capacity() - self.high_water,
);
self.buf.as_mut_slice()[self.high_water..][..init].fill(MaybeUninit::new(0));
self.high_water += init;
self.filled += init;
}
}
}
pub fn initialize_at_least(&mut self, at_least: usize) {
let end = at_least.clamp(self.high_water, self.buf.len());
self.buf.as_mut_slice()[self.high_water..end].fill(MaybeUninit::new(0));
self.high_water = end;
self.filled = end;
}
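
The filled/high_water bookkeeping above can be illustrated on a plain Vec (a toy model under simplified assumptions: WIN_INIT shortened from STD_MAX_MATCH to 4, and no MaybeUninit):

const WIN_INIT: usize = 4; // stands in for STD_MAX_MATCH (258)

struct Toy { buf: Vec<u8>, filled: usize, high_water: usize }

impl Toy {
    fn initialize_out_of_bounds(&mut self) {
        let capacity = self.buf.len();
        if self.high_water < capacity {
            let curr = self.filled;
            if self.high_water < curr {
                // zero WIN_INIT bytes past the data, or up to the end of the window
                let init = Ord::min(capacity - curr, WIN_INIT);
                self.buf[curr..curr + init].fill(0);
                self.high_water = curr + init;
                self.filled += init;
            } else if self.high_water < curr + WIN_INIT {
                // zero out to curr + WIN_INIT, or up to the end of the window
                let init = Ord::min(curr + WIN_INIT - self.high_water, capacity - self.high_water);
                self.buf[self.high_water..self.high_water + init].fill(0);
                self.high_water += init;
                self.filled += init;
            }
        }
    }
}

fn main() {
    let mut w = Toy { buf: vec![0xAA; 10], filled: 3, high_water: 0 };
    w.initialize_out_of_bounds();
    // WIN_INIT bytes past the data are now zeroed and accounted for:
    assert_eq!((w.filled, w.high_water), (7, 7));
}
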
// padding required so that SIMD operations going out-of-bounds are not a problem


@@ -14,7 +14,6 @@ mod writer;
use crate::allocate::Allocator;
use crate::c_api::internal_state;
use crate::cpu_features::CpuFeatures;
use crate::{
adler32::adler32,
c_api::{gz_header, z_checksum, z_size, z_stream, Z_DEFLATED},
@@ -1857,28 +1856,7 @@ impl State<'_> {
}
}
fn inflate_fast_help(state: &mut State, start: usize) {
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
if crate::cpu_features::is_enabled_avx2() {
// SAFETY: we've verified the target features
return unsafe { inflate_fast_help_avx2(state, start) };
}
inflate_fast_help_vanilla(state, start);
}
#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2")]
unsafe fn inflate_fast_help_avx2(state: &mut State, start: usize) {
inflate_fast_help_impl::<{ CpuFeatures::AVX2 }>(state, start);
}
fn inflate_fast_help_vanilla(state: &mut State, start: usize) {
inflate_fast_help_impl::<{ CpuFeatures::NONE }>(state, start);
}
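
The shape being removed here is the usual runtime-feature-detection pattern. In isolation it looks like this (a hedged sketch with hypothetical names; it uses std's detection macro, whereas this crate keeps its own cpu_features checks):

fn entry(data: &mut [u8]) {
    #[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
    if is_x86_feature_detected!("avx2") {
        // SAFETY: AVX2 support was just verified at runtime.
        return unsafe { entry_avx2(data) };
    }
    entry_scalar(data);
}

#[cfg(any(target_arch = "x86_64", target_arch = "x86"))]
#[target_feature(enable = "avx2")]
unsafe fn entry_avx2(data: &mut [u8]) {
    // the #[target_feature] attribute lets the compiler use AVX2 in this body
    entry_impl(data);
}

fn entry_scalar(data: &mut [u8]) {
    entry_impl(data);
}

#[inline(always)]
fn entry_impl(data: &mut [u8]) {
    for b in data.iter_mut() {
        *b = b.wrapping_add(1);
    }
}
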
#[inline(always)]
fn inflate_fast_help_impl<const FEATURES: usize>(state: &mut State, _start: usize) {
fn inflate_fast_help(state: &mut State, _start: usize) {
let mut bit_reader = BitReader::new(&[]);
core::mem::swap(&mut bit_reader, &mut state.bit_reader);
@@ -2010,32 +1988,23 @@ fn inflate_fast_help_impl<const FEATURES: usize>(state: &mut State, _start: usiz
// window, and part of it has wrapped around to the start. Copy
// the end section here, the start section will be copied below.
len -= op as u16;
writer.extend_from_window_with_features::<FEATURES>(
&state.window,
from..from + op,
);
writer.extend_from_window(&state.window, from..from + op);
from = 0;
op = window_next;
}
}
let copy = Ord::min(op, len as usize);
writer.extend_from_window_with_features::<FEATURES>(
&state.window,
from..from + copy,
);
writer.extend_from_window(&state.window, from..from + copy);
if op < len as usize {
// here we need some bytes from the output itself
writer.copy_match_with_features::<FEATURES>(
dist as usize,
len as usize - op,
);
writer.copy_match(dist as usize, len as usize - op);
}
} else if extra_safe {
todo!()
} else {
writer.copy_match_with_features::<FEATURES>(dist as usize, len as usize)
writer.copy_match(dist as usize, len as usize)
}
} else if (op & 64) == 0 {
// 2nd level distance code
@@ -2152,9 +2121,8 @@ pub fn init(stream: &mut z_stream, config: InflateConfig) -> ReturnCode {
return ReturnCode::MemError;
};
// FIXME: write is stable for NonNull since 1.80.0
unsafe { state_allocation.as_ptr().write(state) };
stream.state = state_allocation.as_ptr() as *mut internal_state;
unsafe { state_allocation.write(state) };
stream.state = state_allocation as *mut internal_state;
// SAFETY: we've correctly initialized the stream to be an InflateStream
let ret = if let Some(stream) = unsafe { InflateStream::from_stream_mut(stream) } {
@@ -2502,7 +2470,7 @@ pub unsafe fn copy<'a>(
if !state.window.is_empty() {
let Some(window) = state.window.clone_in(&source.alloc) else {
// SAFETY: state_allocation is not used again.
source.alloc.deallocate(state_allocation.as_ptr(), 1);
source.alloc.deallocate(state_allocation, 1);
return ReturnCode::MemError;
};
@@ -2510,11 +2478,11 @@ pub unsafe fn copy<'a>(
}
// write the cloned state into state_ptr
unsafe { state_allocation.as_ptr().write(copy) }; // FIXME: write is stable for NonNull since 1.80.0
unsafe { state_allocation.write(copy) };
// insert the state_ptr into `dest`
let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state) };
unsafe { core::ptr::write(field_ptr as *mut *mut State, state_allocation.as_ptr()) };
unsafe { core::ptr::write(field_ptr as *mut *mut State, state_allocation) };
// update the writer; it cannot be cloned so we need to use some shenanigans
let field_ptr = unsafe { core::ptr::addr_of_mut!((*dest.as_mut_ptr()).state.writer) };


@@ -147,10 +147,14 @@ impl<'a> Window<'a> {
pub fn new_in(alloc: &Allocator<'a>, window_bits: usize) -> Option<Self> {
let len = (1 << window_bits) + Self::padding();
let ptr = alloc.allocate_zeroed(len)?;
let ptr = alloc.allocate_zeroed(len);
if ptr.is_null() {
return None;
}
Some(Self {
buf: unsafe { WeakSliceMut::from_raw_parts_mut(ptr.as_ptr(), len) },
buf: unsafe { WeakSliceMut::from_raw_parts_mut(ptr, len) },
have: 0,
next: 0,
})
@@ -158,10 +162,14 @@ impl<'a> Window<'a> {
pub fn clone_in(&self, alloc: &Allocator<'a>) -> Option<Self> {
let len = self.buf.len();
let ptr = alloc.allocate_zeroed(len)?;
let ptr = alloc.allocate_zeroed(len);
if ptr.is_null() {
return None;
}
Some(Self {
buf: unsafe { WeakSliceMut::from_raw_parts_mut(ptr.as_ptr(), len) },
buf: unsafe { WeakSliceMut::from_raw_parts_mut(ptr, len) },
have: self.have,
next: self.next,
})


@@ -2,7 +2,6 @@ use core::fmt;
use core::mem::MaybeUninit;
use core::ops::Range;
use crate::cpu_features::CpuFeatures;
use crate::weak_slice::WeakSliceMut;
pub struct Writer<'a> {
@@ -78,34 +77,10 @@ impl<'a> Writer<'a> {
#[inline(always)]
pub fn extend_from_window(&mut self, window: &super::window::Window, range: Range<usize>) {
self.extend_from_window_with_features::<{ CpuFeatures::NONE }>(window, range)
}
pub fn extend_from_window_with_features<const FEATURES: usize>(
&mut self,
window: &super::window::Window,
range: Range<usize>,
) {
match FEATURES {
#[cfg(target_arch = "x86_64")]
CpuFeatures::AVX2 => {
self.extend_from_window_help::<core::arch::x86_64::__m256i>(window, range)
}
_ => self.extend_from_window_runtime_dispatch(window, range),
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_avx512() {
return self.extend_from_window_help::<core::arch::x86_64::__m512i>(window, range);
}
}
fn extend_from_window_runtime_dispatch(
&mut self,
window: &super::window::Window,
range: Range<usize>,
) {
// NOTE: the dynamic check for avx512 makes avx2 slower. Measure this carefully before re-enabling
//
// #[cfg(target_arch = "x86_64")]
// if crate::cpu_features::is_enabled_avx512() {
// return self.extend_from_window_help::<core::arch::x86_64::__m512i>(window, range);
// }
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_avx2() {
@@ -163,31 +138,10 @@ impl<'a> Writer<'a> {
#[inline(always)]
pub fn copy_match(&mut self, offset_from_end: usize, length: usize) {
self.copy_match_with_features::<{ CpuFeatures::NONE }>(offset_from_end, length)
}
#[inline(always)]
pub fn copy_match_with_features<const FEATURES: usize>(
&mut self,
offset_from_end: usize,
length: usize,
) {
match FEATURES {
#[cfg(target_arch = "x86_64")]
CpuFeatures::AVX2 => {
self.copy_match_help::<core::arch::x86_64::__m256i>(offset_from_end, length)
}
_ => self.copy_match_runtime_dispatch(offset_from_end, length),
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_avx512() {
return self.copy_match_help::<core::arch::x86_64::__m512i>(offset_from_end, length);
}
}
fn copy_match_runtime_dispatch(&mut self, offset_from_end: usize, length: usize) {
// NOTE: the dynamic check for avx512 makes avx2 slower. Measure this carefully before re-enabling
//
// #[cfg(target_arch = "x86_64")]
// if crate::cpu_features::is_enabled_avx512() {
// return self.copy_match_help::<core::arch::x86_64::__m512i>(offset_from_end, length);
// }
#[cfg(target_arch = "x86_64")]
if crate::cpu_features::is_enabled_avx2() {


@@ -71,10 +71,14 @@ impl<'a> ReadBuf<'a> {
}
pub(crate) fn new_in(alloc: &Allocator<'a>, len: usize) -> Option<Self> {
let ptr = alloc.allocate_zeroed(len)?;
let ptr = alloc.allocate_zeroed(len);
if ptr.is_null() {
return None;
}
// safety: all elements are now initialized
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr.as_ptr(), len) };
let buf = unsafe { WeakSliceMut::from_raw_parts_mut(ptr, len) };
Some(Self { buf, filled: 0 })
}