Bug 1903829 - Update zip crate and use new features for omnijar reading r=gsvelto,webdriver-reviewers,glandium,supply-chain-reviewers,whimboo
Replaces the patched zip crate that was previously used. Differential Revision: https://phabricator.services.mozilla.com/D214617
This commit is contained in:
38
Cargo.lock
generated
38
Cargo.lock
generated
@@ -132,9 +132,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "arbitrary"
|
||||
version = "1.3.0"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e2d098ff73c1ca148721f37baad5ea6a465a13f9573aba8641fbbbae8164a54e"
|
||||
checksum = "7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110"
|
||||
dependencies = [
|
||||
"derive_arbitrary",
|
||||
]
|
||||
@@ -1044,9 +1044,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "crc32fast"
|
||||
version = "1.3.2"
|
||||
version = "1.4.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
|
||||
checksum = "a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
@@ -1097,12 +1097,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.14"
|
||||
version = "0.8.20"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
checksum = "22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"
|
||||
|
||||
[[package]]
|
||||
name = "crypto-common"
|
||||
@@ -1360,9 +1357,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "derive_arbitrary"
|
||||
version = "1.3.1"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "53e0efad4403bfc52dc201159c4b842a246a14b98c64b55dfd0f2d89729dfeb8"
|
||||
checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -1836,9 +1833,9 @@ checksum = "cda653ca797810c02f7ca4b804b40b8b95ae046eb989d356bce17919a8c25499"
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.26"
|
||||
version = "1.0.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "3b9429470923de8e8cbd4d2dc513535400b4b3fef0319fb5c4e1f520a7bef743"
|
||||
checksum = "5f54427cfd1c7829e2a139fcefea601bf088ebca651d2bf53ebc600eac295dae"
|
||||
dependencies = [
|
||||
"crc32fast",
|
||||
"miniz_oxide",
|
||||
@@ -2175,6 +2172,7 @@ dependencies = [
|
||||
"base64 0.21.3",
|
||||
"chrono",
|
||||
"clap",
|
||||
"flate2",
|
||||
"hyper",
|
||||
"icu_segmenter",
|
||||
"lazy_static",
|
||||
@@ -3570,9 +3568,9 @@ checksum = "df39d232f5c40b0891c10216992c2f250c054105cb1e56f0fc9032db6203ecc1"
|
||||
|
||||
[[package]]
|
||||
name = "memchr"
|
||||
version = "2.5.0"
|
||||
version = "2.7.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"
|
||||
checksum = "78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"
|
||||
|
||||
[[package]]
|
||||
name = "memmap2"
|
||||
@@ -7119,10 +7117,16 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "zip"
|
||||
version = "0.6.4"
|
||||
version = "2.1.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "775a2b471036342aa69bc5a602bc889cb0a06cda00477d0c69566757d5553d39"
|
||||
dependencies = [
|
||||
"byteorder",
|
||||
"arbitrary",
|
||||
"crc32fast",
|
||||
"crossbeam-utils",
|
||||
"displaydoc",
|
||||
"flate2",
|
||||
"indexmap 2.2.6",
|
||||
"memchr",
|
||||
"thiserror",
|
||||
]
|
||||
|
||||
@@ -186,10 +186,6 @@ web-sys = { path = "build/rust/dummy-web/web-sys" }
|
||||
# Overrides to allow easier use of common internal crates.
|
||||
moz_asserts = { path = "mozglue/static/rust/moz_asserts" }
|
||||
|
||||
|
||||
# Patched version of zip 0.6.4 to allow for reading omnijars.
|
||||
zip = { path = "third_party/rust/zip" }
|
||||
|
||||
# Patch `rure` to disable building the cdylib and staticlib targets
|
||||
# Cargo has no way to disable building targets your dependencies provide which
|
||||
# you don't depend on, and linking the cdylib breaks during instrumentation
|
||||
|
||||
@@ -58,7 +58,7 @@ unicode-bidi = { version = "0.3", features = ["smallvec"], optional = true }
|
||||
url = { version = "2", features = ["serde"], optional = true }
|
||||
uuid = { version = "1", features = ["serde", "v4"], optional = true }
|
||||
yoke = { version = "0.7", features = ["derive"], optional = true }
|
||||
zip = { version = "0.6", default-features = false, features = ["deflate", "flate2"], optional = true }
|
||||
zip = { version = "2.1.2", default-features = false, features = ["deflate-flate2", "flate2"], optional = true }
|
||||
zerofrom = { version = "0.1", default-features = false, features = ["alloc", "derive"], optional = true }
|
||||
zerovec = { version = "0.10", default-features = false, features = ["derive", "yoke"], optional = true }
|
||||
|
||||
|
||||
@@ -1139,6 +1139,12 @@ who = "Gabriele Svelto <gsvelto@mozilla.com>"
|
||||
criteria = "safe-to-deploy"
|
||||
delta = "0.6.0 -> 0.6.1"
|
||||
|
||||
[[audits.crc32fast]]
|
||||
who = "Alex Franchuk <afranchuk@mozilla.com>"
|
||||
criteria = "safe-to-deploy"
|
||||
delta = "1.3.2 -> 1.4.2"
|
||||
notes = "Minor, safe changes."
|
||||
|
||||
[[audits.crossbeam-channel]]
|
||||
who = "Mike Hommey <mh+mozilla@glandium.org>"
|
||||
criteria = "safe-to-deploy"
|
||||
@@ -1179,6 +1185,12 @@ who = "Mike Hommey <mh+mozilla@glandium.org>"
|
||||
criteria = "safe-to-deploy"
|
||||
delta = "0.8.11 -> 0.8.14"
|
||||
|
||||
[[audits.crossbeam-utils]]
|
||||
who = "Alex Franchuk <afranchuk@mozilla.com>"
|
||||
criteria = "safe-to-deploy"
|
||||
delta = "0.8.19 -> 0.8.20"
|
||||
notes = "Minor changes."
|
||||
|
||||
[[audits.crypto-common]]
|
||||
who = "Mike Hommey <mh+mozilla@glandium.org>"
|
||||
criteria = "safe-to-deploy"
|
||||
@@ -1826,6 +1838,12 @@ who = "Mike Hommey <mh+mozilla@glandium.org>"
|
||||
criteria = "safe-to-deploy"
|
||||
delta = "1.0.24 -> 1.0.25"
|
||||
|
||||
[[audits.flate2]]
|
||||
who = "Alex Franchuk <afranchuk@mozilla.com>"
|
||||
criteria = "safe-to-deploy"
|
||||
delta = "1.0.28 -> 1.0.30"
|
||||
notes = "Some new unsafe code, however it has been verified and there are unit tests as well."
|
||||
|
||||
[[audits.fluent]]
|
||||
who = "Zibi Braniecki <zibi@unicode.org>"
|
||||
criteria = "safe-to-deploy"
|
||||
@@ -5275,6 +5293,18 @@ who = "Mike Hommey <mh+mozilla@glandium.org>"
|
||||
criteria = "safe-to-run"
|
||||
delta = "0.6.3 -> 0.6.4"
|
||||
|
||||
[[audits.zip]]
|
||||
who = "Alex Franchuk <afranchuk@mozilla.com>"
|
||||
criteria = "safe-to-deploy"
|
||||
delta = "0.6.4 -> 2.1.3"
|
||||
notes = """
|
||||
There's a lot of new code and features, however it's almost entirely very
|
||||
straightforward and safe. All new dependencies are appropriate.
|
||||
`FixedSizeBlock::interpret` could be unsound if implemented on a
|
||||
non-1-byte-aligned type, however right now that is not the case
|
||||
(submitted https://github.com/zip-rs/zip2/issues/198).
|
||||
"""
|
||||
|
||||
[[trusted.aho-corasick]]
|
||||
criteria = "safe-to-deploy"
|
||||
user-id = 189 # Andrew Gallant (BurntSushi)
|
||||
@@ -5447,7 +5477,7 @@ end = "2024-05-05"
|
||||
criteria = "safe-to-deploy"
|
||||
user-id = 189 # Andrew Gallant (BurntSushi)
|
||||
start = "2019-07-07"
|
||||
end = "2024-05-03"
|
||||
end = "2025-06-20"
|
||||
|
||||
[[trusted.mime]]
|
||||
criteria = "safe-to-deploy"
|
||||
|
||||
@@ -239,10 +239,6 @@ notes = "Local override of the crates.io crate that uses a non-vendored local co
|
||||
[policy.wr_malloc_size_of]
|
||||
audit-as-crates-io = false
|
||||
|
||||
[policy.zip]
|
||||
audit-as-crates-io = true
|
||||
notes = "Locally patched version of the zip crate to allow for reading omnijars."
|
||||
|
||||
[[exemptions.ahash]]
|
||||
version = "0.7.6"
|
||||
criteria = "safe-to-deploy"
|
||||
|
||||
@@ -22,6 +22,13 @@ user-id = 696
|
||||
user-login = "fitzgen"
|
||||
user-name = "Nick Fitzgerald"
|
||||
|
||||
[[publisher.arbitrary]]
|
||||
version = "1.3.2"
|
||||
when = "2023-10-30"
|
||||
user-id = 696
|
||||
user-login = "fitzgen"
|
||||
user-name = "Nick Fitzgerald"
|
||||
|
||||
[[publisher.async-trait]]
|
||||
version = "0.1.68"
|
||||
when = "2023-03-24"
|
||||
@@ -162,6 +169,13 @@ user-id = 696
|
||||
user-login = "fitzgen"
|
||||
user-name = "Nick Fitzgerald"
|
||||
|
||||
[[publisher.derive_arbitrary]]
|
||||
version = "1.3.2"
|
||||
when = "2023-10-30"
|
||||
user-id = 696
|
||||
user-login = "fitzgen"
|
||||
user-name = "Nick Fitzgerald"
|
||||
|
||||
[[publisher.dogear]]
|
||||
version = "0.4.0"
|
||||
when = "2019-09-16"
|
||||
@@ -358,6 +372,13 @@ user-id = 189
|
||||
user-login = "BurntSushi"
|
||||
user-name = "Andrew Gallant"
|
||||
|
||||
[[publisher.memchr]]
|
||||
version = "2.7.4"
|
||||
when = "2024-06-14"
|
||||
user-id = 189
|
||||
user-login = "BurntSushi"
|
||||
user-name = "Andrew Gallant"
|
||||
|
||||
[[publisher.mime]]
|
||||
version = "0.3.16"
|
||||
when = "2020-01-07"
|
||||
@@ -984,6 +1005,12 @@ this crate has to do with iterators and `Result` and such. No `unsafe` or
|
||||
anything like that, all looks good.
|
||||
"""
|
||||
|
||||
[[audits.bytecode-alliance.audits.flate2]]
|
||||
who = "Andrew Brown <andrew.brown@intel.com>"
|
||||
criteria = "safe-to-deploy"
|
||||
delta = "1.0.26 -> 1.0.28"
|
||||
notes = "No new `unsafe` and no large changes in function. This diff is mostly refactoring with a lot of docs, CI, test changes. Adds some defensive clearing out of certain variables as a safeguard."
|
||||
|
||||
[[audits.bytecode-alliance.audits.foreign-types]]
|
||||
who = "Pat Hickey <phickey@fastly.com>"
|
||||
criteria = "safe-to-deploy"
|
||||
@@ -1674,6 +1701,12 @@ version = "0.1.2"
|
||||
notes = "TOML parser, forked from toml 0.5"
|
||||
aggregated-from = "https://raw.githubusercontent.com/mozilla/glean/main/supply-chain/audits.toml"
|
||||
|
||||
[[audits.mozilla.audits.crossbeam-utils]]
|
||||
who = "Jan-Erik Rediger <jrediger@mozilla.com>"
|
||||
criteria = "safe-to-deploy"
|
||||
delta = "0.8.14 -> 0.8.19"
|
||||
aggregated-from = "https://raw.githubusercontent.com/mozilla/glean/main/supply-chain/audits.toml"
|
||||
|
||||
[[audits.mozilla.audits.either]]
|
||||
who = "Nika Layzell <nika@thelayzells.com>"
|
||||
criteria = "safe-to-deploy"
|
||||
|
||||
@@ -25,6 +25,8 @@ anyhow = "1"
|
||||
base64 = "0.21"
|
||||
chrono = "0.4.6"
|
||||
clap = { version = "4", default-features = false, features = ["cargo", "std", "suggestions", "wrap_help", "string"] }
|
||||
# Depend on flate2 to enable the rust backend (the default) for flate2 used by the zip crate.
|
||||
flate2 = "1"
|
||||
hyper = "0.14"
|
||||
icu_segmenter = { version = "1.5", default-features = false, features = ["auto", "compiled_data"] }
|
||||
lazy_static = "1.0"
|
||||
@@ -44,7 +46,7 @@ thiserror = "1"
|
||||
url = "2.4"
|
||||
uuid = { version = "1.0", features = ["v4"] }
|
||||
webdriver = { path = "../webdriver", version="0.50.0" }
|
||||
zip = { version = "0.6", default-features = false, features = ["deflate"] }
|
||||
zip = { version = "2.1.2", default-features = false, features = ["deflate-flate2", "flate2"] }
|
||||
mozilla-central-workspace-hack = { version = "0.1", features = ["geckodriver"], optional = true }
|
||||
|
||||
[dev-dependencies]
|
||||
|
||||
@@ -1 +1 @@
|
||||
{"files":{"CHANGELOG.md":"24d77877bb19eb15479bee348ba70b3bfc42b53808315083b1c23e6995f32765","Cargo.lock":"a295bac76b0074bd335565a84f07dab52a023bb2126651a138531a024e7a1e3d","Cargo.toml":"63fd66366ab3462bed6e3acaa2d37faca054a399da0f24461554d314e3a2fe28","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"15656cc11a8331f28c0986b8ab97220d3e76f98e60ed388b5ffad37dfac4710c","README.md":"5a1c78cfd5295f86d11bf9baf39e7190366b94a2cfb39bb6ad59b26b66addd52","examples/derive_enum.rs":"ef93268fbed2b70c177bebf5757ef1ee0ca5ba0cb0d335d0989a81725463f8c9","publish.sh":"752e221bdd960666b127df15effddd3d789ff3f1762498961fc79ae99f9a27f1","src/error.rs":"33d74ddd002dd8cf73514c93b7e1ecb604718bea69e53f619bb7a87c7f8d71f2","src/lib.rs":"c5a7a59725a83475e0bbc5615df2c05f751712eea829be6a114c608114767626","src/size_hint.rs":"9762b183f8277ee4955fe5b22552961744b6237286758161a551f904ef43e3eb","src/unstructured.rs":"0a0ab0f889796bee2e1abe59f7b91c87f0a6991c155c2250a093f736ced4e4e6","tests/bound.rs":"32b8ce8083419ecadd66e1fde4da427b437ac9e007ed9d6b5229a4ef24a5d772","tests/derive.rs":"5aef1e3a8b709a3e3e0b154ecd51e34bb4d5f8968291ee691326897da58d9658","tests/path.rs":"7e3e5fa26d5671ac19e99ba010a2fa0894fafe996c5d28ee587919e486d7c7c1"},"package":"e2d098ff73c1ca148721f37baad5ea6a465a13f9573aba8641fbbbae8164a54e"}
|
||||
{"files":{"CHANGELOG.md":"793eec5cc439ed5ec27e1fb2d4e066b364672800f5ede24884c341e23bca7db9","Cargo.lock":"43827ead9223aa9c33bcf00aafbaf73543bb58a12fb0e875b8e9654440558b81","Cargo.toml":"41550a126543f3f287b9df3f655f7a41f0307defefa628ca784da1f1388923a6","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"15656cc11a8331f28c0986b8ab97220d3e76f98e60ed388b5ffad37dfac4710c","README.md":"5a1c78cfd5295f86d11bf9baf39e7190366b94a2cfb39bb6ad59b26b66addd52","examples/derive_enum.rs":"ef93268fbed2b70c177bebf5757ef1ee0ca5ba0cb0d335d0989a81725463f8c9","publish.sh":"752e221bdd960666b127df15effddd3d789ff3f1762498961fc79ae99f9a27f1","src/error.rs":"8cd2f06dc455d28ee00ac8305ca5f53764e8205e280aa3652988497859163e99","src/lib.rs":"a9ab7cff37c9c3fd322f0a220a4a20376a171bb50e3345e516bc41842d4bbd28","src/size_hint.rs":"9762b183f8277ee4955fe5b22552961744b6237286758161a551f904ef43e3eb","src/unstructured.rs":"c0c8697d9b02ff0dd75d3ca9d380f153c54a85a4bf0cf653f3d8236b3cbd93ab","tests/bound.rs":"32b8ce8083419ecadd66e1fde4da427b437ac9e007ed9d6b5229a4ef24a5d772","tests/derive.rs":"8302756c42beca824fa7990d9cbece87d0f115c42a16e745678c28bd9fb1b10e","tests/path.rs":"7e3e5fa26d5671ac19e99ba010a2fa0894fafe996c5d28ee587919e486d7c7c1"},"package":"7d5a26814d8dcb93b0e5a0ff3c6d80a8843bafb21b39e8e18a6f05471870e110"}
|
||||
28
third_party/rust/arbitrary/CHANGELOG.md
vendored
28
third_party/rust/arbitrary/CHANGELOG.md
vendored
@@ -28,6 +28,30 @@ Released YYYY-MM-DD.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
## 1.3.2
|
||||
|
||||
Released 2023-10-30.
|
||||
|
||||
### Added
|
||||
|
||||
* Added `Arbitrary` implementations for `Arc<[T]>` and
|
||||
`Rc<[T]>`. [#160](https://github.com/rust-fuzz/arbitrary/pull/160)
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
## 1.3.1
|
||||
|
||||
Released 2023-10-11.
|
||||
|
||||
### Fixed
|
||||
|
||||
* Fixed an issue with generating collections of collections in
|
||||
`arbitrary_take_rest` where `<Vec<Vec<u8>>>::arbitrary_take_rest` would never
|
||||
generate `vec![vec![]]` for example. See
|
||||
[#159](https://github.com/rust-fuzz/arbitrary/pull/159) for details.
|
||||
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
## 1.3.0
|
||||
|
||||
Released 2023-03-13.
|
||||
@@ -85,7 +109,7 @@ Released 2022-10-20.
|
||||
|
||||
## 1.1.6
|
||||
|
||||
Released 2022-09-08.
|
||||
Released 2022-09-20.
|
||||
|
||||
### Fixed
|
||||
|
||||
@@ -96,7 +120,7 @@ Released 2022-09-08.
|
||||
|
||||
## 1.1.5
|
||||
|
||||
Released 2022-09-20.
|
||||
Released 2022-09-08.
|
||||
|
||||
### Added
|
||||
|
||||
|
||||
37
third_party/rust/arbitrary/Cargo.lock
generated
vendored
37
third_party/rust/arbitrary/Cargo.lock
generated
vendored
@@ -4,16 +4,17 @@ version = 3
|
||||
|
||||
[[package]]
|
||||
name = "arbitrary"
|
||||
version = "1.3.0"
|
||||
version = "1.3.2"
|
||||
dependencies = [
|
||||
"derive_arbitrary",
|
||||
"exhaustigen",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "derive_arbitrary"
|
||||
version = "1.3.0"
|
||||
version = "1.3.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f3cdeb9ec472d588e539a818b2dee436825730da08ad0017c4b1a17676bdc8b7"
|
||||
checksum = "67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
@@ -21,36 +22,42 @@ dependencies = [
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.24"
|
||||
name = "exhaustigen"
|
||||
version = "0.1.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1e0704ee1a7e00d7bb417d0770ea303c1bccbabf0ef1667dae92b5967f5f8a71"
|
||||
checksum = "7d88f747710a968a0a32ce4c4ae90ead21dc36a06aceabbb4367452679a95eb6"
|
||||
|
||||
[[package]]
|
||||
name = "proc-macro2"
|
||||
version = "1.0.69"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "134c189feb4956b20f6f547d2cf727d4c0fe06722b20a0eec87ed445a97f92da"
|
||||
dependencies = [
|
||||
"unicode-xid",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "quote"
|
||||
version = "1.0.8"
|
||||
version = "1.0.33"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "991431c3519a3f36861882da93630ce66b52918dcf1b8e2fd66b397fc96f28df"
|
||||
checksum = "5267fca4496028628a95160fc423a33e8b2e6af8a5302579e322e4b520293cae"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "syn"
|
||||
version = "1.0.60"
|
||||
version = "2.0.38"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c700597eca8a5a762beb35753ef6b94df201c81cca676604f547495a0d7f0081"
|
||||
checksum = "e96b79aaa137db8f61e26363a0c9b47d8b4ec75da28b7d1d614c2303e232408b"
|
||||
dependencies = [
|
||||
"proc-macro2",
|
||||
"quote",
|
||||
"unicode-xid",
|
||||
"unicode-ident",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "unicode-xid"
|
||||
version = "0.2.1"
|
||||
name = "unicode-ident"
|
||||
version = "1.0.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "f7fe0bb3479651439c9112f72b6c505038574c9fbb575ed1bf3b797fa39dd564"
|
||||
checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b"
|
||||
|
||||
9
third_party/rust/arbitrary/Cargo.toml
vendored
9
third_party/rust/arbitrary/Cargo.toml
vendored
@@ -10,10 +10,10 @@
|
||||
# See Cargo.toml.orig for the original contents.
|
||||
|
||||
[package]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
rust-version = "1.63.0"
|
||||
name = "arbitrary"
|
||||
version = "1.3.0"
|
||||
version = "1.3.2"
|
||||
authors = [
|
||||
"The Rust-Fuzz Project Developers",
|
||||
"Nick Fitzgerald <fitzgen@gmail.com>",
|
||||
@@ -43,8 +43,11 @@ path = "./tests/derive.rs"
|
||||
required-features = ["derive"]
|
||||
|
||||
[dependencies.derive_arbitrary]
|
||||
version = "1.3.0"
|
||||
version = "1.3.2"
|
||||
optional = true
|
||||
|
||||
[dev-dependencies.exhaustigen]
|
||||
version = "0.1.0"
|
||||
|
||||
[features]
|
||||
derive = ["derive_arbitrary"]
|
||||
|
||||
2
third_party/rust/arbitrary/src/error.rs
vendored
2
third_party/rust/arbitrary/src/error.rs
vendored
@@ -1,7 +1,7 @@
|
||||
use std::{error, fmt};
|
||||
|
||||
/// An enumeration of buffer creation errors
|
||||
#[derive(Debug, Clone, Copy)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
|
||||
#[non_exhaustive]
|
||||
pub enum Error {
|
||||
/// No choices were provided to the Unstructured::choose call
|
||||
|
||||
264
third_party/rust/arbitrary/src/lib.rs
vendored
264
third_party/rust/arbitrary/src/lib.rs
vendored
@@ -47,7 +47,7 @@ use std::borrow::{Cow, ToOwned};
|
||||
use std::collections::{BTreeMap, BTreeSet, BinaryHeap, HashMap, HashSet, LinkedList, VecDeque};
|
||||
use std::ffi::{CString, OsString};
|
||||
use std::hash::BuildHasher;
|
||||
use std::net::{Ipv4Addr, Ipv6Addr};
|
||||
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
|
||||
use std::ops::Bound;
|
||||
use std::path::PathBuf;
|
||||
use std::rc::Rc;
|
||||
@@ -440,6 +440,7 @@ macro_rules! impl_range {
|
||||
|
||||
#[inline]
|
||||
fn size_hint(depth: usize) -> (usize, Option<usize>) {
|
||||
#[allow(clippy::redundant_closure_call)]
|
||||
$size_hint_closure(depth)
|
||||
}
|
||||
}
|
||||
@@ -834,29 +835,33 @@ where
|
||||
}
|
||||
}
|
||||
|
||||
fn arbitrary_str<'a>(u: &mut Unstructured<'a>, size: usize) -> Result<&'a str> {
|
||||
match str::from_utf8(u.peek_bytes(size).unwrap()) {
|
||||
Ok(s) => {
|
||||
u.bytes(size).unwrap();
|
||||
Ok(s)
|
||||
}
|
||||
Err(e) => {
|
||||
let i = e.valid_up_to();
|
||||
let valid = u.bytes(i).unwrap();
|
||||
let s = unsafe {
|
||||
debug_assert!(str::from_utf8(valid).is_ok());
|
||||
str::from_utf8_unchecked(valid)
|
||||
};
|
||||
Ok(s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Arbitrary<'a> for &'a str {
|
||||
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
|
||||
let size = u.arbitrary_len::<u8>()?;
|
||||
match str::from_utf8(u.peek_bytes(size).unwrap()) {
|
||||
Ok(s) => {
|
||||
u.bytes(size).unwrap();
|
||||
Ok(s)
|
||||
}
|
||||
Err(e) => {
|
||||
let i = e.valid_up_to();
|
||||
let valid = u.bytes(i).unwrap();
|
||||
let s = unsafe {
|
||||
debug_assert!(str::from_utf8(valid).is_ok());
|
||||
str::from_utf8_unchecked(valid)
|
||||
};
|
||||
Ok(s)
|
||||
}
|
||||
}
|
||||
arbitrary_str(u, size)
|
||||
}
|
||||
|
||||
fn arbitrary_take_rest(u: Unstructured<'a>) -> Result<Self> {
|
||||
let bytes = u.take_rest();
|
||||
str::from_utf8(bytes).map_err(|_| Error::IncorrectFormat)
|
||||
fn arbitrary_take_rest(mut u: Unstructured<'a>) -> Result<Self> {
|
||||
let size = u.len();
|
||||
arbitrary_str(&mut u, size)
|
||||
}
|
||||
|
||||
#[inline]
|
||||
@@ -929,12 +934,16 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Box<A> {
|
||||
|
||||
impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Box<[A]> {
|
||||
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
|
||||
<Vec<A> as Arbitrary>::arbitrary(u).map(|x| x.into_boxed_slice())
|
||||
u.arbitrary_iter()?.collect()
|
||||
}
|
||||
|
||||
fn arbitrary_take_rest(u: Unstructured<'a>) -> Result<Self> {
|
||||
u.arbitrary_take_rest_iter()?.collect()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn size_hint(depth: usize) -> (usize, Option<usize>) {
|
||||
<Vec<A> as Arbitrary>::size_hint(depth)
|
||||
fn size_hint(_depth: usize) -> (usize, Option<usize>) {
|
||||
(0, None)
|
||||
}
|
||||
}
|
||||
|
||||
@@ -973,6 +982,21 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Arc<A> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Arc<[A]> {
|
||||
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
|
||||
u.arbitrary_iter()?.collect()
|
||||
}
|
||||
|
||||
fn arbitrary_take_rest(u: Unstructured<'a>) -> Result<Self> {
|
||||
u.arbitrary_take_rest_iter()?.collect()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn size_hint(_depth: usize) -> (usize, Option<usize>) {
|
||||
(0, None)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Arbitrary<'a> for Arc<str> {
|
||||
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
|
||||
<&str as Arbitrary>::arbitrary(u).map(Into::into)
|
||||
@@ -995,6 +1019,21 @@ impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Rc<A> {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, A: Arbitrary<'a>> Arbitrary<'a> for Rc<[A]> {
|
||||
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
|
||||
u.arbitrary_iter()?.collect()
|
||||
}
|
||||
|
||||
fn arbitrary_take_rest(u: Unstructured<'a>) -> Result<Self> {
|
||||
u.arbitrary_take_rest_iter()?.collect()
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn size_hint(_depth: usize) -> (usize, Option<usize>) {
|
||||
(0, None)
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Arbitrary<'a> for Rc<str> {
|
||||
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
|
||||
<&str as Arbitrary>::arbitrary(u).map(Into::into)
|
||||
@@ -1136,10 +1175,77 @@ impl<'a> Arbitrary<'a> for Ipv6Addr {
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Arbitrary<'a> for IpAddr {
|
||||
fn arbitrary(u: &mut Unstructured<'a>) -> Result<Self> {
|
||||
if u.arbitrary()? {
|
||||
Ok(IpAddr::V4(u.arbitrary()?))
|
||||
} else {
|
||||
Ok(IpAddr::V6(u.arbitrary()?))
|
||||
}
|
||||
}
|
||||
|
||||
fn size_hint(depth: usize) -> (usize, Option<usize>) {
|
||||
size_hint::and(
|
||||
bool::size_hint(depth),
|
||||
size_hint::or(Ipv4Addr::size_hint(depth), Ipv6Addr::size_hint(depth)),
|
||||
)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use super::*;
|
||||
|
||||
/// Assert that the given expected values are all generated.
|
||||
///
|
||||
/// Exhaustively enumerates all buffers up to length 10 containing the
|
||||
/// following bytes: `0x00`, `0x01`, `0x61` (aka ASCII 'a'), and `0xff`
|
||||
fn assert_generates<T>(expected_values: impl IntoIterator<Item = T>)
|
||||
where
|
||||
T: Clone + std::fmt::Debug + std::hash::Hash + Eq + for<'a> Arbitrary<'a>,
|
||||
{
|
||||
let expected_values: HashSet<_> = expected_values.into_iter().collect();
|
||||
let mut arbitrary_expected = expected_values.clone();
|
||||
let mut arbitrary_take_rest_expected = expected_values;
|
||||
|
||||
let bytes = [0, 1, b'a', 0xff];
|
||||
let max_len = 10;
|
||||
|
||||
let mut buf = Vec::with_capacity(max_len);
|
||||
|
||||
let mut g = exhaustigen::Gen::new();
|
||||
while !g.done() {
|
||||
let len = g.gen(max_len);
|
||||
|
||||
buf.clear();
|
||||
buf.extend(
|
||||
std::iter::repeat_with(|| {
|
||||
let index = g.gen(bytes.len() - 1);
|
||||
bytes[index]
|
||||
})
|
||||
.take(len),
|
||||
);
|
||||
|
||||
let mut u = Unstructured::new(&buf);
|
||||
let val = T::arbitrary(&mut u).unwrap();
|
||||
arbitrary_expected.remove(&val);
|
||||
|
||||
let u = Unstructured::new(&buf);
|
||||
let val = T::arbitrary_take_rest(u).unwrap();
|
||||
arbitrary_take_rest_expected.remove(&val);
|
||||
|
||||
if arbitrary_expected.is_empty() && arbitrary_take_rest_expected.is_empty() {
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
panic!(
|
||||
"failed to generate all expected values!\n\n\
|
||||
T::arbitrary did not generate: {arbitrary_expected:#?}\n\n\
|
||||
T::arbitrary_take_rest did not generate {arbitrary_take_rest_expected:#?}"
|
||||
)
|
||||
}
|
||||
|
||||
/// Generates an arbitrary `T`, and checks that the result is consistent with the
|
||||
/// `size_hint()` reported by `T`.
|
||||
fn checked_arbitrary<'a, T: Arbitrary<'a>>(u: &mut Unstructured<'a>) -> Result<T> {
|
||||
@@ -1210,6 +1316,16 @@ mod test {
|
||||
let expected = 1 | (2 << 8) | (3 << 16) | (4 << 24);
|
||||
let actual = checked_arbitrary::<i32>(&mut buf).unwrap();
|
||||
assert_eq!(expected, actual);
|
||||
|
||||
assert_generates([
|
||||
i32::from_ne_bytes([0, 0, 0, 0]),
|
||||
i32::from_ne_bytes([0, 0, 0, 1]),
|
||||
i32::from_ne_bytes([0, 0, 1, 0]),
|
||||
i32::from_ne_bytes([0, 1, 0, 0]),
|
||||
i32::from_ne_bytes([1, 0, 0, 0]),
|
||||
i32::from_ne_bytes([1, 1, 1, 1]),
|
||||
i32::from_ne_bytes([0xff, 0xff, 0xff, 0xff]),
|
||||
]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@@ -1230,6 +1346,74 @@ mod test {
|
||||
assert_eq!(expected, actual);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn arbitrary_for_vec_u8() {
|
||||
assert_generates::<Vec<u8>>([
|
||||
vec![],
|
||||
vec![0],
|
||||
vec![1],
|
||||
vec![0, 0],
|
||||
vec![0, 1],
|
||||
vec![1, 0],
|
||||
vec![1, 1],
|
||||
vec![0, 0, 0],
|
||||
vec![0, 0, 1],
|
||||
vec![0, 1, 0],
|
||||
vec![0, 1, 1],
|
||||
vec![1, 0, 0],
|
||||
vec![1, 0, 1],
|
||||
vec![1, 1, 0],
|
||||
vec![1, 1, 1],
|
||||
]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn arbitrary_for_vec_vec_u8() {
|
||||
assert_generates::<Vec<Vec<u8>>>([
|
||||
vec![],
|
||||
vec![vec![]],
|
||||
vec![vec![0]],
|
||||
vec![vec![1]],
|
||||
vec![vec![0, 1]],
|
||||
vec![vec![], vec![]],
|
||||
vec![vec![0], vec![]],
|
||||
vec![vec![], vec![1]],
|
||||
vec![vec![0], vec![1]],
|
||||
vec![vec![0, 1], vec![]],
|
||||
vec![vec![], vec![1, 0]],
|
||||
vec![vec![], vec![], vec![]],
|
||||
]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn arbitrary_for_vec_vec_vec_u8() {
|
||||
assert_generates::<Vec<Vec<Vec<u8>>>>([
|
||||
vec![],
|
||||
vec![vec![]],
|
||||
vec![vec![vec![0]]],
|
||||
vec![vec![vec![1]]],
|
||||
vec![vec![vec![0, 1]]],
|
||||
vec![vec![], vec![]],
|
||||
vec![vec![], vec![vec![]]],
|
||||
vec![vec![vec![]], vec![]],
|
||||
vec![vec![vec![]], vec![vec![]]],
|
||||
vec![vec![vec![0]], vec![]],
|
||||
vec![vec![], vec![vec![1]]],
|
||||
vec![vec![vec![0]], vec![vec![1]]],
|
||||
vec![vec![vec![0, 1]], vec![]],
|
||||
vec![vec![], vec![vec![0, 1]]],
|
||||
vec![vec![], vec![], vec![]],
|
||||
vec![vec![vec![]], vec![], vec![]],
|
||||
vec![vec![], vec![vec![]], vec![]],
|
||||
vec![vec![], vec![], vec![vec![]]],
|
||||
]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn arbitrary_for_string() {
|
||||
assert_generates::<String>(["".into(), "a".into(), "aa".into(), "aaa".into()]);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn arbitrary_collection() {
|
||||
let x = [
|
||||
@@ -1243,6 +1427,18 @@ mod test {
|
||||
checked_arbitrary::<Vec<u8>>(&mut Unstructured::new(&x)).unwrap(),
|
||||
&[2, 4, 6, 8, 1]
|
||||
);
|
||||
assert_eq!(
|
||||
&*checked_arbitrary::<Box<[u8]>>(&mut Unstructured::new(&x)).unwrap(),
|
||||
&[2, 4, 6, 8, 1]
|
||||
);
|
||||
assert_eq!(
|
||||
&*checked_arbitrary::<Arc<[u8]>>(&mut Unstructured::new(&x)).unwrap(),
|
||||
&[2, 4, 6, 8, 1]
|
||||
);
|
||||
assert_eq!(
|
||||
&*checked_arbitrary::<Rc<[u8]>>(&mut Unstructured::new(&x)).unwrap(),
|
||||
&[2, 4, 6, 8, 1]
|
||||
);
|
||||
assert_eq!(
|
||||
checked_arbitrary::<Vec<u32>>(&mut Unstructured::new(&x)).unwrap(),
|
||||
&[84148994]
|
||||
@@ -1255,6 +1451,7 @@ mod test {
|
||||
|
||||
#[test]
|
||||
fn arbitrary_take_rest() {
|
||||
// Basic examples
|
||||
let x = [1, 2, 3, 4];
|
||||
assert_eq!(
|
||||
checked_arbitrary_take_rest::<&[u8]>(Unstructured::new(&x)).unwrap(),
|
||||
@@ -1262,17 +1459,30 @@ mod test {
|
||||
);
|
||||
assert_eq!(
|
||||
checked_arbitrary_take_rest::<Vec<u8>>(Unstructured::new(&x)).unwrap(),
|
||||
&[1, 2, 3, 4]
|
||||
&[2, 4]
|
||||
);
|
||||
assert_eq!(
|
||||
&*checked_arbitrary_take_rest::<Box<[u8]>>(Unstructured::new(&x)).unwrap(),
|
||||
&[2, 4]
|
||||
);
|
||||
assert_eq!(
|
||||
&*checked_arbitrary_take_rest::<Arc<[u8]>>(Unstructured::new(&x)).unwrap(),
|
||||
&[2, 4]
|
||||
);
|
||||
assert_eq!(
|
||||
&*checked_arbitrary_take_rest::<Rc<[u8]>>(Unstructured::new(&x)).unwrap(),
|
||||
&[2, 4]
|
||||
);
|
||||
assert_eq!(
|
||||
checked_arbitrary_take_rest::<Vec<u32>>(Unstructured::new(&x)).unwrap(),
|
||||
&[0x4030201]
|
||||
&[0x040302]
|
||||
);
|
||||
assert_eq!(
|
||||
checked_arbitrary_take_rest::<String>(Unstructured::new(&x)).unwrap(),
|
||||
"\x01\x02\x03\x04"
|
||||
);
|
||||
|
||||
// Empty remainder
|
||||
assert_eq!(
|
||||
checked_arbitrary_take_rest::<&[u8]>(Unstructured::new(&[])).unwrap(),
|
||||
&[]
|
||||
@@ -1281,6 +1491,12 @@ mod test {
|
||||
checked_arbitrary_take_rest::<Vec<u8>>(Unstructured::new(&[])).unwrap(),
|
||||
&[]
|
||||
);
|
||||
|
||||
// Cannot consume all but can consume part of the input
|
||||
assert_eq!(
|
||||
checked_arbitrary_take_rest::<String>(Unstructured::new(&[1, 0xFF, 2])).unwrap(),
|
||||
"\x01"
|
||||
);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
||||
25
third_party/rust/arbitrary/src/unstructured.rs
vendored
25
third_party/rust/arbitrary/src/unstructured.rs
vendored
@@ -620,14 +620,8 @@ impl<'a> Unstructured<'a> {
|
||||
pub fn arbitrary_take_rest_iter<ElementType: Arbitrary<'a>>(
|
||||
self,
|
||||
) -> Result<ArbitraryTakeRestIter<'a, ElementType>> {
|
||||
let (lower, upper) = ElementType::size_hint(0);
|
||||
|
||||
let elem_size = upper.unwrap_or(lower * 2);
|
||||
let elem_size = std::cmp::max(1, elem_size);
|
||||
let size = self.len() / elem_size;
|
||||
Ok(ArbitraryTakeRestIter {
|
||||
size,
|
||||
u: Some(self),
|
||||
u: self,
|
||||
_marker: PhantomData,
|
||||
})
|
||||
}
|
||||
@@ -735,25 +729,16 @@ impl<'a, 'b, ElementType: Arbitrary<'a>> Iterator for ArbitraryIter<'a, 'b, Elem
|
||||
|
||||
/// Utility iterator produced by [`Unstructured::arbitrary_take_rest_iter`]
|
||||
pub struct ArbitraryTakeRestIter<'a, ElementType> {
|
||||
u: Option<Unstructured<'a>>,
|
||||
size: usize,
|
||||
u: Unstructured<'a>,
|
||||
_marker: PhantomData<ElementType>,
|
||||
}
|
||||
|
||||
impl<'a, ElementType: Arbitrary<'a>> Iterator for ArbitraryTakeRestIter<'a, ElementType> {
|
||||
type Item = Result<ElementType>;
|
||||
fn next(&mut self) -> Option<Result<ElementType>> {
|
||||
if let Some(mut u) = self.u.take() {
|
||||
if self.size == 1 {
|
||||
Some(Arbitrary::arbitrary_take_rest(u))
|
||||
} else if self.size == 0 {
|
||||
None
|
||||
} else {
|
||||
self.size -= 1;
|
||||
let ret = Arbitrary::arbitrary(&mut u);
|
||||
self.u = Some(u);
|
||||
Some(ret)
|
||||
}
|
||||
let keep_going = self.u.arbitrary().unwrap_or(false);
|
||||
if keep_going {
|
||||
Some(Arbitrary::arbitrary(&mut self.u))
|
||||
} else {
|
||||
None
|
||||
}
|
||||
|
||||
2
third_party/rust/arbitrary/tests/derive.rs
vendored
2
third_party/rust/arbitrary/tests/derive.rs
vendored
@@ -59,7 +59,7 @@ fn test_take_rest() {
|
||||
assert_eq!(s2.1, true);
|
||||
assert_eq!(s1.2, 0x4030201);
|
||||
assert_eq!(s2.2, 0x4030201);
|
||||
assert_eq!(s1.3, vec![0x605, 0x807]);
|
||||
assert_eq!(s1.3, vec![0x0706]);
|
||||
assert_eq!(s2.3, "\x05\x06\x07\x08");
|
||||
}
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
{"files":{"Cargo.toml":"be80bad9c3fe51a936b76e4ea82cdb13afb747f445e642280996f110ed584517","LICENSE-APACHE":"c6596eb7be8581c18be736c846fb9173b69eccf6ef94c5135893ec56bd92ba08","LICENSE-MIT":"61d383b05b87d78f94d2937e2580cce47226d17823c0430fbcad09596537efcf","README.md":"4b1a2bfbbda9923f78565bd52bd68cd172a188482061b0d2d159ff1a3b57127d","benches/bench.rs":"fb71f17c9f472d0985aead2c71e4ac4f717e5a1eeedc2fbfe75de8709033542d","build.rs":"4ccc50c3da67eb27f0b622440d2b7aee2f73fa9c71884571f3c041122231d105","src/baseline.rs":"bbe8fe49ceccbf9749052fa9c2756cf95f0fc79a063e5d3b509e3600283464ea","src/combine.rs":"7147fc4002190d36d253ea5e194e0419035b087304bcb17887efe09a8a198815","src/lib.rs":"5543027ec0594dd7cb9d67010912871b50d96ca1d255bc527ec9d01cabfc849a","src/specialized/aarch64.rs":"537c316b2e313421b7075e545d52ef1a32d6fe29808575742006247c6a4628f8","src/specialized/mod.rs":"267634816329f32d95b66e2fcc1a8eca6652334bddd454446f4d78b66470b06d","src/specialized/pclmulqdq.rs":"90ef05b5044eaeea736c6877e60820d6c5abfb90759e40fc81b9cc8feb36e96b","src/table.rs":"1a566a6311b32b654fa05b324301611e0866b10db63692b53e8c1b9b529d4c17"},"package":"b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"}
|
||||
{"files":{"Cargo.toml":"91382647edb52759ca5bfd49a9d3eb425007a0c0c5799b6f76819bf4471e34c3","LICENSE-APACHE":"c6596eb7be8581c18be736c846fb9173b69eccf6ef94c5135893ec56bd92ba08","LICENSE-MIT":"61d383b05b87d78f94d2937e2580cce47226d17823c0430fbcad09596537efcf","README.md":"797c17914bec5a71cb281a10c28058461c1a7e3531c4af7aabb654ee642ede01","benches/bench.rs":"fb71f17c9f472d0985aead2c71e4ac4f717e5a1eeedc2fbfe75de8709033542d","src/baseline.rs":"bbe8fe49ceccbf9749052fa9c2756cf95f0fc79a063e5d3b509e3600283464ea","src/combine.rs":"7147fc4002190d36d253ea5e194e0419035b087304bcb17887efe09a8a198815","src/lib.rs":"a545231a8162fd30b80bcb0c820db9eaa3a2eb169902ced472e0fc5a321a0a1e","src/specialized/aarch64.rs":"b1fb41b7eaa40b95b9603c445f6e502d6d65ecfb70f9eef4bb90c49c41dfaca9","src/specialized/mod.rs":"10e6bbd4fcd40c46a8480f6cdc10ddab80322f56fc9d0279ec35f11bdd8f6fd6","src/specialized/pclmulqdq.rs":"4a175c9bef6dd3ff20d4f0cd0e8da027fe9f2a4fddf1a87f8664d4fa8ae087e2","src/table.rs":"1a566a6311b32b654fa05b324301611e0866b10db63692b53e8c1b9b529d4c17"},"package":"a97769d94ddab943e4510d138150169a2758b5ef3eb191a9ee688de3e23ef7b3"}
|
||||
17
third_party/rust/crc32fast/Cargo.toml
vendored
17
third_party/rust/crc32fast/Cargo.toml
vendored
@@ -11,19 +11,30 @@
|
||||
|
||||
[package]
|
||||
name = "crc32fast"
|
||||
version = "1.3.2"
|
||||
authors = ["Sam Rijs <srijs@airpost.net>", "Alex Crichton <alex@alexcrichton.com>"]
|
||||
version = "1.4.2"
|
||||
authors = [
|
||||
"Sam Rijs <srijs@airpost.net>",
|
||||
"Alex Crichton <alex@alexcrichton.com>",
|
||||
]
|
||||
description = "Fast, SIMD-accelerated CRC32 (IEEE) checksum computation"
|
||||
readme = "README.md"
|
||||
keywords = ["checksum", "crc", "crc32", "simd", "fast"]
|
||||
keywords = [
|
||||
"checksum",
|
||||
"crc",
|
||||
"crc32",
|
||||
"simd",
|
||||
"fast",
|
||||
]
|
||||
license = "MIT OR Apache-2.0"
|
||||
repository = "https://github.com/srijs/rust-crc32fast"
|
||||
|
||||
[[bench]]
|
||||
name = "bench"
|
||||
harness = false
|
||||
|
||||
[dependencies.cfg-if]
|
||||
version = "1.0"
|
||||
|
||||
[dev-dependencies.bencher]
|
||||
version = "0.1"
|
||||
|
||||
|
||||
6
third_party/rust/crc32fast/README.md
vendored
6
third_party/rust/crc32fast/README.md
vendored
@@ -1,7 +1,7 @@
|
||||
# crc32fast [![Build Status][travis-img]][travis] [![Crates.io][crates-img]][crates] [![Documentation][docs-img]][docs]
|
||||
# crc32fast [![Build Status][build-img]][build] [![Crates.io][crates-img]][crates] [![Documentation][docs-img]][docs]
|
||||
|
||||
[travis-img]: https://travis-ci.com/srijs/rust-crc32fast.svg?branch=master
|
||||
[travis]: https://travis-ci.com/srijs/rust-crc32fast
|
||||
[build-img]: https://github.com/srijs/rust-crc32fast/actions/workflows/ci.yml/badge.svg
|
||||
[build]: https://github.com/srijs/rust-crc32fast/actions/workflows/ci.yml
|
||||
[crates-img]: https://img.shields.io/crates/v/crc32fast.svg
|
||||
[crates]: https://crates.io/crates/crc32fast
|
||||
[docs-img]: https://docs.rs/crc32fast/badge.svg
|
||||
|
||||
35
third_party/rust/crc32fast/build.rs
vendored
35
third_party/rust/crc32fast/build.rs
vendored
@@ -1,35 +0,0 @@
|
||||
use std::env;
|
||||
use std::process::Command;
|
||||
use std::str;
|
||||
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-changed=build.rs");
|
||||
|
||||
let minor = match rustc_minor_version() {
|
||||
Some(n) => n,
|
||||
None => return,
|
||||
};
|
||||
|
||||
if minor >= 27 {
|
||||
println!("cargo:rustc-cfg=crc32fast_stdarchx86");
|
||||
}
|
||||
}
|
||||
|
||||
fn rustc_minor_version() -> Option<u32> {
|
||||
macro_rules! otry {
|
||||
($e:expr) => {
|
||||
match $e {
|
||||
Some(e) => e,
|
||||
None => return None,
|
||||
}
|
||||
};
|
||||
}
|
||||
let rustc = otry!(env::var_os("RUSTC"));
|
||||
let output = otry!(Command::new(rustc).arg("--version").output().ok());
|
||||
let version = otry!(str::from_utf8(&output.stdout).ok());
|
||||
let mut pieces = version.split('.');
|
||||
if pieces.next() != Some("rustc 1") {
|
||||
return None;
|
||||
}
|
||||
otry!(pieces.next()).parse().ok()
|
||||
}
|
||||
4
third_party/rust/crc32fast/src/lib.rs
vendored
4
third_party/rust/crc32fast/src/lib.rs
vendored
@@ -35,10 +35,6 @@
|
||||
//! optimal implementation for the current CPU feature set.
|
||||
|
||||
#![cfg_attr(not(feature = "std"), no_std)]
|
||||
#![cfg_attr(
|
||||
all(feature = "nightly", target_arch = "aarch64"),
|
||||
feature(stdsimd, aarch64_target_feature)
|
||||
)]
|
||||
|
||||
#[deny(missing_docs)]
|
||||
#[cfg(test)]
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
use std::arch::aarch64 as arch;
|
||||
use core::arch::aarch64 as arch;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct State {
|
||||
@@ -6,6 +6,18 @@ pub struct State {
|
||||
}
|
||||
|
||||
impl State {
|
||||
#[cfg(not(feature = "std"))]
|
||||
pub fn new(state: u32) -> Option<Self> {
|
||||
if cfg!(target_feature = "crc") {
|
||||
// SAFETY: The conditions above ensure that all
|
||||
// required instructions are supported by the CPU.
|
||||
Some(Self { state })
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
pub fn new(state: u32) -> Option<Self> {
|
||||
if std::arch::is_aarch64_feature_detected!("crc") {
|
||||
// SAFETY: The conditions above ensure that all
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
cfg_if! {
|
||||
if #[cfg(all(
|
||||
crc32fast_stdarchx86,
|
||||
target_feature = "sse2",
|
||||
any(target_arch = "x86", target_arch = "x86_64")
|
||||
))] {
|
||||
|
||||
@@ -61,7 +61,6 @@ const K2: i64 = 0x1c6e41596;
|
||||
const K3: i64 = 0x1751997d0;
|
||||
const K4: i64 = 0x0ccaa009e;
|
||||
const K5: i64 = 0x163cd6124;
|
||||
const K6: i64 = 0x1db710640;
|
||||
|
||||
const P_X: i64 = 0x1DB710641;
|
||||
const U_PRIME: i64 = 0x1F7011641;
|
||||
@@ -144,7 +143,6 @@ unsafe fn calculate(crc: u32, mut data: &[u8]) -> u32 {
|
||||
// It's not clear to me, reading the paper, where the xor operations are
|
||||
// happening or why things are shifting around. This implementation...
|
||||
// appears to work though!
|
||||
drop(K6);
|
||||
let x = arch::_mm_xor_si128(
|
||||
arch::_mm_clmulepi64_si128(x, k3k4, 0x10),
|
||||
arch::_mm_srli_si128(x, 8),
|
||||
|
||||
@@ -1 +1 @@
|
||||
{"files":{"CHANGELOG.md":"fbcdd2c242af3f8eab76ca3dff71f4c9b1d569db6039ab2f7e331417122d121d","Cargo.toml":"0916d9452f9f79784ac417256b661caa0c95f1b0d3107ad3af2991026707fa61","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"5734ed989dfca1f625b40281ee9f4530f91b2411ec01cb748223e7eb87e201ab","README.md":"2a19af38a52dd965c2d66bb39f90a85b430b51ee9ccb29e9e1978ee7091e5087","benches/atomic_cell.rs":"c927eb3cd1e5ecc4b91adbc3bde98af15ffab4086190792ba64d5cde0e24df3d","build-common.rs":"502cb7494549bed6fa10ac7bea36e880eeb60290dc69b679ac5c92b376469562","build.rs":"ec1d47ec36b3670a6e67955a104851ee7125616888e78bd03b93304e12cd1c50","no_atomic.rs":"3314524d2afa0360c947455a6e6566fb54ebf909c99479ca3b7435741fd3293e","src/atomic/atomic_cell.rs":"0fc99463e633144c5d59d39c35b5477da1f1b90f5448cadc37454b7f4b97707e","src/atomic/consume.rs":"7a7736fcd64f6473dfea7653559ffc5e1a2a234df43835f8aa8734862145ac15","src/atomic/mod.rs":"94193895fa03cece415e8d7be700b73a9a8a7015774ca821253438607f9b0736","src/atomic/seq_lock.rs":"27182e6b87a9db73c5f6831759f8625f9fcdec3c2828204c444aef04f427735a","src/atomic/seq_lock_wide.rs":"9888dd03116bb89ca36d4ab8d5a0b5032107a2983a7eb8024454263b09080088","src/backoff.rs":"8fd5e3dcccc05860680e49c8498de8096bee9140bcfee8723d97117106a020d0","src/cache_padded.rs":"8bb8925e2df44224ffa29f31a2f9c08d88d8bd3df6c1ce47003598225055fdb5","src/lib.rs":"6f1bcf157abe06ad8458a53e865bf8efab9fad4a9424790147cee8fefb3795d8","src/sync/mod.rs":"eca73c04f821859b8434d2b93db87d160dc6a3f65498ca201cd40d732ca4c134","src/sync/once_lock.rs":"c03dc9c05a817e087dccf8b682f7307501542805533551da3c2bab442bc40743","src/sync/parker.rs":"91f3a7d4ee8d9e06b6558d180e8a0df08ff5c6cef612b4ce4790f9f75cb34f84","src/sync/sharded_lock.rs":"6391b3b99b194b8e0888446c2dec340e4fb095753bcf0c1a80bc654f9c8be0e3","src/sync/wait_group.rs":"3e339aab014f50e214fea535c841755113ea058153378ed54e50a4acb403c937","src/thread.rs":"21cf9b3e965529e5c0a6ff8fc1ec846bfe0006c41deb238a149be8d07384e955"
,"tests/atomic_cell.rs":"bf8bc869c922a1cbf929c3b741bae0cae98f2157f572b5a4eb2873d20a407c22","tests/cache_padded.rs":"1bfaff8354c8184e1ee1f902881ca9400b60effb273b0d3f752801a483d2b66d","tests/parker.rs":"6def4721287d9d70b1cfd63ebb34e1c83fbb3376edbad2bc8aac6ef69dd99d20","tests/sharded_lock.rs":"314adeb8a651a28935f7a49c9a261b8fa1fd82bf6a16c865a5aced6216d7e40b","tests/thread.rs":"9a7d7d3028c552fd834c68598b04a1cc252a816bc20ab62cec060d6cd09cab10","tests/wait_group.rs":"02661c2a820a5abe8b0c8fe15a6650aead707b57cdda0610d1b09a2680ed6969"},"package":"4fb766fa798726286dbbb842f174001dab8abc7b627a1dd86e0b7222a95d929f"}
|
||||
{"files":{"CHANGELOG.md":"7c2aa4bd785c04b76aee28092d94326ecdd2db9e835602b9ebd22d92691ff492","Cargo.toml":"23eb8e5ae0b5a1a16ac02e5948457cce8fe212a72dddbba9046384306aa29695","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"5734ed989dfca1f625b40281ee9f4530f91b2411ec01cb748223e7eb87e201ab","README.md":"3c82bbb994f54ab76a9ed30a42dfd095c6e636258d379b9be3fbf66324310e71","benches/atomic_cell.rs":"c927eb3cd1e5ecc4b91adbc3bde98af15ffab4086190792ba64d5cde0e24df3d","build-common.rs":"502cb7494549bed6fa10ac7bea36e880eeb60290dc69b679ac5c92b376469562","build.rs":"7a7f9e56ea7fb4f78c4e532b84b9d27be719d600e85eaeb3a2f4b79a4f0b419c","no_atomic.rs":"fc1baa4489d9842988bacaaa545a7d7d0e2f8b93cfa0b7d1ae31f21256e4cb0a","src/atomic/atomic_cell.rs":"cc7078265c6df82f544eb1adf0962ee9fffbb650b8f793d57047afbf205022ac","src/atomic/consume.rs":"381c2a8b13312ca0525d53ca1b7d0d4f525ddb154951fa3e216b061ad22012ff","src/atomic/mod.rs":"712e2337e710c07116e977154ea4247a1c065bf5599e6bf368138e715b403f6d","src/atomic/seq_lock.rs":"27182e6b87a9db73c5f6831759f8625f9fcdec3c2828204c444aef04f427735a","src/atomic/seq_lock_wide.rs":"9888dd03116bb89ca36d4ab8d5a0b5032107a2983a7eb8024454263b09080088","src/backoff.rs":"8715f0303ec91d1847c8ac3fc24bcc002a22a7284ade610e5eff4181f85827c7","src/cache_padded.rs":"2134de7661d9dd2dec05fdc862b9c0cb6483344d97095817ea83475a919d8c3b","src/lib.rs":"060dabc6dc07de92a7afa57dcbc47222a95ef5819d543ad854858c3b329d6637","src/sync/mod.rs":"eca73c04f821859b8434d2b93db87d160dc6a3f65498ca201cd40d732ca4c134","src/sync/once_lock.rs":"aa8f957604d1119c4fc7038a18c14a6281230e81005f31201c099acff284ad4b","src/sync/parker.rs":"698996e7530da1f3815df11c89df7d916155229cbfd022cccbd555f1d1d31985","src/sync/sharded_lock.rs":"f96d536f5622fe2a0a0f7d8117be31e4b1ed607544c52c7e2ffcd1f51a6b93a1","src/sync/wait_group.rs":"3e339aab014f50e214fea535c841755113ea058153378ed54e50a4acb403c937","src/thread.rs":"04610787ba88f1f59549874a13fc037f2dcf4d8b5f1daaf08378f05c2b3c0039"
,"tests/atomic_cell.rs":"716c864d4e103039dc5cd8bf6110da4cbabafc7e4e03819aa197828e8fb0a9c7","tests/cache_padded.rs":"1bfaff8354c8184e1ee1f902881ca9400b60effb273b0d3f752801a483d2b66d","tests/parker.rs":"6def4721287d9d70b1cfd63ebb34e1c83fbb3376edbad2bc8aac6ef69dd99d20","tests/sharded_lock.rs":"314adeb8a651a28935f7a49c9a261b8fa1fd82bf6a16c865a5aced6216d7e40b","tests/thread.rs":"9a7d7d3028c552fd834c68598b04a1cc252a816bc20ab62cec060d6cd09cab10","tests/wait_group.rs":"2a41533a5f7f113d19cd2bdafcc2abf86509109652274156efdd74abd00896b6"},"package":"22ec99545bb0ed0ea7bb9b8e1e9122ea386ff8a48c0922e43f36d45ab09e0e80"}
|
||||
33
third_party/rust/crossbeam-utils/CHANGELOG.md
vendored
33
third_party/rust/crossbeam-utils/CHANGELOG.md
vendored
@@ -1,3 +1,36 @@
|
||||
# Version 0.8.20
|
||||
|
||||
- Implement `Display` for `CachePadded`. (#1097)
|
||||
|
||||
# Version 0.8.19
|
||||
|
||||
- Remove dependency on `cfg-if`. (#1072)
|
||||
|
||||
# Version 0.8.18
|
||||
|
||||
- Relax the minimum supported Rust version to 1.60. (#1056)
|
||||
- Improve scalability of `AtomicCell` fallback. (#1055)
|
||||
|
||||
# Version 0.8.17
|
||||
|
||||
- Bump the minimum supported Rust version to 1.61. (#1037)
|
||||
- Improve support for targets without atomic CAS or 64-bit atomic. (#1037)
|
||||
- Always implement `UnwindSafe` and `RefUnwindSafe` for `AtomicCell`. (#1045)
|
||||
- Improve compatibility with Miri, TSan, and loom. (#995, #1003)
|
||||
- Improve compatibility with unstable `oom=panic`. (#1045)
|
||||
- Improve implementation of `CachePadded`. (#1014, #1025)
|
||||
- Update `loom` dependency to 0.7.
|
||||
|
||||
# Version 0.8.16
|
||||
|
||||
- Improve implementation of `CachePadded`. (#967)
|
||||
|
||||
# Version 0.8.15
|
||||
|
||||
- Add `#[clippy::has_significant_drop]` to `ShardedLock{Read,Write}Guard`. (#958)
|
||||
- Improve handling of very large timeout. (#953)
|
||||
- Soft-deprecate `thread::scope()` in favor of the more efficient `std::thread::scope` that stabilized on Rust 1.63. (#954)
|
||||
|
||||
# Version 0.8.14
|
||||
|
||||
- Fix build script bug introduced in 0.8.13. (#932)
|
||||
|
||||
18
third_party/rust/crossbeam-utils/Cargo.toml
vendored
18
third_party/rust/crossbeam-utils/Cargo.toml
vendored
@@ -10,10 +10,10 @@
|
||||
# See Cargo.toml.orig for the original contents.
|
||||
|
||||
[package]
|
||||
edition = "2018"
|
||||
rust-version = "1.38"
|
||||
edition = "2021"
|
||||
rust-version = "1.60"
|
||||
name = "crossbeam-utils"
|
||||
version = "0.8.14"
|
||||
version = "0.8.20"
|
||||
description = "Utilities for concurrent programming"
|
||||
homepage = "https://github.com/crossbeam-rs/crossbeam/tree/master/crossbeam-utils"
|
||||
readme = "README.md"
|
||||
@@ -32,20 +32,20 @@ categories = [
|
||||
license = "MIT OR Apache-2.0"
|
||||
repository = "https://github.com/crossbeam-rs/crossbeam"
|
||||
|
||||
[dependencies.cfg-if]
|
||||
version = "1"
|
||||
[dependencies]
|
||||
|
||||
[dev-dependencies.rand]
|
||||
version = "0.8"
|
||||
|
||||
[dev-dependencies.rustversion]
|
||||
version = "1"
|
||||
|
||||
[features]
|
||||
default = ["std"]
|
||||
nightly = []
|
||||
std = []
|
||||
|
||||
[target."cfg(crossbeam_loom)".dependencies.loom]
|
||||
version = "0.5"
|
||||
version = "0.7.1"
|
||||
optional = true
|
||||
|
||||
[lints.rust.unexpected_cfgs]
|
||||
level = "warn"
|
||||
priority = 0
|
||||
|
||||
4
third_party/rust/crossbeam-utils/README.md
vendored
4
third_party/rust/crossbeam-utils/README.md
vendored
@@ -8,7 +8,7 @@ https://github.com/crossbeam-rs/crossbeam/tree/master/crossbeam-utils#license)
|
||||
https://crates.io/crates/crossbeam-utils)
|
||||
[](
|
||||
https://docs.rs/crossbeam-utils)
|
||||
[](
|
||||
[](
|
||||
https://www.rust-lang.org)
|
||||
[](https://discord.com/invite/JXYwgWZ)
|
||||
|
||||
@@ -55,7 +55,7 @@ crossbeam-utils = "0.8"
|
||||
|
||||
Crossbeam Utils supports stable Rust releases going back at least six months,
|
||||
and every time the minimum supported Rust version is increased, a new minor
|
||||
version is released. Currently, the minimum supported Rust version is 1.38.
|
||||
version is released. Currently, the minimum supported Rust version is 1.60.
|
||||
|
||||
## License
|
||||
|
||||
|
||||
29
third_party/rust/crossbeam-utils/build.rs
vendored
29
third_party/rust/crossbeam-utils/build.rs
vendored
@@ -1,24 +1,12 @@
|
||||
// The rustc-cfg listed below are considered public API, but it is *unstable*
|
||||
// and outside of the normal semver guarantees:
|
||||
//
|
||||
// - `crossbeam_no_atomic_cas`
|
||||
// Assume the target does *not* support atomic CAS operations.
|
||||
// This is usually detected automatically by the build script, but you may
|
||||
// need to enable it manually when building for custom targets or using
|
||||
// non-cargo build systems that don't run the build script.
|
||||
//
|
||||
// - `crossbeam_no_atomic`
|
||||
// Assume the target does *not* support any atomic operations.
|
||||
// This is usually detected automatically by the build script, but you may
|
||||
// need to enable it manually when building for custom targets or using
|
||||
// non-cargo build systems that don't run the build script.
|
||||
//
|
||||
// - `crossbeam_no_atomic_64`
|
||||
// Assume the target does *not* support AtomicU64/AtomicI64.
|
||||
// This is usually detected automatically by the build script, but you may
|
||||
// need to enable it manually when building for custom targets or using
|
||||
// non-cargo build systems that don't run the build script.
|
||||
//
|
||||
// With the exceptions mentioned above, the rustc-cfg emitted by the build
|
||||
// script are *not* public API.
|
||||
|
||||
@@ -30,6 +18,9 @@ include!("no_atomic.rs");
|
||||
include!("build-common.rs");
|
||||
|
||||
fn main() {
|
||||
println!("cargo:rerun-if-changed=no_atomic.rs");
|
||||
println!("cargo:rustc-check-cfg=cfg(crossbeam_no_atomic,crossbeam_sanitize_thread)");
|
||||
|
||||
let target = match env::var("TARGET") {
|
||||
Ok(target) => convert_custom_linux_target(target),
|
||||
Err(e) => {
|
||||
@@ -45,17 +36,13 @@ fn main() {
|
||||
// Note that this is `no_`*, not `has_*`. This allows treating as the latest
|
||||
// stable rustc is used when the build script doesn't run. This is useful
|
||||
// for non-cargo build systems that don't run the build script.
|
||||
if NO_ATOMIC_CAS.contains(&&*target) {
|
||||
println!("cargo:rustc-cfg=crossbeam_no_atomic_cas");
|
||||
}
|
||||
if NO_ATOMIC.contains(&&*target) {
|
||||
println!("cargo:rustc-cfg=crossbeam_no_atomic");
|
||||
println!("cargo:rustc-cfg=crossbeam_no_atomic_64");
|
||||
} else if NO_ATOMIC_64.contains(&&*target) {
|
||||
println!("cargo:rustc-cfg=crossbeam_no_atomic_64");
|
||||
} else {
|
||||
// Otherwise, assuming `"max-atomic-width" == 64` or `"max-atomic-width" == 128`.
|
||||
}
|
||||
|
||||
println!("cargo:rerun-if-changed=no_atomic.rs");
|
||||
// `cfg(sanitize = "..")` is not stabilized.
|
||||
let sanitize = env::var("CARGO_CFG_SANITIZE").unwrap_or_default();
|
||||
if sanitize.contains("thread") {
|
||||
println!("cargo:rustc-cfg=crossbeam_sanitize_thread");
|
||||
}
|
||||
}
|
||||
|
||||
75
third_party/rust/crossbeam-utils/no_atomic.rs
vendored
75
third_party/rust/crossbeam-utils/no_atomic.rs
vendored
@@ -1,82 +1,9 @@
|
||||
// This file is @generated by no_atomic.sh.
|
||||
// It is not intended for manual editing.
|
||||
|
||||
const NO_ATOMIC_CAS: &[&str] = &[
|
||||
"armv4t-none-eabi",
|
||||
"armv5te-none-eabi",
|
||||
"avr-unknown-gnu-atmega328",
|
||||
const NO_ATOMIC: &[&str] = &[
|
||||
"bpfeb-unknown-none",
|
||||
"bpfel-unknown-none",
|
||||
"msp430-none-elf",
|
||||
"riscv32i-unknown-none-elf",
|
||||
"riscv32im-unknown-none-elf",
|
||||
"riscv32imc-unknown-none-elf",
|
||||
"thumbv4t-none-eabi",
|
||||
"thumbv5te-none-eabi",
|
||||
"thumbv6m-none-eabi",
|
||||
];
|
||||
|
||||
#[allow(dead_code)] // Only crossbeam-utils uses this.
|
||||
const NO_ATOMIC_64: &[&str] = &[
|
||||
"arm-linux-androideabi",
|
||||
"armebv7r-none-eabi",
|
||||
"armebv7r-none-eabihf",
|
||||
"armv4t-none-eabi",
|
||||
"armv4t-unknown-linux-gnueabi",
|
||||
"armv5te-none-eabi",
|
||||
"armv5te-unknown-linux-gnueabi",
|
||||
"armv5te-unknown-linux-musleabi",
|
||||
"armv5te-unknown-linux-uclibceabi",
|
||||
"armv6k-nintendo-3ds",
|
||||
"armv7r-none-eabi",
|
||||
"armv7r-none-eabihf",
|
||||
"avr-unknown-gnu-atmega328",
|
||||
"hexagon-unknown-linux-musl",
|
||||
"m68k-unknown-linux-gnu",
|
||||
"mips-unknown-linux-gnu",
|
||||
"mips-unknown-linux-musl",
|
||||
"mips-unknown-linux-uclibc",
|
||||
"mipsel-sony-psp",
|
||||
"mipsel-sony-psx",
|
||||
"mipsel-unknown-linux-gnu",
|
||||
"mipsel-unknown-linux-musl",
|
||||
"mipsel-unknown-linux-uclibc",
|
||||
"mipsel-unknown-none",
|
||||
"mipsisa32r6-unknown-linux-gnu",
|
||||
"mipsisa32r6el-unknown-linux-gnu",
|
||||
"msp430-none-elf",
|
||||
"powerpc-unknown-freebsd",
|
||||
"powerpc-unknown-linux-gnu",
|
||||
"powerpc-unknown-linux-gnuspe",
|
||||
"powerpc-unknown-linux-musl",
|
||||
"powerpc-unknown-netbsd",
|
||||
"powerpc-unknown-openbsd",
|
||||
"powerpc-wrs-vxworks",
|
||||
"powerpc-wrs-vxworks-spe",
|
||||
"riscv32gc-unknown-linux-gnu",
|
||||
"riscv32gc-unknown-linux-musl",
|
||||
"riscv32i-unknown-none-elf",
|
||||
"riscv32im-unknown-none-elf",
|
||||
"riscv32imac-unknown-none-elf",
|
||||
"riscv32imac-unknown-xous-elf",
|
||||
"riscv32imc-unknown-none-elf",
|
||||
"thumbv4t-none-eabi",
|
||||
"thumbv5te-none-eabi",
|
||||
"thumbv6m-none-eabi",
|
||||
"thumbv7em-none-eabi",
|
||||
"thumbv7em-none-eabihf",
|
||||
"thumbv7m-none-eabi",
|
||||
"thumbv8m.base-none-eabi",
|
||||
"thumbv8m.main-none-eabi",
|
||||
"thumbv8m.main-none-eabihf",
|
||||
];
|
||||
|
||||
#[allow(dead_code)] // Only crossbeam-utils uses this.
|
||||
const NO_ATOMIC: &[&str] = &[
|
||||
"avr-unknown-gnu-atmega328",
|
||||
"mipsel-sony-psx",
|
||||
"msp430-none-elf",
|
||||
"riscv32i-unknown-none-elf",
|
||||
"riscv32im-unknown-none-elf",
|
||||
"riscv32imc-unknown-none-elf",
|
||||
];
|
||||
|
||||
@@ -1,18 +1,15 @@
|
||||
// Necessary for implementing atomic methods for `AtomicUnit`
|
||||
#![allow(clippy::unit_arg)]
|
||||
|
||||
use crate::primitive::sync::atomic::{self, AtomicBool};
|
||||
use crate::primitive::sync::atomic::{self, Ordering};
|
||||
use crate::CachePadded;
|
||||
use core::cell::UnsafeCell;
|
||||
use core::cmp;
|
||||
use core::fmt;
|
||||
use core::mem::{self, ManuallyDrop, MaybeUninit};
|
||||
use core::sync::atomic::Ordering;
|
||||
|
||||
use core::panic::{RefUnwindSafe, UnwindSafe};
|
||||
use core::ptr;
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
use std::panic::{RefUnwindSafe, UnwindSafe};
|
||||
|
||||
use super::seq_lock::SeqLock;
|
||||
|
||||
/// A thread-safe mutable memory location.
|
||||
@@ -49,9 +46,7 @@ pub struct AtomicCell<T> {
|
||||
unsafe impl<T: Send> Send for AtomicCell<T> {}
|
||||
unsafe impl<T: Send> Sync for AtomicCell<T> {}
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
impl<T> UnwindSafe for AtomicCell<T> {}
|
||||
#[cfg(feature = "std")]
|
||||
impl<T> RefUnwindSafe for AtomicCell<T> {}
|
||||
|
||||
impl<T> AtomicCell<T> {
|
||||
@@ -322,6 +317,36 @@ impl<T> Drop for AtomicCell<T> {
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! atomic {
|
||||
// If values of type `$t` can be transmuted into values of the primitive atomic type `$atomic`,
|
||||
// declares variable `$a` of type `$atomic` and executes `$atomic_op`, breaking out of the loop.
|
||||
(@check, $t:ty, $atomic:ty, $a:ident, $atomic_op:expr) => {
|
||||
if can_transmute::<$t, $atomic>() {
|
||||
let $a: &$atomic;
|
||||
break $atomic_op;
|
||||
}
|
||||
};
|
||||
|
||||
// If values of type `$t` can be transmuted into values of a primitive atomic type, declares
|
||||
// variable `$a` of that type and executes `$atomic_op`. Otherwise, just executes
|
||||
// `$fallback_op`.
|
||||
($t:ty, $a:ident, $atomic_op:expr, $fallback_op:expr) => {
|
||||
loop {
|
||||
atomic!(@check, $t, AtomicUnit, $a, $atomic_op);
|
||||
|
||||
atomic!(@check, $t, atomic::AtomicU8, $a, $atomic_op);
|
||||
atomic!(@check, $t, atomic::AtomicU16, $a, $atomic_op);
|
||||
atomic!(@check, $t, atomic::AtomicU32, $a, $atomic_op);
|
||||
#[cfg(target_has_atomic = "64")]
|
||||
atomic!(@check, $t, atomic::AtomicU64, $a, $atomic_op);
|
||||
// TODO: AtomicU128 is unstable
|
||||
// atomic!(@check, $t, atomic::AtomicU128, $a, $atomic_op);
|
||||
|
||||
break $fallback_op;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
macro_rules! impl_arithmetic {
|
||||
($t:ty, fallback, $example:tt) => {
|
||||
impl AtomicCell<$t> {
|
||||
@@ -500,7 +525,7 @@ macro_rules! impl_arithmetic {
|
||||
}
|
||||
}
|
||||
};
|
||||
($t:ty, $atomic:ty, $example:tt) => {
|
||||
($t:ty, $atomic:ident, $example:tt) => {
|
||||
impl AtomicCell<$t> {
|
||||
/// Increments the current value by `val` and returns the previous value.
|
||||
///
|
||||
@@ -518,15 +543,19 @@ macro_rules! impl_arithmetic {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn fetch_add(&self, val: $t) -> $t {
|
||||
if can_transmute::<$t, $atomic>() {
|
||||
let a = unsafe { &*(self.as_ptr() as *const $atomic) };
|
||||
a.fetch_add(val, Ordering::AcqRel)
|
||||
} else {
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value = value.wrapping_add(val);
|
||||
old
|
||||
atomic! {
|
||||
$t, _a,
|
||||
{
|
||||
let a = unsafe { &*(self.as_ptr() as *const atomic::$atomic) };
|
||||
a.fetch_add(val, Ordering::AcqRel)
|
||||
},
|
||||
{
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value = value.wrapping_add(val);
|
||||
old
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -546,15 +575,19 @@ macro_rules! impl_arithmetic {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn fetch_sub(&self, val: $t) -> $t {
|
||||
if can_transmute::<$t, $atomic>() {
|
||||
let a = unsafe { &*(self.as_ptr() as *const $atomic) };
|
||||
a.fetch_sub(val, Ordering::AcqRel)
|
||||
} else {
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value = value.wrapping_sub(val);
|
||||
old
|
||||
atomic! {
|
||||
$t, _a,
|
||||
{
|
||||
let a = unsafe { &*(self.as_ptr() as *const atomic::$atomic) };
|
||||
a.fetch_sub(val, Ordering::AcqRel)
|
||||
},
|
||||
{
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value = value.wrapping_sub(val);
|
||||
old
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -572,15 +605,19 @@ macro_rules! impl_arithmetic {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn fetch_and(&self, val: $t) -> $t {
|
||||
if can_transmute::<$t, $atomic>() {
|
||||
let a = unsafe { &*(self.as_ptr() as *const $atomic) };
|
||||
a.fetch_and(val, Ordering::AcqRel)
|
||||
} else {
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value &= val;
|
||||
old
|
||||
atomic! {
|
||||
$t, _a,
|
||||
{
|
||||
let a = unsafe { &*(self.as_ptr() as *const atomic::$atomic) };
|
||||
a.fetch_and(val, Ordering::AcqRel)
|
||||
},
|
||||
{
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value &= val;
|
||||
old
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -598,15 +635,19 @@ macro_rules! impl_arithmetic {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn fetch_nand(&self, val: $t) -> $t {
|
||||
if can_transmute::<$t, $atomic>() {
|
||||
let a = unsafe { &*(self.as_ptr() as *const $atomic) };
|
||||
a.fetch_nand(val, Ordering::AcqRel)
|
||||
} else {
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value = !(old & val);
|
||||
old
|
||||
atomic! {
|
||||
$t, _a,
|
||||
{
|
||||
let a = unsafe { &*(self.as_ptr() as *const atomic::$atomic) };
|
||||
a.fetch_nand(val, Ordering::AcqRel)
|
||||
},
|
||||
{
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value = !(old & val);
|
||||
old
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -624,15 +665,19 @@ macro_rules! impl_arithmetic {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn fetch_or(&self, val: $t) -> $t {
|
||||
if can_transmute::<$t, $atomic>() {
|
||||
let a = unsafe { &*(self.as_ptr() as *const $atomic) };
|
||||
a.fetch_or(val, Ordering::AcqRel)
|
||||
} else {
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value |= val;
|
||||
old
|
||||
atomic! {
|
||||
$t, _a,
|
||||
{
|
||||
let a = unsafe { &*(self.as_ptr() as *const atomic::$atomic) };
|
||||
a.fetch_or(val, Ordering::AcqRel)
|
||||
},
|
||||
{
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value |= val;
|
||||
old
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -650,15 +695,19 @@ macro_rules! impl_arithmetic {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn fetch_xor(&self, val: $t) -> $t {
|
||||
if can_transmute::<$t, $atomic>() {
|
||||
let a = unsafe { &*(self.as_ptr() as *const $atomic) };
|
||||
a.fetch_xor(val, Ordering::AcqRel)
|
||||
} else {
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value ^= val;
|
||||
old
|
||||
atomic! {
|
||||
$t, _a,
|
||||
{
|
||||
let a = unsafe { &*(self.as_ptr() as *const atomic::$atomic) };
|
||||
a.fetch_xor(val, Ordering::AcqRel)
|
||||
},
|
||||
{
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value ^= val;
|
||||
old
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -677,15 +726,19 @@ macro_rules! impl_arithmetic {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn fetch_max(&self, val: $t) -> $t {
|
||||
if can_transmute::<$t, $atomic>() {
|
||||
// TODO: Atomic*::fetch_max requires Rust 1.45.
|
||||
self.fetch_update(|old| Some(cmp::max(old, val))).unwrap()
|
||||
} else {
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value = cmp::max(old, val);
|
||||
old
|
||||
atomic! {
|
||||
$t, _a,
|
||||
{
|
||||
let a = unsafe { &*(self.as_ptr() as *const atomic::$atomic) };
|
||||
a.fetch_max(val, Ordering::AcqRel)
|
||||
},
|
||||
{
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value = cmp::max(old, val);
|
||||
old
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -704,51 +757,50 @@ macro_rules! impl_arithmetic {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn fetch_min(&self, val: $t) -> $t {
|
||||
if can_transmute::<$t, $atomic>() {
|
||||
// TODO: Atomic*::fetch_min requires Rust 1.45.
|
||||
self.fetch_update(|old| Some(cmp::min(old, val))).unwrap()
|
||||
} else {
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value = cmp::min(old, val);
|
||||
old
|
||||
atomic! {
|
||||
$t, _a,
|
||||
{
|
||||
let a = unsafe { &*(self.as_ptr() as *const atomic::$atomic) };
|
||||
a.fetch_min(val, Ordering::AcqRel)
|
||||
},
|
||||
{
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value = cmp::min(old, val);
|
||||
old
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
impl_arithmetic!(u8, atomic::AtomicU8, "let a = AtomicCell::new(7u8);");
|
||||
impl_arithmetic!(i8, atomic::AtomicI8, "let a = AtomicCell::new(7i8);");
|
||||
impl_arithmetic!(u16, atomic::AtomicU16, "let a = AtomicCell::new(7u16);");
|
||||
impl_arithmetic!(i16, atomic::AtomicI16, "let a = AtomicCell::new(7i16);");
|
||||
impl_arithmetic!(u32, atomic::AtomicU32, "let a = AtomicCell::new(7u32);");
|
||||
impl_arithmetic!(i32, atomic::AtomicI32, "let a = AtomicCell::new(7i32);");
|
||||
#[cfg(not(crossbeam_no_atomic_64))]
|
||||
impl_arithmetic!(u64, atomic::AtomicU64, "let a = AtomicCell::new(7u64);");
|
||||
#[cfg(not(crossbeam_no_atomic_64))]
|
||||
impl_arithmetic!(i64, atomic::AtomicI64, "let a = AtomicCell::new(7i64);");
|
||||
#[cfg(crossbeam_no_atomic_64)]
|
||||
impl_arithmetic!(u8, AtomicU8, "let a = AtomicCell::new(7u8);");
|
||||
impl_arithmetic!(i8, AtomicI8, "let a = AtomicCell::new(7i8);");
|
||||
impl_arithmetic!(u16, AtomicU16, "let a = AtomicCell::new(7u16);");
|
||||
impl_arithmetic!(i16, AtomicI16, "let a = AtomicCell::new(7i16);");
|
||||
|
||||
impl_arithmetic!(u32, AtomicU32, "let a = AtomicCell::new(7u32);");
|
||||
impl_arithmetic!(i32, AtomicI32, "let a = AtomicCell::new(7i32);");
|
||||
|
||||
#[cfg(target_has_atomic = "64")]
|
||||
impl_arithmetic!(u64, AtomicU64, "let a = AtomicCell::new(7u64);");
|
||||
#[cfg(target_has_atomic = "64")]
|
||||
impl_arithmetic!(i64, AtomicI64, "let a = AtomicCell::new(7i64);");
|
||||
#[cfg(not(target_has_atomic = "64"))]
|
||||
impl_arithmetic!(u64, fallback, "let a = AtomicCell::new(7u64);");
|
||||
#[cfg(crossbeam_no_atomic_64)]
|
||||
#[cfg(not(target_has_atomic = "64"))]
|
||||
impl_arithmetic!(i64, fallback, "let a = AtomicCell::new(7i64);");
|
||||
|
||||
// TODO: AtomicU128 is unstable
|
||||
// impl_arithmetic!(u128, atomic::AtomicU128, "let a = AtomicCell::new(7u128);");
|
||||
// impl_arithmetic!(i128, atomic::AtomicI128, "let a = AtomicCell::new(7i128);");
|
||||
// impl_arithmetic!(u128, AtomicU128, "let a = AtomicCell::new(7u128);");
|
||||
// impl_arithmetic!(i128, AtomicI128, "let a = AtomicCell::new(7i128);");
|
||||
impl_arithmetic!(u128, fallback, "let a = AtomicCell::new(7u128);");
|
||||
impl_arithmetic!(i128, fallback, "let a = AtomicCell::new(7i128);");
|
||||
|
||||
impl_arithmetic!(
|
||||
usize,
|
||||
atomic::AtomicUsize,
|
||||
"let a = AtomicCell::new(7usize);"
|
||||
);
|
||||
impl_arithmetic!(
|
||||
isize,
|
||||
atomic::AtomicIsize,
|
||||
"let a = AtomicCell::new(7isize);"
|
||||
);
|
||||
impl_arithmetic!(usize, AtomicUsize, "let a = AtomicCell::new(7usize);");
|
||||
impl_arithmetic!(isize, AtomicIsize, "let a = AtomicCell::new(7isize);");
|
||||
|
||||
impl AtomicCell<bool> {
|
||||
/// Applies logical "and" to the current value and returns the previous value.
|
||||
@@ -768,8 +820,20 @@ impl AtomicCell<bool> {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn fetch_and(&self, val: bool) -> bool {
|
||||
let a = unsafe { &*(self.as_ptr() as *const AtomicBool) };
|
||||
a.fetch_and(val, Ordering::AcqRel)
|
||||
atomic! {
|
||||
bool, _a,
|
||||
{
|
||||
let a = unsafe { &*(self.as_ptr() as *const atomic::AtomicBool) };
|
||||
a.fetch_and(val, Ordering::AcqRel)
|
||||
},
|
||||
{
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value &= val;
|
||||
old
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies logical "nand" to the current value and returns the previous value.
|
||||
@@ -792,8 +856,20 @@ impl AtomicCell<bool> {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn fetch_nand(&self, val: bool) -> bool {
|
||||
let a = unsafe { &*(self.as_ptr() as *const AtomicBool) };
|
||||
a.fetch_nand(val, Ordering::AcqRel)
|
||||
atomic! {
|
||||
bool, _a,
|
||||
{
|
||||
let a = unsafe { &*(self.as_ptr() as *const atomic::AtomicBool) };
|
||||
a.fetch_nand(val, Ordering::AcqRel)
|
||||
},
|
||||
{
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value = !(old & val);
|
||||
old
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies logical "or" to the current value and returns the previous value.
|
||||
@@ -813,8 +889,20 @@ impl AtomicCell<bool> {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn fetch_or(&self, val: bool) -> bool {
|
||||
let a = unsafe { &*(self.as_ptr() as *const AtomicBool) };
|
||||
a.fetch_or(val, Ordering::AcqRel)
|
||||
atomic! {
|
||||
bool, _a,
|
||||
{
|
||||
let a = unsafe { &*(self.as_ptr() as *const atomic::AtomicBool) };
|
||||
a.fetch_or(val, Ordering::AcqRel)
|
||||
},
|
||||
{
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value |= val;
|
||||
old
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Applies logical "xor" to the current value and returns the previous value.
|
||||
@@ -834,8 +922,20 @@ impl AtomicCell<bool> {
|
||||
/// ```
|
||||
#[inline]
|
||||
pub fn fetch_xor(&self, val: bool) -> bool {
|
||||
let a = unsafe { &*(self.as_ptr() as *const AtomicBool) };
|
||||
a.fetch_xor(val, Ordering::AcqRel)
|
||||
atomic! {
|
||||
bool, _a,
|
||||
{
|
||||
let a = unsafe { &*(self.as_ptr() as *const atomic::AtomicBool) };
|
||||
a.fetch_xor(val, Ordering::AcqRel)
|
||||
},
|
||||
{
|
||||
let _guard = lock(self.as_ptr() as usize).write();
|
||||
let value = unsafe { &mut *(self.as_ptr()) };
|
||||
let old = *value;
|
||||
*value ^= val;
|
||||
old
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -899,10 +999,10 @@ fn lock(addr: usize) -> &'static SeqLock {
|
||||
// Now, if we have a slice of type `&[Foo]`, it is possible that field `a` in all items gets
|
||||
// stored at addresses that are multiples of 3. It'd be too bad if `LEN` was divisible by 3.
|
||||
// In order to protect from such cases, we simply choose a large prime number for `LEN`.
|
||||
const LEN: usize = 97;
|
||||
const LEN: usize = 67;
|
||||
#[allow(clippy::declare_interior_mutable_const)]
|
||||
const L: SeqLock = SeqLock::new();
|
||||
static LOCKS: [SeqLock; LEN] = [L; LEN];
|
||||
const L: CachePadded<SeqLock> = CachePadded::new(SeqLock::new());
|
||||
static LOCKS: [CachePadded<SeqLock>; LEN] = [L; LEN];
|
||||
|
||||
// If the modulus is a constant number, the compiler will use crazy math to transform this into
|
||||
// a sequence of cheap arithmetic operations rather than using the slow modulo instruction.
|
||||
@@ -936,48 +1036,9 @@ impl AtomicUnit {
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! atomic {
|
||||
// If values of type `$t` can be transmuted into values of the primitive atomic type `$atomic`,
|
||||
// declares variable `$a` of type `$atomic` and executes `$atomic_op`, breaking out of the loop.
|
||||
(@check, $t:ty, $atomic:ty, $a:ident, $atomic_op:expr) => {
|
||||
if can_transmute::<$t, $atomic>() {
|
||||
let $a: &$atomic;
|
||||
break $atomic_op;
|
||||
}
|
||||
};
|
||||
|
||||
// If values of type `$t` can be transmuted into values of a primitive atomic type, declares
|
||||
// variable `$a` of that type and executes `$atomic_op`. Otherwise, just executes
|
||||
// `$fallback_op`.
|
||||
($t:ty, $a:ident, $atomic_op:expr, $fallback_op:expr) => {
|
||||
loop {
|
||||
atomic!(@check, $t, AtomicUnit, $a, $atomic_op);
|
||||
|
||||
atomic!(@check, $t, atomic::AtomicU8, $a, $atomic_op);
|
||||
atomic!(@check, $t, atomic::AtomicU16, $a, $atomic_op);
|
||||
atomic!(@check, $t, atomic::AtomicU32, $a, $atomic_op);
|
||||
#[cfg(not(crossbeam_no_atomic_64))]
|
||||
atomic!(@check, $t, atomic::AtomicU64, $a, $atomic_op);
|
||||
// TODO: AtomicU128 is unstable
|
||||
// atomic!(@check, $t, atomic::AtomicU128, $a, $atomic_op);
|
||||
|
||||
break $fallback_op;
|
||||
}
|
||||
};
|
||||
}
|
||||
|
||||
/// Returns `true` if operations on `AtomicCell<T>` are lock-free.
|
||||
const fn atomic_is_lock_free<T>() -> bool {
|
||||
// HACK(taiki-e): This is equivalent to `atomic! { T, _a, true, false }`, but can be used in const fn even in our MSRV (Rust 1.38).
|
||||
let is_lock_free = can_transmute::<T, AtomicUnit>()
|
||||
| can_transmute::<T, atomic::AtomicU8>()
|
||||
| can_transmute::<T, atomic::AtomicU16>()
|
||||
| can_transmute::<T, atomic::AtomicU32>();
|
||||
#[cfg(not(crossbeam_no_atomic_64))]
|
||||
let is_lock_free = is_lock_free | can_transmute::<T, atomic::AtomicU64>();
|
||||
// TODO: AtomicU128 is unstable
|
||||
// let is_lock_free = is_lock_free | can_transmute::<T, atomic::AtomicU128>();
|
||||
is_lock_free
|
||||
atomic! { T, _a, true, false }
|
||||
}
|
||||
|
||||
/// Atomically reads data from `src`.
|
||||
|
||||
@@ -1,5 +1,3 @@
|
||||
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
||||
use crate::primitive::sync::atomic::compiler_fence;
|
||||
#[cfg(not(crossbeam_no_atomic))]
|
||||
use core::sync::atomic::Ordering;
|
||||
|
||||
@@ -27,11 +25,21 @@ pub trait AtomicConsume {
|
||||
}
|
||||
|
||||
#[cfg(not(crossbeam_no_atomic))]
|
||||
#[cfg(any(target_arch = "arm", target_arch = "aarch64"))]
|
||||
// Miri and Loom don't support "consume" ordering and ThreadSanitizer doesn't treat
|
||||
// load(Relaxed) + compiler_fence(Acquire) as "consume" load.
|
||||
// LLVM generates machine code equivalent to fence(Acquire) in compiler_fence(Acquire)
|
||||
// on PowerPC, MIPS, etc. (https://godbolt.org/z/hffvjvW7h), so for now the fence
|
||||
// can be actually avoided here only on ARM and AArch64. See also
|
||||
// https://github.com/rust-lang/rust/issues/62256.
|
||||
#[cfg(all(
|
||||
any(target_arch = "arm", target_arch = "aarch64"),
|
||||
not(any(miri, crossbeam_loom, crossbeam_sanitize_thread)),
|
||||
))]
|
||||
macro_rules! impl_consume {
|
||||
() => {
|
||||
#[inline]
|
||||
fn load_consume(&self) -> Self::Val {
|
||||
use crate::primitive::sync::atomic::compiler_fence;
|
||||
let result = self.load(Ordering::Relaxed);
|
||||
compiler_fence(Ordering::Acquire);
|
||||
result
|
||||
@@ -40,7 +48,10 @@ macro_rules! impl_consume {
|
||||
}
|
||||
|
||||
#[cfg(not(crossbeam_no_atomic))]
|
||||
#[cfg(not(any(target_arch = "arm", target_arch = "aarch64")))]
|
||||
#[cfg(not(all(
|
||||
any(target_arch = "arm", target_arch = "aarch64"),
|
||||
not(any(miri, crossbeam_loom, crossbeam_sanitize_thread)),
|
||||
)))]
|
||||
macro_rules! impl_consume {
|
||||
() => {
|
||||
#[inline]
|
||||
@@ -72,11 +83,19 @@ impl_atomic!(AtomicU8, u8);
|
||||
impl_atomic!(AtomicI8, i8);
|
||||
impl_atomic!(AtomicU16, u16);
|
||||
impl_atomic!(AtomicI16, i16);
|
||||
#[cfg(any(target_has_atomic = "32", not(target_pointer_width = "16")))]
|
||||
impl_atomic!(AtomicU32, u32);
|
||||
#[cfg(any(target_has_atomic = "32", not(target_pointer_width = "16")))]
|
||||
impl_atomic!(AtomicI32, i32);
|
||||
#[cfg(not(crossbeam_no_atomic_64))]
|
||||
#[cfg(any(
|
||||
target_has_atomic = "64",
|
||||
not(any(target_pointer_width = "16", target_pointer_width = "32")),
|
||||
))]
|
||||
impl_atomic!(AtomicU64, u64);
|
||||
#[cfg(not(crossbeam_no_atomic_64))]
|
||||
#[cfg(any(
|
||||
target_has_atomic = "64",
|
||||
not(any(target_pointer_width = "16", target_pointer_width = "32")),
|
||||
))]
|
||||
impl_atomic!(AtomicI64, i64);
|
||||
|
||||
#[cfg(not(crossbeam_no_atomic))]
|
||||
|
||||
@@ -3,35 +3,30 @@
|
||||
//! * [`AtomicCell`], a thread-safe mutable memory location.
|
||||
//! * [`AtomicConsume`], for reading from primitive atomic types with "consume" ordering.
|
||||
|
||||
#[cfg(not(crossbeam_no_atomic_cas))]
|
||||
#[cfg(target_has_atomic = "ptr")]
|
||||
#[cfg(not(crossbeam_loom))]
|
||||
cfg_if::cfg_if! {
|
||||
// Use "wide" sequence lock if the pointer width <= 32 for preventing its counter against wrap
|
||||
// around.
|
||||
//
|
||||
// We are ignoring too wide architectures (pointer width >= 256), since such a system will not
|
||||
// appear in a conceivable future.
|
||||
//
|
||||
// In narrow architectures (pointer width <= 16), the counter is still <= 32-bit and may be
|
||||
// vulnerable to wrap around. But it's mostly okay, since in such a primitive hardware, the
|
||||
// counter will not be increased that fast.
|
||||
if #[cfg(any(target_pointer_width = "64", target_pointer_width = "128"))] {
|
||||
mod seq_lock;
|
||||
} else {
|
||||
#[path = "seq_lock_wide.rs"]
|
||||
mod seq_lock;
|
||||
}
|
||||
}
|
||||
// Use "wide" sequence lock if the pointer width <= 32 for preventing its counter against wrap
|
||||
// around.
|
||||
//
|
||||
// In narrow architectures (pointer width <= 16), the counter is still <= 32-bit and may be
|
||||
// vulnerable to wrap around. But it's mostly okay, since in such a primitive hardware, the
|
||||
// counter will not be increased that fast.
|
||||
// Note that Rust (and C99) pointers must be at least 16-bit (i.e., 8-bit targets are impossible): https://github.com/rust-lang/rust/pull/49305
|
||||
#[cfg_attr(
|
||||
any(target_pointer_width = "16", target_pointer_width = "32"),
|
||||
path = "seq_lock_wide.rs"
|
||||
)]
|
||||
mod seq_lock;
|
||||
|
||||
#[cfg(not(crossbeam_no_atomic_cas))]
|
||||
#[cfg(target_has_atomic = "ptr")]
|
||||
// We cannot provide AtomicCell under cfg(crossbeam_loom) because loom's atomic
|
||||
// types have a different in-memory representation than the underlying type.
|
||||
// TODO: The latest loom supports fences, so fallback using seqlock may be available.
|
||||
#[cfg(not(crossbeam_loom))]
|
||||
mod atomic_cell;
|
||||
mod consume;
|
||||
|
||||
#[cfg(not(crossbeam_no_atomic_cas))]
|
||||
#[cfg(target_has_atomic = "ptr")]
|
||||
#[cfg(not(crossbeam_loom))]
|
||||
pub use self::atomic_cell::AtomicCell;
|
||||
pub use self::consume::AtomicConsume;
|
||||
pub use atomic_cell::AtomicCell;
|
||||
|
||||
mod consume;
|
||||
pub use consume::AtomicConsume;
|
||||
|
||||
17
third_party/rust/crossbeam-utils/src/backoff.rs
vendored
17
third_party/rust/crossbeam-utils/src/backoff.rs
vendored
@@ -1,4 +1,4 @@
|
||||
use crate::primitive::sync::atomic;
|
||||
use crate::primitive::hint;
|
||||
use core::cell::Cell;
|
||||
use core::fmt;
|
||||
|
||||
@@ -145,10 +145,7 @@ impl Backoff {
|
||||
#[inline]
|
||||
pub fn spin(&self) {
|
||||
for _ in 0..1 << self.step.get().min(SPIN_LIMIT) {
|
||||
// TODO(taiki-e): once we bump the minimum required Rust version to 1.49+,
|
||||
// use [`core::hint::spin_loop`] instead.
|
||||
#[allow(deprecated)]
|
||||
atomic::spin_loop_hint();
|
||||
hint::spin_loop();
|
||||
}
|
||||
|
||||
if self.step.get() <= SPIN_LIMIT {
|
||||
@@ -209,18 +206,12 @@ impl Backoff {
|
||||
pub fn snooze(&self) {
|
||||
if self.step.get() <= SPIN_LIMIT {
|
||||
for _ in 0..1 << self.step.get() {
|
||||
// TODO(taiki-e): once we bump the minimum required Rust version to 1.49+,
|
||||
// use [`core::hint::spin_loop`] instead.
|
||||
#[allow(deprecated)]
|
||||
atomic::spin_loop_hint();
|
||||
hint::spin_loop();
|
||||
}
|
||||
} else {
|
||||
#[cfg(not(feature = "std"))]
|
||||
for _ in 0..1 << self.step.get() {
|
||||
// TODO(taiki-e): once we bump the minimum required Rust version to 1.49+,
|
||||
// use [`core::hint::spin_loop`] instead.
|
||||
#[allow(deprecated)]
|
||||
atomic::spin_loop_hint();
|
||||
hint::spin_loop();
|
||||
}
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
|
||||
@@ -14,7 +14,8 @@ use core::ops::{Deref, DerefMut};
|
||||
/// Cache lines are assumed to be N bytes long, depending on the architecture:
|
||||
///
|
||||
/// * On x86-64, aarch64, and powerpc64, N = 128.
|
||||
/// * On arm, mips, mips64, and riscv64, N = 32.
|
||||
/// * On arm, mips, mips64, sparc, and hexagon, N = 32.
|
||||
/// * On m68k, N = 16.
|
||||
/// * On s390x, N = 256.
|
||||
/// * On all others, N = 64.
|
||||
///
|
||||
@@ -75,6 +76,7 @@ use core::ops::{Deref, DerefMut};
|
||||
//
|
||||
// Sources:
|
||||
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_ppc64x.go#L9
|
||||
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/powerpc/include/asm/cache.h#L26
|
||||
#[cfg_attr(
|
||||
any(
|
||||
target_arch = "x86_64",
|
||||
@@ -83,33 +85,45 @@ use core::ops::{Deref, DerefMut};
|
||||
),
|
||||
repr(align(128))
|
||||
)]
|
||||
// arm, mips, mips64, and riscv64 have 32-byte cache line size.
|
||||
// arm, mips, mips64, sparc, and hexagon have 32-byte cache line size.
|
||||
//
|
||||
// Sources:
|
||||
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_arm.go#L7
|
||||
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips.go#L7
|
||||
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mipsle.go#L7
|
||||
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_mips64x.go#L9
|
||||
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_riscv64.go#L7
|
||||
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L17
|
||||
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/hexagon/include/asm/cache.h#L12
|
||||
#[cfg_attr(
|
||||
any(
|
||||
target_arch = "arm",
|
||||
target_arch = "mips",
|
||||
target_arch = "mips32r6",
|
||||
target_arch = "mips64",
|
||||
target_arch = "riscv64",
|
||||
target_arch = "mips64r6",
|
||||
target_arch = "sparc",
|
||||
target_arch = "hexagon",
|
||||
),
|
||||
repr(align(32))
|
||||
)]
|
||||
// m68k has 16-byte cache line size.
|
||||
//
|
||||
// Sources:
|
||||
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/m68k/include/asm/cache.h#L9
|
||||
#[cfg_attr(target_arch = "m68k", repr(align(16)))]
|
||||
// s390x has 256-byte cache line size.
|
||||
//
|
||||
// Sources:
|
||||
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_s390x.go#L7
|
||||
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/s390/include/asm/cache.h#L13
|
||||
#[cfg_attr(target_arch = "s390x", repr(align(256)))]
|
||||
// x86 and wasm have 64-byte cache line size.
|
||||
// x86, wasm, riscv, and sparc64 have 64-byte cache line size.
|
||||
//
|
||||
// Sources:
|
||||
// - https://github.com/golang/go/blob/dda2991c2ea0c5914714469c4defc2562a907230/src/internal/cpu/cpu_x86.go#L9
|
||||
// - https://github.com/golang/go/blob/3dd58676054223962cd915bb0934d1f9f489d4d2/src/internal/cpu/cpu_wasm.go#L7
|
||||
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/riscv/include/asm/cache.h#L10
|
||||
// - https://github.com/torvalds/linux/blob/3516bd729358a2a9b090c1905bd2a3fa926e24c6/arch/sparc/include/asm/cache.h#L19
|
||||
//
|
||||
// All others are assumed to have 64-byte cache line size.
|
||||
#[cfg_attr(
|
||||
@@ -119,8 +133,12 @@ use core::ops::{Deref, DerefMut};
|
||||
target_arch = "powerpc64",
|
||||
target_arch = "arm",
|
||||
target_arch = "mips",
|
||||
target_arch = "mips32r6",
|
||||
target_arch = "mips64",
|
||||
target_arch = "riscv64",
|
||||
target_arch = "mips64r6",
|
||||
target_arch = "sparc",
|
||||
target_arch = "hexagon",
|
||||
target_arch = "m68k",
|
||||
target_arch = "s390x",
|
||||
)),
|
||||
repr(align(64))
|
||||
@@ -189,3 +207,9 @@ impl<T> From<T> for CachePadded<T> {
|
||||
CachePadded::new(t)
|
||||
}
|
||||
}
|
||||
|
||||
impl<T: fmt::Display> fmt::Display for CachePadded<T> {
|
||||
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
|
||||
fmt::Display::fmt(&self.value, f)
|
||||
}
|
||||
}
|
||||
|
||||
46
third_party/rust/crossbeam-utils/src/lib.rs
vendored
46
third_party/rust/crossbeam-utils/src/lib.rs
vendored
@@ -24,6 +24,7 @@
|
||||
//! [`WaitGroup`]: sync::WaitGroup
|
||||
//! [`scope`]: thread::scope
|
||||
|
||||
#![no_std]
|
||||
#![doc(test(
|
||||
no_crate_inject,
|
||||
attr(
|
||||
@@ -37,17 +38,21 @@
|
||||
rust_2018_idioms,
|
||||
unreachable_pub
|
||||
)]
|
||||
#![cfg_attr(not(feature = "std"), no_std)]
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
extern crate std;
|
||||
|
||||
#[cfg(crossbeam_loom)]
|
||||
#[allow(unused_imports)]
|
||||
mod primitive {
|
||||
pub(crate) mod hint {
|
||||
pub(crate) use loom::hint::spin_loop;
|
||||
}
|
||||
pub(crate) mod sync {
|
||||
pub(crate) mod atomic {
|
||||
pub(crate) use loom::sync::atomic::spin_loop_hint;
|
||||
pub(crate) use loom::sync::atomic::{
|
||||
AtomicBool, AtomicI16, AtomicI32, AtomicI64, AtomicI8, AtomicIsize, AtomicU16,
|
||||
AtomicU32, AtomicU64, AtomicU8, AtomicUsize,
|
||||
AtomicU32, AtomicU64, AtomicU8, AtomicUsize, Ordering,
|
||||
};
|
||||
|
||||
// FIXME: loom does not support compiler_fence at the moment.
|
||||
@@ -63,19 +68,24 @@ mod primitive {
|
||||
#[cfg(not(crossbeam_loom))]
|
||||
#[allow(unused_imports)]
|
||||
mod primitive {
|
||||
pub(crate) mod hint {
|
||||
pub(crate) use core::hint::spin_loop;
|
||||
}
|
||||
pub(crate) mod sync {
|
||||
pub(crate) mod atomic {
|
||||
pub(crate) use core::sync::atomic::compiler_fence;
|
||||
// TODO(taiki-e): once we bump the minimum required Rust version to 1.49+,
|
||||
// use [`core::hint::spin_loop`] instead.
|
||||
#[allow(deprecated)]
|
||||
pub(crate) use core::sync::atomic::spin_loop_hint;
|
||||
pub(crate) use core::sync::atomic::{compiler_fence, Ordering};
|
||||
#[cfg(not(crossbeam_no_atomic))]
|
||||
pub(crate) use core::sync::atomic::{
|
||||
AtomicBool, AtomicI16, AtomicI32, AtomicI8, AtomicIsize, AtomicU16, AtomicU32,
|
||||
AtomicU8, AtomicUsize,
|
||||
AtomicBool, AtomicI16, AtomicI8, AtomicIsize, AtomicU16, AtomicU8, AtomicUsize,
|
||||
};
|
||||
#[cfg(not(crossbeam_no_atomic_64))]
|
||||
#[cfg(not(crossbeam_no_atomic))]
|
||||
#[cfg(any(target_has_atomic = "32", not(target_pointer_width = "16")))]
|
||||
pub(crate) use core::sync::atomic::{AtomicI32, AtomicU32};
|
||||
#[cfg(not(crossbeam_no_atomic))]
|
||||
#[cfg(any(
|
||||
target_has_atomic = "64",
|
||||
not(any(target_pointer_width = "16", target_pointer_width = "32")),
|
||||
))]
|
||||
pub(crate) use core::sync::atomic::{AtomicI64, AtomicU64};
|
||||
}
|
||||
|
||||
@@ -92,13 +102,9 @@ pub use crate::cache_padded::CachePadded;
|
||||
mod backoff;
|
||||
pub use crate::backoff::Backoff;
|
||||
|
||||
use cfg_if::cfg_if;
|
||||
#[cfg(feature = "std")]
|
||||
pub mod sync;
|
||||
|
||||
cfg_if! {
|
||||
if #[cfg(feature = "std")] {
|
||||
pub mod sync;
|
||||
|
||||
#[cfg(not(crossbeam_loom))]
|
||||
pub mod thread;
|
||||
}
|
||||
}
|
||||
#[cfg(feature = "std")]
|
||||
#[cfg(not(crossbeam_loom))]
|
||||
pub mod thread;
|
||||
|
||||
@@ -4,13 +4,10 @@
|
||||
|
||||
use core::cell::UnsafeCell;
|
||||
use core::mem::MaybeUninit;
|
||||
use core::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::Once;
|
||||
|
||||
pub(crate) struct OnceLock<T> {
|
||||
once: Once,
|
||||
// Once::is_completed requires Rust 1.43, so use this to track of whether they have been initialized.
|
||||
is_initialized: AtomicBool,
|
||||
value: UnsafeCell<MaybeUninit<T>>,
|
||||
// Unlike std::sync::OnceLock, we don't need PhantomData here because
|
||||
// we don't use #[may_dangle].
|
||||
@@ -25,7 +22,6 @@ impl<T> OnceLock<T> {
|
||||
pub(crate) const fn new() -> Self {
|
||||
Self {
|
||||
once: Once::new(),
|
||||
is_initialized: AtomicBool::new(false),
|
||||
value: UnsafeCell::new(MaybeUninit::uninit()),
|
||||
}
|
||||
}
|
||||
@@ -50,37 +46,26 @@ impl<T> OnceLock<T> {
|
||||
F: FnOnce() -> T,
|
||||
{
|
||||
// Fast path check
|
||||
if self.is_initialized() {
|
||||
if self.once.is_completed() {
|
||||
// SAFETY: The inner value has been initialized
|
||||
return unsafe { self.get_unchecked() };
|
||||
}
|
||||
self.initialize(f);
|
||||
|
||||
debug_assert!(self.is_initialized());
|
||||
|
||||
// SAFETY: The inner value has been initialized
|
||||
unsafe { self.get_unchecked() }
|
||||
}
|
||||
|
||||
#[inline]
|
||||
fn is_initialized(&self) -> bool {
|
||||
self.is_initialized.load(Ordering::Acquire)
|
||||
}
|
||||
|
||||
#[cold]
|
||||
fn initialize<F>(&self, f: F)
|
||||
where
|
||||
F: FnOnce() -> T,
|
||||
{
|
||||
let slot = self.value.get().cast::<T>();
|
||||
let is_initialized = &self.is_initialized;
|
||||
let slot = self.value.get();
|
||||
|
||||
self.once.call_once(|| {
|
||||
let value = f();
|
||||
unsafe {
|
||||
slot.write(value);
|
||||
}
|
||||
is_initialized.store(true, Ordering::Release);
|
||||
unsafe { slot.write(MaybeUninit::new(value)) }
|
||||
});
|
||||
}
|
||||
|
||||
@@ -88,16 +73,16 @@ impl<T> OnceLock<T> {
|
||||
///
|
||||
/// The value must be initialized
|
||||
unsafe fn get_unchecked(&self) -> &T {
|
||||
debug_assert!(self.is_initialized());
|
||||
debug_assert!(self.once.is_completed());
|
||||
&*self.value.get().cast::<T>()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Drop for OnceLock<T> {
|
||||
fn drop(&mut self) {
|
||||
if self.is_initialized() {
|
||||
if self.once.is_completed() {
|
||||
// SAFETY: The inner value has been initialized
|
||||
unsafe { self.value.get().cast::<T>().drop_in_place() };
|
||||
unsafe { (*self.value.get()).assume_init_drop() };
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,6 +1,5 @@
|
||||
use crate::primitive::sync::atomic::AtomicUsize;
|
||||
use crate::primitive::sync::atomic::{AtomicUsize, Ordering::SeqCst};
|
||||
use crate::primitive::sync::{Arc, Condvar, Mutex};
|
||||
use core::sync::atomic::Ordering::SeqCst;
|
||||
use std::fmt;
|
||||
use std::marker::PhantomData;
|
||||
use std::time::{Duration, Instant};
|
||||
@@ -122,7 +121,10 @@ impl Parker {
|
||||
/// p.park_timeout(Duration::from_millis(500));
|
||||
/// ```
|
||||
pub fn park_timeout(&self, timeout: Duration) {
|
||||
self.park_deadline(Instant::now() + timeout)
|
||||
match Instant::now().checked_add(timeout) {
|
||||
Some(deadline) => self.park_deadline(deadline),
|
||||
None => self.park(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Blocks the current thread until the token is made available, or until a certain deadline.
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
use std::boxed::Box;
|
||||
use std::cell::UnsafeCell;
|
||||
use std::collections::HashMap;
|
||||
use std::fmt;
|
||||
@@ -8,6 +9,7 @@ use std::panic::{RefUnwindSafe, UnwindSafe};
|
||||
use std::sync::{LockResult, PoisonError, TryLockError, TryLockResult};
|
||||
use std::sync::{Mutex, RwLock, RwLockReadGuard, RwLockWriteGuard};
|
||||
use std::thread::{self, ThreadId};
|
||||
use std::vec::Vec;
|
||||
|
||||
use crate::sync::once_lock::OnceLock;
|
||||
use crate::CachePadded;
|
||||
@@ -356,7 +358,7 @@ impl<T: ?Sized> ShardedLock<T> {
|
||||
for shard in self.shards[0..i].iter().rev() {
|
||||
unsafe {
|
||||
let dest: *mut _ = shard.write_guard.get();
|
||||
let guard = mem::replace(&mut *dest, None);
|
||||
let guard = (*dest).take();
|
||||
drop(guard);
|
||||
}
|
||||
}
|
||||
@@ -480,6 +482,7 @@ impl<T> From<T> for ShardedLock<T> {
|
||||
}
|
||||
|
||||
/// A guard used to release the shared read access of a [`ShardedLock`] when dropped.
|
||||
#[clippy::has_significant_drop]
|
||||
pub struct ShardedLockReadGuard<'a, T: ?Sized> {
|
||||
lock: &'a ShardedLock<T>,
|
||||
_guard: RwLockReadGuard<'a, ()>,
|
||||
@@ -511,6 +514,7 @@ impl<T: ?Sized + fmt::Display> fmt::Display for ShardedLockReadGuard<'_, T> {
|
||||
}
|
||||
|
||||
/// A guard used to release the exclusive write access of a [`ShardedLock`] when dropped.
|
||||
#[clippy::has_significant_drop]
|
||||
pub struct ShardedLockWriteGuard<'a, T: ?Sized> {
|
||||
lock: &'a ShardedLock<T>,
|
||||
_marker: PhantomData<RwLockWriteGuard<'a, T>>,
|
||||
@@ -524,7 +528,7 @@ impl<T: ?Sized> Drop for ShardedLockWriteGuard<'_, T> {
|
||||
for shard in self.lock.shards.iter().rev() {
|
||||
unsafe {
|
||||
let dest: *mut _ = shard.write_guard.get();
|
||||
let guard = mem::replace(&mut *dest, None);
|
||||
let guard = (*dest).take();
|
||||
drop(guard);
|
||||
}
|
||||
}
|
||||
@@ -611,7 +615,7 @@ impl Drop for Registration {
|
||||
}
|
||||
}
|
||||
|
||||
thread_local! {
|
||||
std::thread_local! {
|
||||
static REGISTRATION: Registration = {
|
||||
let thread_id = thread::current().id();
|
||||
let mut indices = thread_indices().lock().unwrap();
|
||||
|
||||
84
third_party/rust/crossbeam-utils/src/thread.rs
vendored
84
third_party/rust/crossbeam-utils/src/thread.rs
vendored
@@ -84,7 +84,7 @@
|
||||
//! tricky because argument `s` lives *inside* the invocation of `thread::scope()` and as such
|
||||
//! cannot be borrowed by scoped threads:
|
||||
//!
|
||||
//! ```compile_fail,E0373,E0521
|
||||
//! ```compile_fail,E0521
|
||||
//! use crossbeam_utils::thread;
|
||||
//!
|
||||
//! thread::scope(|s| {
|
||||
@@ -111,16 +111,18 @@
|
||||
//! }).unwrap();
|
||||
//! ```
|
||||
|
||||
use std::boxed::Box;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::marker::PhantomData;
|
||||
use std::mem;
|
||||
use std::panic;
|
||||
use std::string::String;
|
||||
use std::sync::{Arc, Mutex};
|
||||
use std::thread;
|
||||
use std::vec::Vec;
|
||||
|
||||
use crate::sync::WaitGroup;
|
||||
use cfg_if::cfg_if;
|
||||
|
||||
type SharedVec<T> = Arc<Mutex<Vec<T>>>;
|
||||
type SharedOption<T> = Arc<Mutex<Option<T>>>;
|
||||
@@ -133,6 +135,8 @@ type SharedOption<T> = Arc<Mutex<Option<T>>>;
|
||||
/// returned containing errors from panicked threads. Note that if panics are implemented by
|
||||
/// aborting the process, no error is returned; see the notes of [std::panic::catch_unwind].
|
||||
///
|
||||
/// **Note:** Since Rust 1.63, this function is soft-deprecated in favor of the more efficient [`std::thread::scope`].
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
@@ -150,6 +154,15 @@ pub fn scope<'env, F, R>(f: F) -> thread::Result<R>
|
||||
where
|
||||
F: FnOnce(&Scope<'env>) -> R,
|
||||
{
|
||||
struct AbortOnPanic;
|
||||
impl Drop for AbortOnPanic {
|
||||
fn drop(&mut self) {
|
||||
if thread::panicking() {
|
||||
std::process::abort();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let wg = WaitGroup::new();
|
||||
let scope = Scope::<'env> {
|
||||
handles: SharedVec::default(),
|
||||
@@ -160,6 +173,10 @@ where
|
||||
// Execute the scoped function, but catch any panics.
|
||||
let result = panic::catch_unwind(panic::AssertUnwindSafe(|| f(&scope)));
|
||||
|
||||
// If an unwinding panic occurs before all threads are joined
|
||||
// promote it to an aborting panic to prevent any threads from escaping the scope.
|
||||
let guard = AbortOnPanic;
|
||||
|
||||
// Wait until all nested scopes are dropped.
|
||||
drop(scope.wait_group);
|
||||
wg.wait();
|
||||
@@ -175,6 +192,8 @@ where
|
||||
.filter_map(|handle| handle.join().err())
|
||||
.collect();
|
||||
|
||||
mem::forget(guard);
|
||||
|
||||
// If `f` has panicked, resume unwinding.
|
||||
// If any of the child threads have panicked, return the panic errors.
|
||||
// Otherwise, everything is OK and return the result of `f`.
|
||||
@@ -481,7 +500,7 @@ pub struct ScopedJoinHandle<'scope, T> {
|
||||
/// Holds the result of the inner closure.
|
||||
result: SharedOption<T>,
|
||||
|
||||
/// A handle to the the spawned thread.
|
||||
/// A handle to the spawned thread.
|
||||
thread: thread::Thread,
|
||||
|
||||
/// Borrows the parent scope with lifetime `'scope`.
|
||||
@@ -545,37 +564,42 @@ impl<T> ScopedJoinHandle<'_, T> {
|
||||
}
|
||||
}
|
||||
|
||||
cfg_if! {
|
||||
if #[cfg(unix)] {
|
||||
use std::os::unix::thread::{JoinHandleExt, RawPthread};
|
||||
/// Unix-specific extensions.
|
||||
#[cfg(unix)]
|
||||
mod unix {
|
||||
use super::ScopedJoinHandle;
|
||||
use std::os::unix::thread::{JoinHandleExt, RawPthread};
|
||||
|
||||
impl<T> JoinHandleExt for ScopedJoinHandle<'_, T> {
|
||||
fn as_pthread_t(&self) -> RawPthread {
|
||||
// Borrow the handle. The handle will surely be available because the root scope waits
|
||||
// for nested scopes before joining remaining threads.
|
||||
let handle = self.handle.lock().unwrap();
|
||||
handle.as_ref().unwrap().as_pthread_t()
|
||||
}
|
||||
fn into_pthread_t(self) -> RawPthread {
|
||||
self.as_pthread_t()
|
||||
}
|
||||
impl<T> JoinHandleExt for ScopedJoinHandle<'_, T> {
|
||||
fn as_pthread_t(&self) -> RawPthread {
|
||||
// Borrow the handle. The handle will surely be available because the root scope waits
|
||||
// for nested scopes before joining remaining threads.
|
||||
let handle = self.handle.lock().unwrap();
|
||||
handle.as_ref().unwrap().as_pthread_t()
|
||||
}
|
||||
} else if #[cfg(windows)] {
|
||||
use std::os::windows::io::{AsRawHandle, IntoRawHandle, RawHandle};
|
||||
|
||||
impl<T> AsRawHandle for ScopedJoinHandle<'_, T> {
|
||||
fn as_raw_handle(&self) -> RawHandle {
|
||||
// Borrow the handle. The handle will surely be available because the root scope waits
|
||||
// for nested scopes before joining remaining threads.
|
||||
let handle = self.handle.lock().unwrap();
|
||||
handle.as_ref().unwrap().as_raw_handle()
|
||||
}
|
||||
fn into_pthread_t(self) -> RawPthread {
|
||||
self.as_pthread_t()
|
||||
}
|
||||
}
|
||||
}
|
||||
/// Windows-specific extensions.
|
||||
#[cfg(windows)]
|
||||
mod windows {
|
||||
use super::ScopedJoinHandle;
|
||||
use std::os::windows::io::{AsRawHandle, IntoRawHandle, RawHandle};
|
||||
|
||||
impl<T> IntoRawHandle for ScopedJoinHandle<'_, T> {
|
||||
fn into_raw_handle(self) -> RawHandle {
|
||||
self.as_raw_handle()
|
||||
}
|
||||
impl<T> AsRawHandle for ScopedJoinHandle<'_, T> {
|
||||
fn as_raw_handle(&self) -> RawHandle {
|
||||
// Borrow the handle. The handle will surely be available because the root scope waits
|
||||
// for nested scopes before joining remaining threads.
|
||||
let handle = self.handle.lock().unwrap();
|
||||
handle.as_ref().unwrap().as_raw_handle()
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> IntoRawHandle for ScopedJoinHandle<'_, T> {
|
||||
fn into_raw_handle(self) -> RawHandle {
|
||||
self.as_raw_handle()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,11 +6,11 @@ use crossbeam_utils::atomic::AtomicCell;
|
||||
|
||||
#[test]
|
||||
fn is_lock_free() {
|
||||
struct UsizeWrap(usize);
|
||||
struct U8Wrap(bool);
|
||||
struct I16Wrap(i16);
|
||||
struct UsizeWrap(#[allow(dead_code)] usize);
|
||||
struct U8Wrap(#[allow(dead_code)] bool);
|
||||
struct I16Wrap(#[allow(dead_code)] i16);
|
||||
#[repr(align(8))]
|
||||
struct U64Align8(u64);
|
||||
struct U64Align8(#[allow(dead_code)] u64);
|
||||
|
||||
assert!(AtomicCell::<usize>::is_lock_free());
|
||||
assert!(AtomicCell::<isize>::is_lock_free());
|
||||
@@ -35,17 +35,13 @@ fn is_lock_free() {
|
||||
// of `AtomicU64` is `8`, so `AtomicCell<u64>` is not lock-free.
|
||||
assert_eq!(
|
||||
AtomicCell::<u64>::is_lock_free(),
|
||||
cfg!(not(crossbeam_no_atomic_64))
|
||||
&& cfg!(any(
|
||||
target_pointer_width = "64",
|
||||
target_pointer_width = "128"
|
||||
))
|
||||
cfg!(target_has_atomic = "64") && std::mem::align_of::<u64>() == 8
|
||||
);
|
||||
assert_eq!(mem::size_of::<U64Align8>(), 8);
|
||||
assert_eq!(mem::align_of::<U64Align8>(), 8);
|
||||
assert_eq!(
|
||||
AtomicCell::<U64Align8>::is_lock_free(),
|
||||
cfg!(not(crossbeam_no_atomic_64))
|
||||
cfg!(target_has_atomic = "64")
|
||||
);
|
||||
|
||||
// AtomicU128 is unstable
|
||||
@@ -311,7 +307,6 @@ test_arithmetic!(arithmetic_i128, i128);
|
||||
|
||||
// https://github.com/crossbeam-rs/crossbeam/issues/748
|
||||
#[cfg_attr(miri, ignore)] // TODO
|
||||
#[rustversion::since(1.37)] // #[repr(align(N))] requires Rust 1.37
|
||||
#[test]
|
||||
fn issue_748() {
|
||||
#[allow(dead_code)]
|
||||
@@ -325,14 +320,13 @@ fn issue_748() {
|
||||
assert_eq!(mem::size_of::<Test>(), 8);
|
||||
assert_eq!(
|
||||
AtomicCell::<Test>::is_lock_free(),
|
||||
cfg!(not(crossbeam_no_atomic_64))
|
||||
cfg!(target_has_atomic = "64")
|
||||
);
|
||||
let x = AtomicCell::new(Test::FieldLess);
|
||||
assert_eq!(x.load(), Test::FieldLess);
|
||||
}
|
||||
|
||||
// https://github.com/crossbeam-rs/crossbeam/issues/833
|
||||
#[rustversion::since(1.40)] // const_constructor requires Rust 1.40
|
||||
#[test]
|
||||
fn issue_833() {
|
||||
use std::num::NonZeroU128;
|
||||
|
||||
@@ -36,25 +36,27 @@ fn wait() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(miri, ignore)] // this test makes timing assumptions, but Miri is so slow it violates them
|
||||
fn wait_and_drop() {
|
||||
let wg = WaitGroup::new();
|
||||
let wg2 = WaitGroup::new();
|
||||
let (tx, rx) = mpsc::channel();
|
||||
|
||||
for _ in 0..THREADS {
|
||||
let wg = wg.clone();
|
||||
let wg2 = wg2.clone();
|
||||
let tx = tx.clone();
|
||||
|
||||
thread::spawn(move || {
|
||||
thread::sleep(Duration::from_millis(100));
|
||||
wg2.wait();
|
||||
tx.send(()).unwrap();
|
||||
drop(wg);
|
||||
});
|
||||
}
|
||||
|
||||
// At this point, all spawned threads should be in `thread::sleep`, so we shouldn't get anything
|
||||
// from the channel.
|
||||
// At this point, no thread has gotten past `wg2.wait()`, so we shouldn't get anything from the
|
||||
// channel.
|
||||
assert!(rx.try_recv().is_err());
|
||||
drop(wg2);
|
||||
|
||||
wg.wait();
|
||||
|
||||
|
||||
@@ -1 +1 @@
|
||||
{"files":{"Cargo.toml":"f1778460809aaf6ac901ebb08f73bde6776318f85be107ab632d855140fea183","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"15656cc11a8331f28c0986b8ab97220d3e76f98e60ed388b5ffad37dfac4710c","README.md":"7059db284b2016ba7355c63a2b14eb732c7b8952286ff1bc4fdde605018a39c4","src/container_attributes.rs":"9342a89e5e5f412159d1a1a88ae4ee0248180f30adc13e61ebf5f96b5f09877f","src/field_attributes.rs":"15093171d7f1e30c2b2523788e54c69c816029b310133ceb1ac811d1f11a76d5","src/lib.rs":"701a9c66e25c3a2151eab9159f6c2fa64ea910ceb6feb225dc97c2734254c265"},"package":"53e0efad4403bfc52dc201159c4b842a246a14b98c64b55dfd0f2d89729dfeb8"}
|
||||
{"files":{"Cargo.toml":"196abb2a00d047e16ba1337cd492b4115ebcee941283acde6fb80dc025176dce","LICENSE-APACHE":"a60eea817514531668d7e00765731449fe14d059d3249e0bc93b36de45f759f2","LICENSE-MIT":"15656cc11a8331f28c0986b8ab97220d3e76f98e60ed388b5ffad37dfac4710c","README.md":"7059db284b2016ba7355c63a2b14eb732c7b8952286ff1bc4fdde605018a39c4","src/container_attributes.rs":"9342a89e5e5f412159d1a1a88ae4ee0248180f30adc13e61ebf5f96b5f09877f","src/field_attributes.rs":"15093171d7f1e30c2b2523788e54c69c816029b310133ceb1ac811d1f11a76d5","src/lib.rs":"701a9c66e25c3a2151eab9159f6c2fa64ea910ceb6feb225dc97c2734254c265"},"package":"67e77553c4162a157adbf834ebae5b415acbecbeafc7a74b0e886657506a7611"}
|
||||
3
third_party/rust/derive_arbitrary/Cargo.toml
vendored
3
third_party/rust/derive_arbitrary/Cargo.toml
vendored
@@ -13,7 +13,7 @@
|
||||
edition = "2021"
|
||||
rust-version = "1.63.0"
|
||||
name = "derive_arbitrary"
|
||||
version = "1.3.1"
|
||||
version = "1.3.2"
|
||||
authors = [
|
||||
"The Rust-Fuzz Project Developers",
|
||||
"Nick Fitzgerald <fitzgen@gmail.com>",
|
||||
@@ -33,7 +33,6 @@ keywords = [
|
||||
categories = ["development-tools::testing"]
|
||||
license = "MIT/Apache-2.0"
|
||||
repository = "https://github.com/rust-fuzz/arbitrary"
|
||||
resolver = "1"
|
||||
|
||||
[lib]
|
||||
proc_macro = true
|
||||
|
||||
2
third_party/rust/flate2/.cargo-checksum.json
vendored
2
third_party/rust/flate2/.cargo-checksum.json
vendored
File diff suppressed because one or more lines are too long
73
third_party/rust/flate2/Cargo.lock
generated
vendored
73
third_party/rust/flate2/Cargo.lock
generated
vendored
@@ -10,9 +10,9 @@ checksum = "f26201604c87b1e01bd3d98f8d5d9a8fcbb815e8cedb41ffccbeb4bf593a35fe"
|
||||
|
||||
[[package]]
|
||||
name = "cc"
|
||||
version = "1.0.73"
|
||||
version = "1.0.88"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2fff2a6927b3bb87f9595d67196a70493f627687a71d87a0d692242c33f58c11"
|
||||
checksum = "02f341c093d19155a6e41631ce5971aac4e9a868262212153124c15fa22d1cdc"
|
||||
|
||||
[[package]]
|
||||
name = "cfg-if"
|
||||
@@ -22,38 +22,39 @@ checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd"
|
||||
|
||||
[[package]]
|
||||
name = "cloudflare-zlib-sys"
|
||||
version = "0.3.0"
|
||||
version = "0.3.3"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "2040b6d1edfee6d75f172d81e2d2a7807534f3f294ce18184c70e7bb0105cd6f"
|
||||
checksum = "c3185ff8c69c53ab346d5ac89f418e194b997d48393cae321cb611dd05f83c90"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "cmake"
|
||||
version = "0.1.48"
|
||||
version = "0.1.50"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e8ad8cef104ac57b68b89df3208164d228503abbdce70f6880ffa3d970e7443a"
|
||||
checksum = "a31c789563b815f77f4250caee12365734369f942439b7defd71e18a48197130"
|
||||
dependencies = [
|
||||
"cc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "crc32fast"
|
||||
version = "1.3.2"
|
||||
version = "1.4.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "b540bd8bc810d3885c6ea91e2018302f68baba2129ab3e88f32389ee9370880d"
|
||||
checksum = "b3855a8a784b474f333699ef2bbca9db2c4a1f6d9088a90a2d25b1eb53111eaa"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "flate2"
|
||||
version = "1.0.26"
|
||||
version = "1.0.30"
|
||||
dependencies = [
|
||||
"cloudflare-zlib-sys",
|
||||
"crc32fast",
|
||||
"libz-ng-sys",
|
||||
"libz-rs-sys",
|
||||
"libz-sys",
|
||||
"miniz_oxide",
|
||||
"quickcheck",
|
||||
@@ -62,9 +63,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "getrandom"
|
||||
version = "0.2.6"
|
||||
version = "0.2.12"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9be70c98951c83b8d2f8f60d7065fa6d5146873094452a1008da8c2f1e4205ad"
|
||||
checksum = "190092ea657667030ac6a35e305e62fc4dd69fd98ac98631e5d3a2b1575a12b5"
|
||||
dependencies = [
|
||||
"cfg-if",
|
||||
"libc",
|
||||
@@ -73,25 +74,35 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "libc"
|
||||
version = "0.2.124"
|
||||
version = "0.2.153"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "21a41fed9d98f27ab1c6d161da622a4fa35e8a54a8adc24bbf3ddd0ef70b0e50"
|
||||
checksum = "9c198f91728a82281a64e1f4f9eeb25d82cb32a5de251c6bd1b5154d63a8e7bd"
|
||||
|
||||
[[package]]
|
||||
name = "libz-ng-sys"
|
||||
version = "1.1.8"
|
||||
version = "1.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "4399ae96a9966bf581e726de86969f803a81b7ce795fcd5480e640589457e0f2"
|
||||
checksum = "c6409efc61b12687963e602df8ecf70e8ddacf95bc6576bcf16e3ac6328083c5"
|
||||
dependencies = [
|
||||
"cmake",
|
||||
"libc",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libz-sys"
|
||||
version = "1.1.8"
|
||||
name = "libz-rs-sys"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "9702761c3935f8cc2f101793272e202c72b99da8f4224a19ddcf1279a6450bbf"
|
||||
checksum = "cd3c4423250be0f3892a490cd6ddc490f5153335678776f08fca307f5fea3b28"
|
||||
dependencies = [
|
||||
"libc",
|
||||
"zlib-rs",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "libz-sys"
|
||||
version = "1.1.15"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "037731f5d3aaa87a5675e895b63ddff1a87624bc29f77004ea829809654e48f6"
|
||||
dependencies = [
|
||||
"cc",
|
||||
"cmake",
|
||||
@@ -102,24 +113,24 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "miniz_oxide"
|
||||
version = "0.7.1"
|
||||
version = "0.7.2"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "e7810e0be55b428ada41041c41f32c9f1a42817901b4ccf45fa3d4b6561e74c7"
|
||||
checksum = "9d811f3e15f28568be3407c8e7fdb6514c1cda3cb30683f15b6a1a1dc4ea14a7"
|
||||
dependencies = [
|
||||
"adler",
|
||||
]
|
||||
|
||||
[[package]]
|
||||
name = "pkg-config"
|
||||
version = "0.3.25"
|
||||
version = "0.3.30"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "1df8c4ec4b0627e53bdf214615ad287367e482558cf84b109250b37464dc03ae"
|
||||
checksum = "d231b230927b5e4ad203db57bbcbee2802f6bce620b1e4a9024a07d94e2907ec"
|
||||
|
||||
[[package]]
|
||||
name = "ppv-lite86"
|
||||
version = "0.2.16"
|
||||
version = "0.2.17"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "eb9f9e6e233e5c4a35559a617bf40a4ec447db2e84c20b55a6f83167b7e57872"
|
||||
checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de"
|
||||
|
||||
[[package]]
|
||||
name = "quickcheck"
|
||||
@@ -153,9 +164,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "rand_core"
|
||||
version = "0.6.3"
|
||||
version = "0.6.4"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "d34f1408f55294453790c48b2f1ebbb1c5b4b7563eb1f418bcfcfdbb06ebb4e7"
|
||||
checksum = "ec0be4795e2f6a28069bec0b5ff3e2ac9bafc99e6a9a7dc3547996c5c816922c"
|
||||
dependencies = [
|
||||
"getrandom",
|
||||
]
|
||||
@@ -168,6 +179,12 @@ checksum = "accd4ea62f7bb7a82fe23066fb0957d48ef677f6eeb8215f372f52e48bb32426"
|
||||
|
||||
[[package]]
|
||||
name = "wasi"
|
||||
version = "0.10.2+wasi-snapshot-preview1"
|
||||
version = "0.11.0+wasi-snapshot-preview1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "fd6fbd9a79829dd1ad0cc20627bf1ed606756a7f77edff7b66b7064f9cb327c6"
|
||||
checksum = "9c8d87e72b64a3b4db28d11ce29237c246188f4f51057d65a7eab63b7987e423"
|
||||
|
||||
[[package]]
|
||||
name = "zlib-rs"
|
||||
version = "0.1.1"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "c52105e2dc6760ec88755876659dc301b51f6728f3b7a1bbdeeb66c6af4d44a1"
|
||||
|
||||
23
third_party/rust/flate2/Cargo.toml
vendored
23
third_party/rust/flate2/Cargo.toml
vendored
@@ -12,7 +12,7 @@
|
||||
[package]
|
||||
edition = "2018"
|
||||
name = "flate2"
|
||||
version = "1.0.26"
|
||||
version = "1.0.30"
|
||||
authors = [
|
||||
"Alex Crichton <alex@alexcrichton.com>",
|
||||
"Josh Triplett <josh@joshtriplett.org>",
|
||||
@@ -57,6 +57,15 @@ version = "1.2.0"
|
||||
version = "1.1.8"
|
||||
optional = true
|
||||
|
||||
[dependencies.libz-rs-sys]
|
||||
version = "0.1.1"
|
||||
features = [
|
||||
"std",
|
||||
"rust-allocator",
|
||||
]
|
||||
optional = true
|
||||
default-features = false
|
||||
|
||||
[dependencies.libz-sys]
|
||||
version = "1.1.8"
|
||||
optional = true
|
||||
@@ -76,14 +85,18 @@ default-features = false
|
||||
version = "0.8"
|
||||
|
||||
[features]
|
||||
any_zlib = []
|
||||
any_impl = []
|
||||
any_zlib = ["any_impl"]
|
||||
cloudflare_zlib = [
|
||||
"any_zlib",
|
||||
"cloudflare-zlib-sys",
|
||||
]
|
||||
default = ["rust_backend"]
|
||||
miniz-sys = ["rust_backend"]
|
||||
rust_backend = ["miniz_oxide"]
|
||||
rust_backend = [
|
||||
"miniz_oxide",
|
||||
"any_impl",
|
||||
]
|
||||
zlib = [
|
||||
"any_zlib",
|
||||
"libz-sys",
|
||||
@@ -100,6 +113,10 @@ zlib-ng-compat = [
|
||||
"zlib",
|
||||
"libz-sys/zlib-ng",
|
||||
]
|
||||
zlib-rs = [
|
||||
"any_zlib",
|
||||
"libz-rs-sys",
|
||||
]
|
||||
|
||||
[target."cfg(all(target_arch = \"wasm32\", not(target_os = \"emscripten\")))".dependencies.miniz_oxide]
|
||||
version = "0.7.1"
|
||||
|
||||
21
third_party/rust/flate2/MAINTENANCE.md
vendored
Normal file
21
third_party/rust/flate2/MAINTENANCE.md
vendored
Normal file
@@ -0,0 +1,21 @@
|
||||
This document explains how to perform the project's maintenance tasks.
|
||||
|
||||
### Creating a new release
|
||||
|
||||
#### Artifacts
|
||||
|
||||
* a tag of the version number
|
||||
* a new [crate version](https://crates.io/crates/flate2/versions)
|
||||
|
||||
#### Process
|
||||
|
||||
To generate all the artifacts above, one proceeds as follows:
|
||||
|
||||
1. `git checkout -b release-<next-version>` - move to a branch to prepare making changes to the repository. *Changes cannot be made to `main` as it is protected.*
|
||||
2. Edit `Cargo.toml` to the next package version.
|
||||
3. `gh pr create` to create a new PR for the current branch and **get it merged**.
|
||||
4. `cargo publish` to create a new release on `crates.io`.
|
||||
5. `git tag <next-version>` to remember the commit.
|
||||
6. `git push --tags` to push the new tag.
|
||||
7. Go to the newly created release page on GitHub and edit it by pressing the "Generate Release Notes" and the `@` button. Save the release.
|
||||
|
||||
2
third_party/rust/flate2/README.md
vendored
2
third_party/rust/flate2/README.md
vendored
@@ -6,7 +6,7 @@
|
||||
A streaming compression/decompression library DEFLATE-based streams in Rust.
|
||||
|
||||
This crate by default uses the `miniz_oxide` crate, a port of `miniz.c` to pure
|
||||
Rust. This crate also supports other [backends](#Backends), such as the widely
|
||||
Rust. This crate also supports other [backends](#backends), such as the widely
|
||||
available zlib library or the high-performance zlib-ng library.
|
||||
|
||||
Supported formats:
|
||||
|
||||
57
third_party/rust/flate2/src/deflate/bufread.rs
vendored
57
third_party/rust/flate2/src/deflate/bufread.rs
vendored
@@ -7,9 +7,10 @@ use crate::{Compress, Decompress};
|
||||
|
||||
/// A DEFLATE encoder, or compressor.
|
||||
///
|
||||
/// This structure consumes a [`BufRead`] interface, reading uncompressed data
|
||||
/// from the underlying reader, and emitting compressed data.
|
||||
/// This structure implements a [`Read`] interface. When read from, it reads
|
||||
/// uncompressed data from the underlying [`BufRead`] and provides the compressed data.
|
||||
///
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
|
||||
///
|
||||
/// # Examples
|
||||
@@ -123,9 +124,10 @@ impl<W: BufRead + Write> Write for DeflateEncoder<W> {
|
||||
|
||||
/// A DEFLATE decoder, or decompressor.
|
||||
///
|
||||
/// This structure consumes a [`BufRead`] interface, reading compressed data
|
||||
/// from the underlying reader, and emitting uncompressed data.
|
||||
/// This structure implements a [`Read`] interface. When read from, it reads
|
||||
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
|
||||
///
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
|
||||
///
|
||||
/// # Examples
|
||||
@@ -241,3 +243,50 @@ impl<W: BufRead + Write> Write for DeflateDecoder<W> {
|
||||
self.get_mut().flush()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::bufread::DeflateDecoder;
|
||||
use crate::deflate::write;
|
||||
use crate::Compression;
|
||||
use std::io::{Read, Write};
|
||||
|
||||
// DeflateDecoder consumes one deflate archive and then returns 0 for subsequent reads, allowing any
|
||||
// additional data to be consumed by the caller.
|
||||
#[test]
|
||||
fn decode_extra_data() {
|
||||
let expected = "Hello World";
|
||||
|
||||
let compressed = {
|
||||
let mut e = write::DeflateEncoder::new(Vec::new(), Compression::default());
|
||||
e.write(expected.as_ref()).unwrap();
|
||||
let mut b = e.finish().unwrap();
|
||||
b.push(b'x');
|
||||
b
|
||||
};
|
||||
|
||||
let mut output = Vec::new();
|
||||
let mut decoder = DeflateDecoder::new(compressed.as_slice());
|
||||
let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
|
||||
assert_eq!(decoded_bytes, output.len());
|
||||
let actual = std::str::from_utf8(&output).expect("String parsing error");
|
||||
assert_eq!(
|
||||
actual, expected,
|
||||
"after decompression we obtain the original input"
|
||||
);
|
||||
|
||||
output.clear();
|
||||
assert_eq!(
|
||||
decoder.read(&mut output).unwrap(),
|
||||
0,
|
||||
"subsequent read of decoder returns 0, but inner reader can return additional data"
|
||||
);
|
||||
let mut reader = decoder.into_inner();
|
||||
assert_eq!(
|
||||
reader.read_to_end(&mut output).unwrap(),
|
||||
1,
|
||||
"extra data is accessible in underlying buf-read"
|
||||
);
|
||||
assert_eq!(output, b"x");
|
||||
}
|
||||
}
|
||||
|
||||
14
third_party/rust/flate2/src/deflate/read.rs
vendored
14
third_party/rust/flate2/src/deflate/read.rs
vendored
@@ -6,8 +6,8 @@ use crate::bufreader::BufReader;
|
||||
|
||||
/// A DEFLATE encoder, or compressor.
|
||||
///
|
||||
/// This structure implements a [`Read`] interface and will read uncompressed
|
||||
/// data from an underlying stream and emit a stream of compressed data.
|
||||
/// This structure implements a [`Read`] interface. When read from, it reads
|
||||
/// uncompressed data from the underlying [`Read`] and provides the compressed data.
|
||||
///
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
///
|
||||
@@ -25,11 +25,11 @@ use crate::bufreader::BufReader;
|
||||
/// #
|
||||
/// // Return a vector containing the Deflate compressed version of hello world
|
||||
/// fn deflateencoder_read_hello_world() -> io::Result<Vec<u8>> {
|
||||
/// let mut ret_vec = [0;100];
|
||||
/// let mut ret_vec = Vec::new();
|
||||
/// let c = b"hello world";
|
||||
/// let mut deflater = DeflateEncoder::new(&c[..], Compression::fast());
|
||||
/// let count = deflater.read(&mut ret_vec)?;
|
||||
/// Ok(ret_vec[0..count].to_vec())
|
||||
/// deflater.read_to_end(&mut ret_vec)?;
|
||||
/// Ok(ret_vec)
|
||||
/// }
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
@@ -120,8 +120,8 @@ impl<W: Read + Write> Write for DeflateEncoder<W> {
|
||||
|
||||
/// A DEFLATE decoder, or decompressor.
|
||||
///
|
||||
/// This structure implements a [`Read`] interface and takes a stream of
|
||||
/// compressed data as input, providing the decompressed data when read from.
|
||||
/// This structure implements a [`Read`] interface. When read from, it reads
|
||||
/// compressed data from the underlying [`Read`] and provides the uncompressed data.
|
||||
///
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
///
|
||||
|
||||
40
third_party/rust/flate2/src/deflate/write.rs
vendored
40
third_party/rust/flate2/src/deflate/write.rs
vendored
@@ -320,3 +320,43 @@ impl<W: Read + Write> Read for DeflateDecoder<W> {
|
||||
self.inner.get_mut().read(buf)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::Compression;
|
||||
|
||||
const STR: &str = "Hello World Hello World Hello World Hello World Hello World \
|
||||
Hello World Hello World Hello World Hello World Hello World \
|
||||
Hello World Hello World Hello World Hello World Hello World \
|
||||
Hello World Hello World Hello World Hello World Hello World \
|
||||
Hello World Hello World Hello World Hello World Hello World";
|
||||
|
||||
// DeflateDecoder consumes one zlib archive and then returns 0 for subsequent writes, allowing any
|
||||
// additional data to be consumed by the caller.
|
||||
#[test]
|
||||
fn decode_extra_data() {
|
||||
let compressed = {
|
||||
let mut e = DeflateEncoder::new(Vec::new(), Compression::default());
|
||||
e.write(STR.as_ref()).unwrap();
|
||||
let mut b = e.finish().unwrap();
|
||||
b.push(b'x');
|
||||
b
|
||||
};
|
||||
|
||||
let mut writer = Vec::new();
|
||||
let mut decoder = DeflateDecoder::new(writer);
|
||||
let mut consumed_bytes = 0;
|
||||
loop {
|
||||
let n = decoder.write(&compressed[consumed_bytes..]).unwrap();
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
consumed_bytes += n;
|
||||
}
|
||||
writer = decoder.finish().unwrap();
|
||||
let actual = String::from_utf8(writer).expect("String parsing error");
|
||||
assert_eq!(actual, STR);
|
||||
assert_eq!(&compressed[consumed_bytes..], b"x");
|
||||
}
|
||||
}
|
||||
|
||||
291
third_party/rust/flate2/src/ffi/c.rs
vendored
291
third_party/rust/flate2/src/ffi/c.rs
vendored
@@ -1,15 +1,12 @@
|
||||
//! Implementation for C backends.
|
||||
use std::alloc::{self, Layout};
|
||||
use std::cmp;
|
||||
use std::convert::TryFrom;
|
||||
use std::fmt;
|
||||
use std::marker;
|
||||
use std::ops::{Deref, DerefMut};
|
||||
use std::os::raw::{c_int, c_uint, c_void};
|
||||
use std::os::raw::{c_int, c_uint};
|
||||
use std::ptr;
|
||||
|
||||
use super::*;
|
||||
use crate::mem::{self, FlushDecompress, Status};
|
||||
use crate::mem;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct ErrorMessage(Option<&'static str>);
|
||||
@@ -21,7 +18,10 @@ impl ErrorMessage {
|
||||
}
|
||||
|
||||
pub struct StreamWrapper {
|
||||
pub inner: Box<mz_stream>,
|
||||
// SAFETY: The field `inner` must always be accessed as a raw pointer,
|
||||
// since it points to a cyclic structure, and it must never be copied
|
||||
// by Rust.
|
||||
pub inner: *mut mz_stream,
|
||||
}
|
||||
|
||||
impl fmt::Debug for StreamWrapper {
|
||||
@@ -32,8 +32,12 @@ impl fmt::Debug for StreamWrapper {
|
||||
|
||||
impl Default for StreamWrapper {
|
||||
fn default() -> StreamWrapper {
|
||||
// SAFETY: The field `state` will be initialized across the FFI to
|
||||
// point to the opaque type `mz_internal_state`, which will contain a copy
|
||||
// of `inner`. This cyclic structure breaks the uniqueness invariant of
|
||||
// &mut mz_stream, so we must use a raw pointer instead of Box<mz_stream>.
|
||||
StreamWrapper {
|
||||
inner: Box::new(mz_stream {
|
||||
inner: Box::into_raw(Box::new(mz_stream {
|
||||
next_in: ptr::null_mut(),
|
||||
avail_in: 0,
|
||||
total_in: 0,
|
||||
@@ -46,81 +50,100 @@ impl Default for StreamWrapper {
|
||||
reserved: 0,
|
||||
opaque: ptr::null_mut(),
|
||||
state: ptr::null_mut(),
|
||||
#[cfg(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys")))]
|
||||
zalloc,
|
||||
#[cfg(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys")))]
|
||||
zfree,
|
||||
#[cfg(not(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys"))))]
|
||||
zalloc: Some(zalloc),
|
||||
#[cfg(not(all(feature = "any_zlib", not(feature = "cloudflare-zlib-sys"))))]
|
||||
zfree: Some(zfree),
|
||||
}),
|
||||
#[cfg(all(
|
||||
feature = "any_zlib",
|
||||
not(any(feature = "cloudflare-zlib-sys", feature = "libz-rs-sys"))
|
||||
))]
|
||||
zalloc: allocator::zalloc,
|
||||
#[cfg(all(
|
||||
feature = "any_zlib",
|
||||
not(any(feature = "cloudflare-zlib-sys", feature = "libz-rs-sys"))
|
||||
))]
|
||||
zfree: allocator::zfree,
|
||||
|
||||
#[cfg(all(feature = "any_zlib", feature = "cloudflare-zlib-sys"))]
|
||||
zalloc: Some(allocator::zalloc),
|
||||
#[cfg(all(feature = "any_zlib", feature = "cloudflare-zlib-sys"))]
|
||||
zfree: Some(allocator::zfree),
|
||||
|
||||
// for zlib-rs, it is most efficient to have it provide the allocator.
|
||||
// The libz-rs-sys dependency is configured to use the rust system allocator
|
||||
#[cfg(all(feature = "any_zlib", feature = "libz-rs-sys"))]
|
||||
zalloc: None,
|
||||
#[cfg(all(feature = "any_zlib", feature = "libz-rs-sys"))]
|
||||
zfree: None,
|
||||
})),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
const ALIGN: usize = std::mem::align_of::<usize>();
|
||||
|
||||
fn align_up(size: usize, align: usize) -> usize {
|
||||
(size + align - 1) & !(align - 1)
|
||||
impl Drop for StreamWrapper {
|
||||
fn drop(&mut self) {
|
||||
// SAFETY: At this point, every other allocation for struct has been freed by
|
||||
// `inflateEnd` or `deflateEnd`, and no copies of `inner` are retained by `C`,
|
||||
// so it is safe to drop the struct as long as the user respects the invariant that
|
||||
// `inner` must never be copied by Rust.
|
||||
drop(unsafe { Box::from_raw(self.inner) });
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" fn zalloc(_ptr: *mut c_void, items: AllocSize, item_size: AllocSize) -> *mut c_void {
|
||||
// We need to multiply `items` and `item_size` to get the actual desired
|
||||
// allocation size. Since `zfree` doesn't receive a size argument we
|
||||
// also need to allocate space for a `usize` as a header so we can store
|
||||
// how large the allocation is to deallocate later.
|
||||
let size = match items
|
||||
.checked_mul(item_size)
|
||||
.and_then(|i| usize::try_from(i).ok())
|
||||
.map(|size| align_up(size, ALIGN))
|
||||
.and_then(|i| i.checked_add(std::mem::size_of::<usize>()))
|
||||
{
|
||||
Some(i) => i,
|
||||
None => return ptr::null_mut(),
|
||||
};
|
||||
#[cfg(all(feature = "any_zlib", not(feature = "libz-rs-sys")))]
|
||||
mod allocator {
|
||||
use super::*;
|
||||
|
||||
// Make sure the `size` isn't too big to fail `Layout`'s restrictions
|
||||
let layout = match Layout::from_size_align(size, ALIGN) {
|
||||
Ok(layout) => layout,
|
||||
Err(_) => return ptr::null_mut(),
|
||||
};
|
||||
use std::alloc::{self, Layout};
|
||||
use std::convert::TryFrom;
|
||||
use std::os::raw::c_void;
|
||||
|
||||
unsafe {
|
||||
// Allocate the data, and if successful store the size we allocated
|
||||
// at the beginning and then return an offset pointer.
|
||||
let ptr = alloc::alloc(layout) as *mut usize;
|
||||
if ptr.is_null() {
|
||||
return ptr as *mut c_void;
|
||||
const ALIGN: usize = std::mem::align_of::<usize>();
|
||||
|
||||
fn align_up(size: usize, align: usize) -> usize {
|
||||
(size + align - 1) & !(align - 1)
|
||||
}
|
||||
|
||||
pub extern "C" fn zalloc(_ptr: *mut c_void, items: uInt, item_size: uInt) -> *mut c_void {
|
||||
// We need to multiply `items` and `item_size` to get the actual desired
|
||||
// allocation size. Since `zfree` doesn't receive a size argument we
|
||||
// also need to allocate space for a `usize` as a header so we can store
|
||||
// how large the allocation is to deallocate later.
|
||||
let size = match items
|
||||
.checked_mul(item_size)
|
||||
.and_then(|i| usize::try_from(i).ok())
|
||||
.map(|size| align_up(size, ALIGN))
|
||||
.and_then(|i| i.checked_add(std::mem::size_of::<usize>()))
|
||||
{
|
||||
Some(i) => i,
|
||||
None => return ptr::null_mut(),
|
||||
};
|
||||
|
||||
// Make sure the `size` isn't too big to fail `Layout`'s restrictions
|
||||
let layout = match Layout::from_size_align(size, ALIGN) {
|
||||
Ok(layout) => layout,
|
||||
Err(_) => return ptr::null_mut(),
|
||||
};
|
||||
|
||||
unsafe {
|
||||
// Allocate the data, and if successful store the size we allocated
|
||||
// at the beginning and then return an offset pointer.
|
||||
let ptr = alloc::alloc(layout) as *mut usize;
|
||||
if ptr.is_null() {
|
||||
return ptr as *mut c_void;
|
||||
}
|
||||
*ptr = size;
|
||||
ptr.add(1) as *mut c_void
|
||||
}
|
||||
*ptr = size;
|
||||
ptr.add(1) as *mut c_void
|
||||
}
|
||||
}
|
||||
|
||||
extern "C" fn zfree(_ptr: *mut c_void, address: *mut c_void) {
|
||||
unsafe {
|
||||
// Move our address being freed back one pointer, read the size we
|
||||
// stored in `zalloc`, and then free it using the standard Rust
|
||||
// allocator.
|
||||
let ptr = (address as *mut usize).offset(-1);
|
||||
let size = *ptr;
|
||||
let layout = Layout::from_size_align_unchecked(size, ALIGN);
|
||||
alloc::dealloc(ptr as *mut u8, layout)
|
||||
}
|
||||
}
|
||||
|
||||
impl Deref for StreamWrapper {
|
||||
type Target = mz_stream;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&*self.inner
|
||||
}
|
||||
}
|
||||
|
||||
impl DerefMut for StreamWrapper {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut *self.inner
|
||||
pub extern "C" fn zfree(_ptr: *mut c_void, address: *mut c_void) {
|
||||
unsafe {
|
||||
// Move our address being freed back one pointer, read the size we
|
||||
// stored in `zalloc`, and then free it using the standard Rust
|
||||
// allocator.
|
||||
let ptr = (address as *mut usize).offset(-1);
|
||||
let size = *ptr;
|
||||
let layout = Layout::from_size_align_unchecked(size, ALIGN);
|
||||
alloc::dealloc(ptr as *mut u8, layout)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -148,7 +171,10 @@ pub struct Stream<D: Direction> {
|
||||
|
||||
impl<D: Direction> Stream<D> {
|
||||
pub fn msg(&self) -> ErrorMessage {
|
||||
let msg = self.stream_wrapper.msg;
|
||||
// SAFETY: The field `inner` must always be accessed as a raw pointer,
|
||||
// since it points to a cyclic structure. No copies of `inner` can be
|
||||
// retained for longer than the lifetime of `self`.
|
||||
let msg = unsafe { (*self.stream_wrapper.inner).msg };
|
||||
ErrorMessage(if msg.is_null() {
|
||||
None
|
||||
} else {
|
||||
@@ -161,7 +187,7 @@ impl<D: Direction> Stream<D> {
|
||||
impl<D: Direction> Drop for Stream<D> {
|
||||
fn drop(&mut self) {
|
||||
unsafe {
|
||||
let _ = D::destroy(&mut *self.stream_wrapper);
|
||||
let _ = D::destroy(self.stream_wrapper.inner);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -185,9 +211,9 @@ pub struct Inflate {
|
||||
impl InflateBackend for Inflate {
|
||||
fn make(zlib_header: bool, window_bits: u8) -> Self {
|
||||
unsafe {
|
||||
let mut state = StreamWrapper::default();
|
||||
let state = StreamWrapper::default();
|
||||
let ret = mz_inflateInit2(
|
||||
&mut *state,
|
||||
state.inner,
|
||||
if zlib_header {
|
||||
window_bits as c_int
|
||||
} else {
|
||||
@@ -212,27 +238,38 @@ impl InflateBackend for Inflate {
|
||||
output: &mut [u8],
|
||||
flush: FlushDecompress,
|
||||
) -> Result<Status, DecompressError> {
|
||||
let raw = &mut *self.inner.stream_wrapper;
|
||||
raw.msg = ptr::null_mut();
|
||||
raw.next_in = input.as_ptr() as *mut u8;
|
||||
raw.avail_in = cmp::min(input.len(), c_uint::MAX as usize) as c_uint;
|
||||
raw.next_out = output.as_mut_ptr();
|
||||
raw.avail_out = cmp::min(output.len(), c_uint::MAX as usize) as c_uint;
|
||||
let raw = self.inner.stream_wrapper.inner;
|
||||
// SAFETY: The field `inner` must always be accessed as a raw pointer,
|
||||
// since it points to a cyclic structure. No copies of `inner` can be
|
||||
// retained for longer than the lifetime of `self`.
|
||||
unsafe {
|
||||
(*raw).msg = ptr::null_mut();
|
||||
(*raw).next_in = input.as_ptr() as *mut u8;
|
||||
(*raw).avail_in = cmp::min(input.len(), c_uint::MAX as usize) as c_uint;
|
||||
(*raw).next_out = output.as_mut_ptr();
|
||||
(*raw).avail_out = cmp::min(output.len(), c_uint::MAX as usize) as c_uint;
|
||||
|
||||
let rc = unsafe { mz_inflate(raw, flush as c_int) };
|
||||
let rc = mz_inflate(raw, flush as c_int);
|
||||
|
||||
// Unfortunately the total counters provided by zlib might be only
|
||||
// 32 bits wide and overflow while processing large amounts of data.
|
||||
self.inner.total_in += (raw.next_in as usize - input.as_ptr() as usize) as u64;
|
||||
self.inner.total_out += (raw.next_out as usize - output.as_ptr() as usize) as u64;
|
||||
// Unfortunately the total counters provided by zlib might be only
|
||||
// 32 bits wide and overflow while processing large amounts of data.
|
||||
self.inner.total_in += ((*raw).next_in as usize - input.as_ptr() as usize) as u64;
|
||||
self.inner.total_out += ((*raw).next_out as usize - output.as_ptr() as usize) as u64;
|
||||
|
||||
match rc {
|
||||
MZ_DATA_ERROR | MZ_STREAM_ERROR => mem::decompress_failed(self.inner.msg()),
|
||||
MZ_OK => Ok(Status::Ok),
|
||||
MZ_BUF_ERROR => Ok(Status::BufError),
|
||||
MZ_STREAM_END => Ok(Status::StreamEnd),
|
||||
MZ_NEED_DICT => mem::decompress_need_dict(raw.adler as u32),
|
||||
c => panic!("unknown return code: {}", c),
|
||||
// reset these pointers so we don't accidentally read them later
|
||||
(*raw).next_in = ptr::null_mut();
|
||||
(*raw).avail_in = 0;
|
||||
(*raw).next_out = ptr::null_mut();
|
||||
(*raw).avail_out = 0;
|
||||
|
||||
match rc {
|
||||
MZ_DATA_ERROR | MZ_STREAM_ERROR => mem::decompress_failed(self.inner.msg()),
|
||||
MZ_OK => Ok(Status::Ok),
|
||||
MZ_BUF_ERROR => Ok(Status::BufError),
|
||||
MZ_STREAM_END => Ok(Status::StreamEnd),
|
||||
MZ_NEED_DICT => mem::decompress_need_dict((*raw).adler as u32),
|
||||
c => panic!("unknown return code: {}", c),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -243,7 +280,7 @@ impl InflateBackend for Inflate {
|
||||
-MZ_DEFAULT_WINDOW_BITS
|
||||
};
|
||||
unsafe {
|
||||
inflateReset2(&mut *self.inner.stream_wrapper, bits);
|
||||
inflateReset2(self.inner.stream_wrapper.inner, bits);
|
||||
}
|
||||
self.inner.total_out = 0;
|
||||
self.inner.total_in = 0;
|
||||
@@ -270,9 +307,9 @@ pub struct Deflate {
|
||||
impl DeflateBackend for Deflate {
|
||||
fn make(level: Compression, zlib_header: bool, window_bits: u8) -> Self {
|
||||
unsafe {
|
||||
let mut state = StreamWrapper::default();
|
||||
let state = StreamWrapper::default();
|
||||
let ret = mz_deflateInit2(
|
||||
&mut *state,
|
||||
state.inner,
|
||||
level.0 as c_int,
|
||||
MZ_DEFLATED,
|
||||
if zlib_header {
|
||||
@@ -300,33 +337,44 @@ impl DeflateBackend for Deflate {
|
||||
output: &mut [u8],
|
||||
flush: FlushCompress,
|
||||
) -> Result<Status, CompressError> {
|
||||
let raw = &mut *self.inner.stream_wrapper;
|
||||
raw.msg = ptr::null_mut();
|
||||
raw.next_in = input.as_ptr() as *mut _;
|
||||
raw.avail_in = cmp::min(input.len(), c_uint::MAX as usize) as c_uint;
|
||||
raw.next_out = output.as_mut_ptr();
|
||||
raw.avail_out = cmp::min(output.len(), c_uint::MAX as usize) as c_uint;
|
||||
let raw = self.inner.stream_wrapper.inner;
|
||||
// SAFETY: The field `inner` must always be accessed as a raw pointer,
|
||||
// since it points to a cyclic structure. No copies of `inner` can be
|
||||
// retained for longer than the lifetime of `self`.
|
||||
unsafe {
|
||||
(*raw).msg = ptr::null_mut();
|
||||
(*raw).next_in = input.as_ptr() as *mut _;
|
||||
(*raw).avail_in = cmp::min(input.len(), c_uint::MAX as usize) as c_uint;
|
||||
(*raw).next_out = output.as_mut_ptr();
|
||||
(*raw).avail_out = cmp::min(output.len(), c_uint::MAX as usize) as c_uint;
|
||||
|
||||
let rc = unsafe { mz_deflate(raw, flush as c_int) };
|
||||
let rc = mz_deflate(raw, flush as c_int);
|
||||
|
||||
// Unfortunately the total counters provided by zlib might be only
|
||||
// 32 bits wide and overflow while processing large amounts of data.
|
||||
self.inner.total_in += (raw.next_in as usize - input.as_ptr() as usize) as u64;
|
||||
self.inner.total_out += (raw.next_out as usize - output.as_ptr() as usize) as u64;
|
||||
// Unfortunately the total counters provided by zlib might be only
|
||||
// 32 bits wide and overflow while processing large amounts of data.
|
||||
|
||||
match rc {
|
||||
MZ_OK => Ok(Status::Ok),
|
||||
MZ_BUF_ERROR => Ok(Status::BufError),
|
||||
MZ_STREAM_END => Ok(Status::StreamEnd),
|
||||
MZ_STREAM_ERROR => mem::compress_failed(self.inner.msg()),
|
||||
c => panic!("unknown return code: {}", c),
|
||||
self.inner.total_in += ((*raw).next_in as usize - input.as_ptr() as usize) as u64;
|
||||
self.inner.total_out += ((*raw).next_out as usize - output.as_ptr() as usize) as u64;
|
||||
// reset these pointers so we don't accidentally read them later
|
||||
(*raw).next_in = ptr::null_mut();
|
||||
(*raw).avail_in = 0;
|
||||
(*raw).next_out = ptr::null_mut();
|
||||
(*raw).avail_out = 0;
|
||||
|
||||
match rc {
|
||||
MZ_OK => Ok(Status::Ok),
|
||||
MZ_BUF_ERROR => Ok(Status::BufError),
|
||||
MZ_STREAM_END => Ok(Status::StreamEnd),
|
||||
MZ_STREAM_ERROR => mem::compress_failed(self.inner.msg()),
|
||||
c => panic!("unknown return code: {}", c),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn reset(&mut self) {
|
||||
self.inner.total_in = 0;
|
||||
self.inner.total_out = 0;
|
||||
let rc = unsafe { mz_deflateReset(&mut *self.inner.stream_wrapper) };
|
||||
let rc = unsafe { mz_deflateReset(self.inner.stream_wrapper.inner) };
|
||||
assert_eq!(rc, MZ_OK);
|
||||
}
|
||||
}
|
||||
@@ -347,6 +395,7 @@ pub use self::c_backend::*;
|
||||
|
||||
/// For backwards compatibility, we provide symbols as `mz_` to mimic the miniz API
|
||||
#[allow(bad_style)]
|
||||
#[allow(unused_imports)]
|
||||
mod c_backend {
|
||||
use std::mem;
|
||||
use std::os::raw::{c_char, c_int};
|
||||
@@ -354,10 +403,17 @@ mod c_backend {
|
||||
#[cfg(feature = "zlib-ng")]
|
||||
use libz_ng_sys as libz;
|
||||
|
||||
#[cfg(all(not(feature = "zlib-ng"), feature = "zlib-rs"))]
|
||||
use libz_rs_sys as libz;
|
||||
|
||||
#[cfg(all(not(feature = "zlib-ng"), feature = "cloudflare_zlib"))]
|
||||
use cloudflare_zlib_sys as libz;
|
||||
|
||||
#[cfg(all(not(feature = "cloudflare_zlib"), not(feature = "zlib-ng")))]
|
||||
#[cfg(all(
|
||||
not(feature = "cloudflare_zlib"),
|
||||
not(feature = "zlib-ng"),
|
||||
not(feature = "zlib-rs")
|
||||
))]
|
||||
use libz_sys as libz;
|
||||
|
||||
pub use libz::deflate as mz_deflate;
|
||||
@@ -382,13 +438,14 @@ mod c_backend {
|
||||
pub use libz::Z_STREAM_END as MZ_STREAM_END;
|
||||
pub use libz::Z_STREAM_ERROR as MZ_STREAM_ERROR;
|
||||
pub use libz::Z_SYNC_FLUSH as MZ_SYNC_FLUSH;
|
||||
pub type AllocSize = libz::uInt;
|
||||
|
||||
pub const MZ_DEFAULT_WINDOW_BITS: c_int = 15;
|
||||
|
||||
#[cfg(feature = "zlib-ng")]
|
||||
const ZLIB_VERSION: &'static str = "2.1.0.devel\0";
|
||||
#[cfg(not(feature = "zlib-ng"))]
|
||||
#[cfg(all(not(feature = "zlib-ng"), feature = "zlib-rs"))]
|
||||
const ZLIB_VERSION: &'static str = "0.1.0\0";
|
||||
#[cfg(not(any(feature = "zlib-ng", feature = "zlib-rs")))]
|
||||
const ZLIB_VERSION: &'static str = "1.2.8\0";
|
||||
|
||||
pub unsafe extern "C" fn mz_deflateInit2(
|
||||
|
||||
4
third_party/rust/flate2/src/ffi/mod.rs
vendored
4
third_party/rust/flate2/src/ffi/mod.rs
vendored
@@ -40,9 +40,9 @@ mod c;
|
||||
#[cfg(feature = "any_zlib")]
|
||||
pub use self::c::*;
|
||||
|
||||
#[cfg(not(feature = "any_zlib"))]
|
||||
#[cfg(all(not(feature = "any_zlib"), feature = "miniz_oxide"))]
|
||||
mod rust;
|
||||
#[cfg(not(feature = "any_zlib"))]
|
||||
#[cfg(all(not(feature = "any_zlib"), feature = "miniz_oxide"))]
|
||||
pub use self::rust::*;
|
||||
|
||||
impl std::fmt::Debug for ErrorMessage {
|
||||
|
||||
595
third_party/rust/flate2/src/gz/bufread.rs
vendored
595
third_party/rust/flate2/src/gz/bufread.rs
vendored
@@ -3,9 +3,8 @@ use std::io;
|
||||
use std::io::prelude::*;
|
||||
use std::mem;
|
||||
|
||||
use super::{GzBuilder, GzHeader};
|
||||
use super::{FCOMMENT, FEXTRA, FHCRC, FNAME};
|
||||
use crate::crc::{Crc, CrcReader};
|
||||
use super::{corrupt, read_into, GzBuilder, GzHeader, GzHeaderParser};
|
||||
use crate::crc::CrcReader;
|
||||
use crate::deflate;
|
||||
use crate::Compression;
|
||||
|
||||
@@ -18,118 +17,12 @@ fn copy(into: &mut [u8], from: &[u8], pos: &mut usize) -> usize {
|
||||
min
|
||||
}
|
||||
|
||||
pub(crate) fn corrupt() -> io::Error {
|
||||
io::Error::new(
|
||||
io::ErrorKind::InvalidInput,
|
||||
"corrupt gzip stream does not have a matching checksum",
|
||||
)
|
||||
}
|
||||
|
||||
fn bad_header() -> io::Error {
|
||||
io::Error::new(io::ErrorKind::InvalidInput, "invalid gzip header")
|
||||
}
|
||||
|
||||
fn read_le_u16<R: Read>(r: &mut Buffer<R>) -> io::Result<u16> {
|
||||
let mut b = [0; 2];
|
||||
r.read_and_forget(&mut b)?;
|
||||
Ok((b[0] as u16) | ((b[1] as u16) << 8))
|
||||
}
|
||||
|
||||
fn read_gz_header_part<'a, R: Read>(r: &'a mut Buffer<'a, R>) -> io::Result<()> {
|
||||
loop {
|
||||
match r.part.state {
|
||||
GzHeaderParsingState::Start => {
|
||||
let mut header = [0; 10];
|
||||
r.read_and_forget(&mut header)?;
|
||||
|
||||
if header[0] != 0x1f || header[1] != 0x8b {
|
||||
return Err(bad_header());
|
||||
}
|
||||
if header[2] != 8 {
|
||||
return Err(bad_header());
|
||||
}
|
||||
|
||||
r.part.flg = header[3];
|
||||
r.part.header.mtime = ((header[4] as u32) << 0)
|
||||
| ((header[5] as u32) << 8)
|
||||
| ((header[6] as u32) << 16)
|
||||
| ((header[7] as u32) << 24);
|
||||
let _xfl = header[8];
|
||||
r.part.header.operating_system = header[9];
|
||||
r.part.state = GzHeaderParsingState::Xlen;
|
||||
}
|
||||
GzHeaderParsingState::Xlen => {
|
||||
if r.part.flg & FEXTRA != 0 {
|
||||
r.part.xlen = read_le_u16(r)?;
|
||||
}
|
||||
r.part.state = GzHeaderParsingState::Extra;
|
||||
}
|
||||
GzHeaderParsingState::Extra => {
|
||||
if r.part.flg & FEXTRA != 0 {
|
||||
let mut extra = vec![0; r.part.xlen as usize];
|
||||
r.read_and_forget(&mut extra)?;
|
||||
r.part.header.extra = Some(extra);
|
||||
}
|
||||
r.part.state = GzHeaderParsingState::Filename;
|
||||
}
|
||||
GzHeaderParsingState::Filename => {
|
||||
if r.part.flg & FNAME != 0 {
|
||||
if r.part.header.filename.is_none() {
|
||||
r.part.header.filename = Some(Vec::new());
|
||||
};
|
||||
for byte in r.bytes() {
|
||||
let byte = byte?;
|
||||
if byte == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
r.part.state = GzHeaderParsingState::Comment;
|
||||
}
|
||||
GzHeaderParsingState::Comment => {
|
||||
if r.part.flg & FCOMMENT != 0 {
|
||||
if r.part.header.comment.is_none() {
|
||||
r.part.header.comment = Some(Vec::new());
|
||||
};
|
||||
for byte in r.bytes() {
|
||||
let byte = byte?;
|
||||
if byte == 0 {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
r.part.state = GzHeaderParsingState::Crc;
|
||||
}
|
||||
GzHeaderParsingState::Crc => {
|
||||
if r.part.flg & FHCRC != 0 {
|
||||
let stored_crc = read_le_u16(r)?;
|
||||
let calced_crc = r.part.crc.sum() as u16;
|
||||
if stored_crc != calced_crc {
|
||||
return Err(corrupt());
|
||||
}
|
||||
}
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub(crate) fn read_gz_header<R: Read>(r: &mut R) -> io::Result<GzHeader> {
|
||||
let mut part = GzHeaderPartial::new();
|
||||
|
||||
let result = {
|
||||
let mut reader = Buffer::new(&mut part, r);
|
||||
read_gz_header_part(&mut reader)
|
||||
};
|
||||
result.map(|()| part.take_header())
|
||||
}
|
||||
|
||||
/// A gzip streaming encoder
|
||||
///
|
||||
/// This structure exposes a [`BufRead`] interface that will read uncompressed data
|
||||
/// from the underlying reader and expose the compressed version as a [`BufRead`]
|
||||
/// interface.
|
||||
/// This structure implements a [`Read`] interface. When read from, it reads
|
||||
/// uncompressed data from the underlying [`BufRead`] and provides the compressed data.
|
||||
///
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
|
||||
///
|
||||
/// # Examples
|
||||
@@ -270,11 +163,22 @@ impl<R: BufRead + Write> Write for GzEncoder<R> {
|
||||
}
|
||||
}
|
||||
|
||||
/// A gzip streaming decoder
|
||||
/// A decoder for a single member of a [gzip file].
|
||||
///
|
||||
/// This structure consumes a [`BufRead`] interface, reading compressed data
|
||||
/// from the underlying reader, and emitting uncompressed data.
|
||||
/// This structure implements a [`Read`] interface. When read from, it reads
|
||||
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
|
||||
///
|
||||
/// After reading a single member of the gzip data this reader will return
|
||||
/// Ok(0) even if there are more bytes available in the underlying reader.
|
||||
/// If you need the following bytes, call `into_inner()` after Ok(0) to
|
||||
/// recover the underlying reader.
|
||||
///
|
||||
/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
|
||||
/// or read more
|
||||
/// [in the introduction](../index.html#about-multi-member-gzip-files).
|
||||
///
|
||||
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
|
||||
///
|
||||
/// # Examples
|
||||
@@ -305,161 +209,38 @@ impl<R: BufRead + Write> Write for GzEncoder<R> {
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
pub struct GzDecoder<R> {
|
||||
inner: GzState,
|
||||
header: Option<GzHeader>,
|
||||
state: GzState,
|
||||
reader: CrcReader<deflate::bufread::DeflateDecoder<R>>,
|
||||
multi: bool,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum GzHeaderParsingState {
|
||||
Start,
|
||||
Xlen,
|
||||
Extra,
|
||||
Filename,
|
||||
Comment,
|
||||
Crc,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct GzHeaderPartial {
|
||||
buf: Vec<u8>,
|
||||
state: GzHeaderParsingState,
|
||||
flg: u8,
|
||||
xlen: u16,
|
||||
crc: Crc,
|
||||
header: GzHeader,
|
||||
}
|
||||
|
||||
impl GzHeaderPartial {
|
||||
fn new() -> GzHeaderPartial {
|
||||
GzHeaderPartial {
|
||||
buf: Vec::with_capacity(10), // minimum header length
|
||||
state: GzHeaderParsingState::Start,
|
||||
flg: 0,
|
||||
xlen: 0,
|
||||
crc: Crc::new(),
|
||||
header: GzHeader {
|
||||
extra: None,
|
||||
filename: None,
|
||||
comment: None,
|
||||
operating_system: 0,
|
||||
mtime: 0,
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
pub fn take_header(self) -> GzHeader {
|
||||
self.header
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
enum GzState {
|
||||
Header(GzHeaderPartial),
|
||||
Body,
|
||||
Finished(usize, [u8; 8]),
|
||||
Header(GzHeaderParser),
|
||||
Body(GzHeader),
|
||||
Finished(GzHeader, usize, [u8; 8]),
|
||||
Err(io::Error),
|
||||
End,
|
||||
}
|
||||
|
||||
/// A small adapter which reads data originally from `buf` and then reads all
|
||||
/// further data from `reader`. This will also buffer all data read from
|
||||
/// `reader` into `buf` for reuse on a further call.
|
||||
struct Buffer<'a, T: 'a> {
|
||||
part: &'a mut GzHeaderPartial,
|
||||
buf_cur: usize,
|
||||
buf_max: usize,
|
||||
reader: &'a mut T,
|
||||
}
|
||||
|
||||
impl<'a, T> Buffer<'a, T> {
|
||||
fn new(part: &'a mut GzHeaderPartial, reader: &'a mut T) -> Buffer<'a, T> {
|
||||
Buffer {
|
||||
reader,
|
||||
buf_cur: 0,
|
||||
buf_max: part.buf.len(),
|
||||
part,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T: Read> Read for Buffer<'a, T> {
|
||||
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
let mut bufref = match self.part.state {
|
||||
GzHeaderParsingState::Filename => self.part.header.filename.as_mut(),
|
||||
GzHeaderParsingState::Comment => self.part.header.comment.as_mut(),
|
||||
_ => None,
|
||||
};
|
||||
if let Some(ref mut b) = bufref {
|
||||
// we have a direct reference to a buffer where to write
|
||||
let len = self.reader.read(buf)?;
|
||||
if len > 0 && buf[len - 1] == 0 {
|
||||
// we do not append the final 0
|
||||
b.extend_from_slice(&buf[..len - 1]);
|
||||
} else {
|
||||
b.extend_from_slice(&buf[..len]);
|
||||
}
|
||||
self.part.crc.update(&buf[..len]);
|
||||
Ok(len)
|
||||
} else if self.buf_cur == self.buf_max {
|
||||
// we read new bytes and also save them in self.part.buf
|
||||
let len = self.reader.read(buf)?;
|
||||
self.part.buf.extend_from_slice(&buf[..len]);
|
||||
self.part.crc.update(&buf[..len]);
|
||||
Ok(len)
|
||||
} else {
|
||||
// we first read the previously saved bytes
|
||||
let len = (&self.part.buf[self.buf_cur..self.buf_max]).read(buf)?;
|
||||
self.buf_cur += len;
|
||||
Ok(len)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a, T> Buffer<'a, T>
|
||||
where
|
||||
T: std::io::Read,
|
||||
{
|
||||
// If we manage to read all the bytes, we reset the buffer
|
||||
fn read_and_forget(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
self.read_exact(buf)?;
|
||||
// we managed to read the whole buf
|
||||
// we will no longer need the previously saved bytes in self.part.buf
|
||||
let rlen = buf.len();
|
||||
self.part.buf.truncate(0);
|
||||
self.buf_cur = 0;
|
||||
self.buf_max = 0;
|
||||
Ok(rlen)
|
||||
}
|
||||
End(Option<GzHeader>),
|
||||
}
|
||||
|
||||
impl<R: BufRead> GzDecoder<R> {
|
||||
/// Creates a new decoder from the given reader, immediately parsing the
|
||||
/// gzip header.
|
||||
pub fn new(mut r: R) -> GzDecoder<R> {
|
||||
let mut part = GzHeaderPartial::new();
|
||||
let mut header = None;
|
||||
let mut header_parser = GzHeaderParser::new();
|
||||
|
||||
let result = {
|
||||
let mut reader = Buffer::new(&mut part, &mut r);
|
||||
read_gz_header_part(&mut reader)
|
||||
};
|
||||
|
||||
let state = match result {
|
||||
Ok(()) => {
|
||||
header = Some(part.take_header());
|
||||
GzState::Body
|
||||
let state = match header_parser.parse(&mut r) {
|
||||
Ok(_) => GzState::Body(GzHeader::from(header_parser)),
|
||||
Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => {
|
||||
GzState::Header(header_parser)
|
||||
}
|
||||
Err(ref err) if io::ErrorKind::WouldBlock == err.kind() => GzState::Header(part),
|
||||
Err(err) => GzState::Err(err),
|
||||
};
|
||||
|
||||
GzDecoder {
|
||||
inner: state,
|
||||
state,
|
||||
reader: CrcReader::new(deflate::bufread::DeflateDecoder::new(r)),
|
||||
multi: false,
|
||||
header,
|
||||
}
|
||||
}
|
||||
|
||||
@@ -472,7 +253,11 @@ impl<R: BufRead> GzDecoder<R> {
|
||||
impl<R> GzDecoder<R> {
|
||||
/// Returns the header associated with this stream, if it was valid
|
||||
pub fn header(&self) -> Option<&GzHeader> {
|
||||
self.header.as_ref()
|
||||
match &self.state {
|
||||
GzState::Body(header) | GzState::Finished(header, _, _) => Some(header),
|
||||
GzState::End(header) => header.as_ref(),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
/// Acquires a reference to the underlying reader.
|
||||
@@ -496,111 +281,61 @@ impl<R> GzDecoder<R> {
|
||||
|
||||
impl<R: BufRead> Read for GzDecoder<R> {
|
||||
fn read(&mut self, into: &mut [u8]) -> io::Result<usize> {
|
||||
let GzDecoder {
|
||||
inner,
|
||||
header,
|
||||
reader,
|
||||
multi,
|
||||
} = self;
|
||||
|
||||
loop {
|
||||
*inner = match mem::replace(inner, GzState::End) {
|
||||
GzState::Header(mut part) => {
|
||||
let result = {
|
||||
let mut reader = Buffer::new(&mut part, reader.get_mut().get_mut());
|
||||
read_gz_header_part(&mut reader)
|
||||
};
|
||||
match result {
|
||||
Ok(()) => {
|
||||
*header = Some(part.take_header());
|
||||
GzState::Body
|
||||
}
|
||||
Err(err) if io::ErrorKind::WouldBlock == err.kind() => {
|
||||
*inner = GzState::Header(part);
|
||||
return Err(err);
|
||||
}
|
||||
Err(err) => return Err(err),
|
||||
}
|
||||
match &mut self.state {
|
||||
GzState::Header(parser) => {
|
||||
parser.parse(self.reader.get_mut().get_mut())?;
|
||||
self.state = GzState::Body(GzHeader::from(mem::take(parser)));
|
||||
}
|
||||
GzState::Body => {
|
||||
GzState::Body(header) => {
|
||||
if into.is_empty() {
|
||||
*inner = GzState::Body;
|
||||
return Ok(0);
|
||||
}
|
||||
|
||||
let n = reader.read(into).map_err(|err| {
|
||||
if io::ErrorKind::WouldBlock == err.kind() {
|
||||
*inner = GzState::Body;
|
||||
match self.reader.read(into)? {
|
||||
0 => {
|
||||
self.state = GzState::Finished(mem::take(header), 0, [0; 8]);
|
||||
}
|
||||
|
||||
err
|
||||
})?;
|
||||
|
||||
match n {
|
||||
0 => GzState::Finished(0, [0; 8]),
|
||||
n => {
|
||||
*inner = GzState::Body;
|
||||
return Ok(n);
|
||||
}
|
||||
}
|
||||
}
|
||||
GzState::Finished(pos, mut buf) => {
|
||||
if pos < buf.len() {
|
||||
let n = reader
|
||||
.get_mut()
|
||||
.get_mut()
|
||||
.read(&mut buf[pos..])
|
||||
.and_then(|n| {
|
||||
if n == 0 {
|
||||
Err(io::ErrorKind::UnexpectedEof.into())
|
||||
} else {
|
||||
Ok(n)
|
||||
}
|
||||
})
|
||||
.map_err(|err| {
|
||||
if io::ErrorKind::WouldBlock == err.kind() {
|
||||
*inner = GzState::Finished(pos, buf);
|
||||
}
|
||||
|
||||
err
|
||||
})?;
|
||||
|
||||
GzState::Finished(pos + n, buf)
|
||||
GzState::Finished(header, pos, buf) => {
|
||||
if *pos < buf.len() {
|
||||
*pos += read_into(self.reader.get_mut().get_mut(), &mut buf[*pos..])?;
|
||||
} else {
|
||||
let (crc, amt) = finish(&buf);
|
||||
|
||||
if crc != reader.crc().sum() || amt != reader.crc().amount() {
|
||||
if crc != self.reader.crc().sum() || amt != self.reader.crc().amount() {
|
||||
self.state = GzState::End(Some(mem::take(header)));
|
||||
return Err(corrupt());
|
||||
} else if *multi {
|
||||
let is_eof = reader
|
||||
} else if self.multi {
|
||||
let is_eof = self
|
||||
.reader
|
||||
.get_mut()
|
||||
.get_mut()
|
||||
.fill_buf()
|
||||
.map(|buf| buf.is_empty())
|
||||
.map_err(|err| {
|
||||
if io::ErrorKind::WouldBlock == err.kind() {
|
||||
*inner = GzState::Finished(pos, buf);
|
||||
}
|
||||
|
||||
err
|
||||
})?;
|
||||
.map(|buf| buf.is_empty())?;
|
||||
|
||||
if is_eof {
|
||||
GzState::End
|
||||
self.state = GzState::End(Some(mem::take(header)));
|
||||
} else {
|
||||
reader.reset();
|
||||
reader.get_mut().reset_data();
|
||||
header.take();
|
||||
GzState::Header(GzHeaderPartial::new())
|
||||
self.reader.reset();
|
||||
self.reader.get_mut().reset_data();
|
||||
self.state = GzState::Header(GzHeaderParser::new())
|
||||
}
|
||||
} else {
|
||||
GzState::End
|
||||
self.state = GzState::End(Some(mem::take(header)));
|
||||
}
|
||||
}
|
||||
}
|
||||
GzState::Err(err) => return Err(err),
|
||||
GzState::End => return Ok(0),
|
||||
};
|
||||
GzState::Err(err) => {
|
||||
let result = Err(mem::replace(err, io::ErrorKind::Other.into()));
|
||||
self.state = GzState::End(None);
|
||||
return result;
|
||||
}
|
||||
GzState::End(_) => return Ok(0),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -615,18 +350,20 @@ impl<R: BufRead + Write> Write for GzDecoder<R> {
|
||||
}
|
||||
}
|
||||
|
||||
/// A gzip streaming decoder that decodes all members of a multistream
|
||||
/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
|
||||
///
|
||||
/// A gzip member consists of a header, compressed data and a trailer. The [gzip
|
||||
/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
|
||||
/// gzip members to be joined in a single stream. `MultiGzDecoder` will
|
||||
/// decode all consecutive members while `GzDecoder` will only decompress
|
||||
/// the first gzip member. The multistream format is commonly used in
|
||||
/// bioinformatics, for example when using the BGZF compressed data.
|
||||
/// This structure implements a [`Read`] interface. When read from, it reads
|
||||
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
|
||||
///
|
||||
/// This structure exposes a [`BufRead`] interface that will consume all gzip members
|
||||
/// from the underlying reader and emit uncompressed data.
|
||||
/// A gzip file consists of a series of *members* concatenated one after another.
|
||||
/// MultiGzDecoder decodes all members from the data and only returns Ok(0) when the
|
||||
/// underlying reader does. For a file, this reads to the end of the file.
|
||||
///
|
||||
/// To handle members separately, see [GzDecoder] or read more
|
||||
/// [in the introduction](../index.html#about-multi-member-gzip-files).
|
||||
///
|
||||
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
|
||||
///
|
||||
/// # Examples
|
||||
@@ -699,154 +436,48 @@ impl<R: BufRead> Read for MultiGzDecoder<R> {
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub mod tests {
|
||||
use crate::gz::bufread::*;
|
||||
use std::io;
|
||||
use std::io::{Cursor, Read, Write};
|
||||
mod test {
|
||||
use crate::bufread::GzDecoder;
|
||||
use crate::gz::write;
|
||||
use crate::Compression;
|
||||
use std::io::{Read, Write};
|
||||
|
||||
//a cursor turning EOF into blocking errors
|
||||
#[derive(Debug)]
|
||||
pub struct BlockingCursor {
|
||||
pub cursor: Cursor<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl BlockingCursor {
|
||||
pub fn new() -> BlockingCursor {
|
||||
BlockingCursor {
|
||||
cursor: Cursor::new(Vec::new()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_position(&mut self, pos: u64) {
|
||||
self.cursor.set_position(pos)
|
||||
}
|
||||
|
||||
pub fn position(&mut self) -> u64 {
|
||||
self.cursor.position()
|
||||
}
|
||||
}
|
||||
|
||||
impl Write for BlockingCursor {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
self.cursor.write(buf)
|
||||
}
|
||||
fn flush(&mut self) -> io::Result<()> {
|
||||
self.cursor.flush()
|
||||
}
|
||||
}
|
||||
|
||||
impl Read for BlockingCursor {
|
||||
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
//use the cursor, except it turns eof into blocking error
|
||||
let r = self.cursor.read(buf);
|
||||
match r {
|
||||
Err(ref err) => {
|
||||
if err.kind() == io::ErrorKind::UnexpectedEof {
|
||||
return Err(io::ErrorKind::WouldBlock.into());
|
||||
}
|
||||
}
|
||||
Ok(0) => {
|
||||
//regular EOF turned into blocking error
|
||||
return Err(io::ErrorKind::WouldBlock.into());
|
||||
}
|
||||
Ok(_n) => {}
|
||||
}
|
||||
r
|
||||
}
|
||||
}
|
||||
// GzDecoder consumes one gzip member and then returns 0 for subsequent reads, allowing any
|
||||
// additional data to be consumed by the caller.
|
||||
#[test]
|
||||
// test function read_and_forget of Buffer
|
||||
fn buffer_read_and_forget() {
|
||||
// this is unused except for the buffering
|
||||
let mut part = GzHeaderPartial::new();
|
||||
// this is a reader which receives data afterwards
|
||||
let mut r = BlockingCursor::new();
|
||||
let data = vec![1, 2, 3];
|
||||
let mut out = Vec::with_capacity(7);
|
||||
fn decode_extra_data() {
|
||||
let expected = "Hello World";
|
||||
|
||||
match r.write_all(&data) {
|
||||
Ok(()) => {}
|
||||
_ => {
|
||||
panic!("Unexpected result for write_all");
|
||||
}
|
||||
}
|
||||
r.set_position(0);
|
||||
let compressed = {
|
||||
let mut e = write::GzEncoder::new(Vec::new(), Compression::default());
|
||||
e.write(expected.as_ref()).unwrap();
|
||||
let mut b = e.finish().unwrap();
|
||||
b.push(b'x');
|
||||
b
|
||||
};
|
||||
|
||||
// First read : successful for one byte
|
||||
let mut reader = Buffer::new(&mut part, &mut r);
|
||||
out.resize(1, 0);
|
||||
match reader.read_and_forget(&mut out) {
|
||||
Ok(1) => {}
|
||||
_ => {
|
||||
panic!("Unexpected result for read_and_forget with data");
|
||||
}
|
||||
}
|
||||
let mut output = Vec::new();
|
||||
let mut decoder = GzDecoder::new(compressed.as_slice());
|
||||
let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
|
||||
assert_eq!(decoded_bytes, output.len());
|
||||
let actual = std::str::from_utf8(&output).expect("String parsing error");
|
||||
assert_eq!(
|
||||
actual, expected,
|
||||
"after decompression we obtain the original input"
|
||||
);
|
||||
|
||||
// Second read : incomplete for 7 bytes (we have only 2)
|
||||
out.resize(7, 0);
|
||||
match reader.read_and_forget(&mut out) {
|
||||
Err(ref err) => {
|
||||
assert_eq!(io::ErrorKind::WouldBlock, err.kind());
|
||||
}
|
||||
_ => {
|
||||
panic!("Unexpected result for read_and_forget with incomplete");
|
||||
}
|
||||
}
|
||||
|
||||
// 3 more data bytes have arrived
|
||||
let pos = r.position();
|
||||
let data2 = vec![4, 5, 6];
|
||||
match r.write_all(&data2) {
|
||||
Ok(()) => {}
|
||||
_ => {
|
||||
panic!("Unexpected result for write_all");
|
||||
}
|
||||
}
|
||||
r.set_position(pos);
|
||||
|
||||
// Third read : still incomplete for 7 bytes (we have 5)
|
||||
let mut reader2 = Buffer::new(&mut part, &mut r);
|
||||
match reader2.read_and_forget(&mut out) {
|
||||
Err(ref err) => {
|
||||
assert_eq!(io::ErrorKind::WouldBlock, err.kind());
|
||||
}
|
||||
_ => {
|
||||
panic!("Unexpected result for read_and_forget with more incomplete");
|
||||
}
|
||||
}
|
||||
|
||||
// 3 more data bytes have arrived again
|
||||
let pos2 = r.position();
|
||||
let data3 = vec![7, 8, 9];
|
||||
match r.write_all(&data3) {
|
||||
Ok(()) => {}
|
||||
_ => {
|
||||
panic!("Unexpected result for write_all");
|
||||
}
|
||||
}
|
||||
r.set_position(pos2);
|
||||
|
||||
// Fourth read : now successful for 7 bytes
|
||||
let mut reader3 = Buffer::new(&mut part, &mut r);
|
||||
match reader3.read_and_forget(&mut out) {
|
||||
Ok(7) => {
|
||||
assert_eq!(out[0], 2);
|
||||
assert_eq!(out[6], 8);
|
||||
}
|
||||
_ => {
|
||||
panic!("Unexpected result for read_and_forget with data");
|
||||
}
|
||||
}
|
||||
|
||||
// Fifth read : successful for one more byte
|
||||
out.resize(1, 0);
|
||||
match reader3.read_and_forget(&mut out) {
|
||||
Ok(1) => {
|
||||
assert_eq!(out[0], 9);
|
||||
}
|
||||
_ => {
|
||||
panic!("Unexpected result for read_and_forget with data");
|
||||
}
|
||||
}
|
||||
output.clear();
|
||||
assert_eq!(
|
||||
decoder.read(&mut output).unwrap(),
|
||||
0,
|
||||
"subsequent read of decoder returns 0, but inner reader can return additional data"
|
||||
);
|
||||
let mut reader = decoder.into_inner();
|
||||
assert_eq!(
|
||||
reader.read_to_end(&mut output).unwrap(),
|
||||
1,
|
||||
"extra data is accessible in underlying buf-read"
|
||||
);
|
||||
assert_eq!(output, b"x");
|
||||
}
|
||||
}
|
||||
|
||||
325
third_party/rust/flate2/src/gz/mod.rs
vendored
325
third_party/rust/flate2/src/gz/mod.rs
vendored
@@ -1,19 +1,24 @@
|
||||
use std::ffi::CString;
|
||||
use std::io::prelude::*;
|
||||
use std::io::{BufRead, Error, ErrorKind, Read, Result, Write};
|
||||
use std::time;
|
||||
|
||||
use crate::bufreader::BufReader;
|
||||
use crate::Compression;
|
||||
use crate::{Compression, Crc};
|
||||
|
||||
pub static FHCRC: u8 = 1 << 1;
|
||||
pub static FEXTRA: u8 = 1 << 2;
|
||||
pub static FNAME: u8 = 1 << 3;
|
||||
pub static FCOMMENT: u8 = 1 << 4;
|
||||
pub static FRESERVED: u8 = 1 << 5 | 1 << 6 | 1 << 7;
|
||||
|
||||
pub mod bufread;
|
||||
pub mod read;
|
||||
pub mod write;
|
||||
|
||||
// The maximum length of the header filename and comment fields. More than
|
||||
// enough for these fields in reasonable use, but prevents possible attacks.
|
||||
const MAX_HEADER_BUF: usize = 65535;
|
||||
|
||||
/// A structure representing the header of a gzip stream.
|
||||
///
|
||||
/// The header can contain metadata about the file that was compressed, if
|
||||
@@ -82,6 +87,210 @@ impl GzHeader {
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum GzHeaderState {
|
||||
Start(u8, [u8; 10]),
|
||||
Xlen(Option<Box<Crc>>, u8, [u8; 2]),
|
||||
Extra(Option<Box<Crc>>, u16),
|
||||
Filename(Option<Box<Crc>>),
|
||||
Comment(Option<Box<Crc>>),
|
||||
Crc(Option<Box<Crc>>, u8, [u8; 2]),
|
||||
Complete,
|
||||
}
|
||||
|
||||
impl Default for GzHeaderState {
|
||||
fn default() -> Self {
|
||||
Self::Complete
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Default)]
|
||||
pub struct GzHeaderParser {
|
||||
state: GzHeaderState,
|
||||
flags: u8,
|
||||
header: GzHeader,
|
||||
}
|
||||
|
||||
impl GzHeaderParser {
|
||||
fn new() -> Self {
|
||||
GzHeaderParser {
|
||||
state: GzHeaderState::Start(0, [0; 10]),
|
||||
flags: 0,
|
||||
header: GzHeader::default(),
|
||||
}
|
||||
}
|
||||
|
||||
fn parse<'a, R: Read>(&mut self, r: &'a mut R) -> Result<()> {
|
||||
loop {
|
||||
match &mut self.state {
|
||||
GzHeaderState::Start(count, buffer) => {
|
||||
while (*count as usize) < buffer.len() {
|
||||
*count += read_into(r, &mut buffer[*count as usize..])? as u8;
|
||||
}
|
||||
// Gzip identification bytes
|
||||
if buffer[0] != 0x1f || buffer[1] != 0x8b {
|
||||
return Err(bad_header());
|
||||
}
|
||||
// Gzip compression method (8 = deflate)
|
||||
if buffer[2] != 8 {
|
||||
return Err(bad_header());
|
||||
}
|
||||
self.flags = buffer[3];
|
||||
// RFC1952: "must give an error indication if any reserved bit is non-zero"
|
||||
if self.flags & FRESERVED != 0 {
|
||||
return Err(bad_header());
|
||||
}
|
||||
self.header.mtime = ((buffer[4] as u32) << 0)
|
||||
| ((buffer[5] as u32) << 8)
|
||||
| ((buffer[6] as u32) << 16)
|
||||
| ((buffer[7] as u32) << 24);
|
||||
let _xfl = buffer[8];
|
||||
self.header.operating_system = buffer[9];
|
||||
let crc = if self.flags & FHCRC != 0 {
|
||||
let mut crc = Box::new(Crc::new());
|
||||
crc.update(buffer);
|
||||
Some(crc)
|
||||
} else {
|
||||
None
|
||||
};
|
||||
self.state = GzHeaderState::Xlen(crc, 0, [0; 2]);
|
||||
}
|
||||
GzHeaderState::Xlen(crc, count, buffer) => {
|
||||
if self.flags & FEXTRA != 0 {
|
||||
while (*count as usize) < buffer.len() {
|
||||
*count += read_into(r, &mut buffer[*count as usize..])? as u8;
|
||||
}
|
||||
if let Some(crc) = crc {
|
||||
crc.update(buffer);
|
||||
}
|
||||
let xlen = parse_le_u16(&buffer);
|
||||
self.header.extra = Some(vec![0; xlen as usize]);
|
||||
self.state = GzHeaderState::Extra(crc.take(), 0);
|
||||
} else {
|
||||
self.state = GzHeaderState::Filename(crc.take());
|
||||
}
|
||||
}
|
||||
GzHeaderState::Extra(crc, count) => {
|
||||
debug_assert!(self.header.extra.is_some());
|
||||
let extra = self.header.extra.as_mut().unwrap();
|
||||
while (*count as usize) < extra.len() {
|
||||
*count += read_into(r, &mut extra[*count as usize..])? as u16;
|
||||
}
|
||||
if let Some(crc) = crc {
|
||||
crc.update(extra);
|
||||
}
|
||||
self.state = GzHeaderState::Filename(crc.take());
|
||||
}
|
||||
GzHeaderState::Filename(crc) => {
|
||||
if self.flags & FNAME != 0 {
|
||||
let filename = self.header.filename.get_or_insert_with(Vec::new);
|
||||
read_to_nul(r, filename)?;
|
||||
if let Some(crc) = crc {
|
||||
crc.update(filename);
|
||||
crc.update(b"\0");
|
||||
}
|
||||
}
|
||||
self.state = GzHeaderState::Comment(crc.take());
|
||||
}
|
||||
GzHeaderState::Comment(crc) => {
|
||||
if self.flags & FCOMMENT != 0 {
|
||||
let comment = self.header.comment.get_or_insert_with(Vec::new);
|
||||
read_to_nul(r, comment)?;
|
||||
if let Some(crc) = crc {
|
||||
crc.update(comment);
|
||||
crc.update(b"\0");
|
||||
}
|
||||
}
|
||||
self.state = GzHeaderState::Crc(crc.take(), 0, [0; 2]);
|
||||
}
|
||||
GzHeaderState::Crc(crc, count, buffer) => {
|
||||
if let Some(crc) = crc {
|
||||
debug_assert!(self.flags & FHCRC != 0);
|
||||
while (*count as usize) < buffer.len() {
|
||||
*count += read_into(r, &mut buffer[*count as usize..])? as u8;
|
||||
}
|
||||
let stored_crc = parse_le_u16(&buffer);
|
||||
let calced_crc = crc.sum() as u16;
|
||||
if stored_crc != calced_crc {
|
||||
return Err(corrupt());
|
||||
}
|
||||
}
|
||||
self.state = GzHeaderState::Complete;
|
||||
}
|
||||
GzHeaderState::Complete => {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn header(&self) -> Option<&GzHeader> {
|
||||
match self.state {
|
||||
GzHeaderState::Complete => Some(&self.header),
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl From<GzHeaderParser> for GzHeader {
|
||||
fn from(parser: GzHeaderParser) -> Self {
|
||||
debug_assert!(matches!(parser.state, GzHeaderState::Complete));
|
||||
parser.header
|
||||
}
|
||||
}
|
||||
|
||||
// Attempt to fill the `buffer` from `r`. Return the number of bytes read.
|
||||
// Return an error if EOF is read before the buffer is full. This differs
|
||||
// from `read` in that Ok(0) means that more data may be available.
|
||||
fn read_into<R: Read>(r: &mut R, buffer: &mut [u8]) -> Result<usize> {
|
||||
debug_assert!(!buffer.is_empty());
|
||||
match r.read(buffer) {
|
||||
Ok(0) => Err(ErrorKind::UnexpectedEof.into()),
|
||||
Ok(n) => Ok(n),
|
||||
Err(ref e) if e.kind() == ErrorKind::Interrupted => Ok(0),
|
||||
Err(e) => Err(e),
|
||||
}
|
||||
}
|
||||
|
||||
// Read `r` up to the first nul byte, pushing non-nul bytes to `buffer`.
|
||||
fn read_to_nul<R: Read>(r: &mut R, buffer: &mut Vec<u8>) -> Result<()> {
|
||||
let mut bytes = r.bytes();
|
||||
loop {
|
||||
match bytes.next().transpose()? {
|
||||
Some(byte) if byte == 0 => {
|
||||
return Ok(());
|
||||
}
|
||||
Some(_) if buffer.len() == MAX_HEADER_BUF => {
|
||||
return Err(Error::new(
|
||||
ErrorKind::InvalidInput,
|
||||
"gzip header field too long",
|
||||
));
|
||||
}
|
||||
Some(byte) => {
|
||||
buffer.push(byte);
|
||||
}
|
||||
None => {
|
||||
return Err(ErrorKind::UnexpectedEof.into());
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_le_u16(buffer: &[u8; 2]) -> u16 {
|
||||
(buffer[0] as u16) | ((buffer[1] as u16) << 8)
|
||||
}
|
||||
|
||||
fn bad_header() -> Error {
|
||||
Error::new(ErrorKind::InvalidInput, "invalid gzip header")
|
||||
}
|
||||
|
||||
fn corrupt() -> Error {
|
||||
Error::new(
|
||||
ErrorKind::InvalidInput,
|
||||
"corrupt gzip stream does not have a matching checksum",
|
||||
)
|
||||
}
|
||||
|
||||
/// A builder structure to create a new gzip Encoder.
|
||||
///
|
||||
/// This structure controls header configuration options such as the filename.
|
||||
@@ -253,8 +462,8 @@ impl GzBuilder {
|
||||
mod tests {
|
||||
use std::io::prelude::*;
|
||||
|
||||
use super::{read, write, GzBuilder};
|
||||
use crate::Compression;
|
||||
use super::{read, write, GzBuilder, GzHeaderParser};
|
||||
use crate::{Compression, GzHeader};
|
||||
use rand::{thread_rng, Rng};
|
||||
|
||||
#[test]
|
||||
@@ -304,6 +513,85 @@ mod tests {
|
||||
assert_eq!(res, v);
|
||||
}
|
||||
|
||||
// A Rust implementation of CRC that closely matches the C code in RFC1952.
|
||||
// Only use this to create CRCs for tests.
|
||||
struct Rfc1952Crc {
|
||||
/* Table of CRCs of all 8-bit messages. */
|
||||
crc_table: [u32; 256],
|
||||
}
|
||||
|
||||
impl Rfc1952Crc {
|
||||
fn new() -> Self {
|
||||
let mut crc = Rfc1952Crc {
|
||||
crc_table: [0; 256],
|
||||
};
|
||||
/* Make the table for a fast CRC. */
|
||||
for n in 0usize..256 {
|
||||
let mut c = n as u32;
|
||||
for _k in 0..8 {
|
||||
if c & 1 != 0 {
|
||||
c = 0xedb88320 ^ (c >> 1);
|
||||
} else {
|
||||
c = c >> 1;
|
||||
}
|
||||
}
|
||||
crc.crc_table[n] = c;
|
||||
}
|
||||
crc
|
||||
}
|
||||
|
||||
/*
|
||||
Update a running crc with the bytes buf and return
|
||||
the updated crc. The crc should be initialized to zero. Pre- and
|
||||
post-conditioning (one's complement) is performed within this
|
||||
function so it shouldn't be done by the caller.
|
||||
*/
|
||||
fn update_crc(&self, crc: u32, buf: &[u8]) -> u32 {
|
||||
let mut c = crc ^ 0xffffffff;
|
||||
|
||||
for b in buf {
|
||||
c = self.crc_table[(c as u8 ^ *b) as usize] ^ (c >> 8);
|
||||
}
|
||||
c ^ 0xffffffff
|
||||
}
|
||||
|
||||
/* Return the CRC of the bytes buf. */
|
||||
fn crc(&self, buf: &[u8]) -> u32 {
|
||||
self.update_crc(0, buf)
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn roundtrip_header() {
|
||||
let mut header = GzBuilder::new()
|
||||
.mtime(1234)
|
||||
.operating_system(57)
|
||||
.filename("filename")
|
||||
.comment("comment")
|
||||
.into_header(Compression::fast());
|
||||
|
||||
// Add a CRC to the header
|
||||
header[3] = header[3] ^ super::FHCRC;
|
||||
let rfc1952_crc = Rfc1952Crc::new();
|
||||
let crc32 = rfc1952_crc.crc(&header);
|
||||
let crc16 = crc32 as u16;
|
||||
header.extend(&crc16.to_le_bytes());
|
||||
|
||||
let mut parser = GzHeaderParser::new();
|
||||
parser.parse(&mut header.as_slice()).unwrap();
|
||||
let actual = parser.header().unwrap();
|
||||
assert_eq!(
|
||||
actual,
|
||||
&GzHeader {
|
||||
extra: None,
|
||||
filename: Some("filename".as_bytes().to_vec()),
|
||||
comment: Some("comment".as_bytes().to_vec()),
|
||||
operating_system: 57,
|
||||
mtime: 1234
|
||||
}
|
||||
)
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn fields() {
|
||||
let r = vec![0, 2, 4, 6];
|
||||
@@ -353,33 +641,4 @@ mod tests {
|
||||
write!(f, "Hello world").unwrap();
|
||||
f.flush().unwrap();
|
||||
}
|
||||
|
||||
use crate::gz::bufread::tests::BlockingCursor;
|
||||
#[test]
|
||||
// test function read_and_forget of Buffer
|
||||
fn blocked_partial_header_read() {
|
||||
// this is a reader which receives data afterwards
|
||||
let mut r = BlockingCursor::new();
|
||||
let data = vec![1, 2, 3];
|
||||
|
||||
match r.write_all(&data) {
|
||||
Ok(()) => {}
|
||||
_ => {
|
||||
panic!("Unexpected result for write_all");
|
||||
}
|
||||
}
|
||||
r.set_position(0);
|
||||
|
||||
// this is unused except for the buffering
|
||||
let mut decoder = read::GzDecoder::new(r);
|
||||
let mut out = Vec::with_capacity(7);
|
||||
match decoder.read(&mut out) {
|
||||
Err(e) => {
|
||||
assert_eq!(e.kind(), std::io::ErrorKind::WouldBlock);
|
||||
}
|
||||
_ => {
|
||||
panic!("Unexpected result for decoder.read");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
144
third_party/rust/flate2/src/gz/read.rs
vendored
144
third_party/rust/flate2/src/gz/read.rs
vendored
@@ -8,9 +8,8 @@ use crate::Compression;
|
||||
|
||||
/// A gzip streaming encoder
|
||||
///
|
||||
/// This structure exposes a [`Read`] interface that will read uncompressed data
|
||||
/// from the underlying reader and expose the compressed version as a [`Read`]
|
||||
/// interface.
|
||||
/// This structure implements a [`Read`] interface. When read from, it reads
|
||||
/// uncompressed data from the underlying [`Read`] and provides the compressed data.
|
||||
///
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
///
|
||||
@@ -25,11 +24,11 @@ use crate::Compression;
|
||||
/// // Return a vector containing the GZ compressed version of hello world
|
||||
///
|
||||
/// fn gzencode_hello_world() -> io::Result<Vec<u8>> {
|
||||
/// let mut ret_vec = [0;100];
|
||||
/// let mut ret_vec = Vec::new();
|
||||
/// let bytestring = b"hello world";
|
||||
/// let mut gz = GzEncoder::new(&bytestring[..], Compression::fast());
|
||||
/// let count = gz.read(&mut ret_vec)?;
|
||||
/// Ok(ret_vec[0..count].to_vec())
|
||||
/// gz.read_to_end(&mut ret_vec)?;
|
||||
/// Ok(ret_vec)
|
||||
/// }
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
@@ -90,17 +89,26 @@ impl<R: Read + Write> Write for GzEncoder<R> {
|
||||
}
|
||||
}
|
||||
|
||||
/// A gzip streaming decoder
|
||||
/// A decoder for a single member of a [gzip file].
|
||||
///
|
||||
/// This structure exposes a [`Read`] interface that will consume compressed
|
||||
/// data from the underlying reader and emit uncompressed data.
|
||||
/// This structure implements a [`Read`] interface. When read from, it reads
|
||||
/// compressed data from the underlying [`Read`] and provides the uncompressed data.
|
||||
///
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
/// After reading a single member of the gzip data this reader will return
|
||||
/// Ok(0) even if there are more bytes available in the underlying reader.
|
||||
/// `GzDecoder` may have read additional bytes past the end of the gzip data.
|
||||
/// If you need the following bytes, wrap the `Reader` in a `std::io::BufReader`
|
||||
/// and use `bufread::GzDecoder` instead.
|
||||
///
|
||||
/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
|
||||
/// or read more
|
||||
/// [in the introduction](../index.html#about-multi-member-gzip-files).
|
||||
///
|
||||
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
/// ```
|
||||
///
|
||||
/// use std::io::prelude::*;
|
||||
/// use std::io;
|
||||
/// # use flate2::Compression;
|
||||
@@ -146,6 +154,9 @@ impl<R> GzDecoder<R> {
|
||||
}
|
||||
|
||||
/// Acquires a reference to the underlying reader.
|
||||
///
|
||||
/// Note that the decoder may have read past the end of the gzip data.
|
||||
/// To prevent this use [`bufread::GzDecoder`] instead.
|
||||
pub fn get_ref(&self) -> &R {
|
||||
self.inner.get_ref().get_ref()
|
||||
}
|
||||
@@ -153,12 +164,19 @@ impl<R> GzDecoder<R> {
|
||||
/// Acquires a mutable reference to the underlying stream.
|
||||
///
|
||||
/// Note that mutation of the stream may result in surprising results if
|
||||
/// this decoder is continued to be used.
|
||||
/// this decoder continues to be used.
|
||||
///
|
||||
/// Note that the decoder may have read past the end of the gzip data.
|
||||
/// To prevent this use [`bufread::GzDecoder`] instead.
|
||||
pub fn get_mut(&mut self) -> &mut R {
|
||||
self.inner.get_mut().get_mut()
|
||||
}
|
||||
|
||||
/// Consumes this decoder, returning the underlying reader.
|
||||
///
|
||||
/// Note that the decoder may have read past the end of the gzip data.
|
||||
/// Subsequent reads will skip those bytes. To prevent this use
|
||||
/// [`bufread::GzDecoder`] instead.
|
||||
pub fn into_inner(self) -> R {
|
||||
self.inner.into_inner().into_inner()
|
||||
}
|
||||
@@ -180,19 +198,20 @@ impl<R: Read + Write> Write for GzDecoder<R> {
|
||||
}
|
||||
}
|
||||
|
||||
/// A gzip streaming decoder that decodes all members of a multistream
|
||||
/// A gzip streaming decoder that decodes a [gzip file] that may have multiple members.
|
||||
///
|
||||
/// A gzip member consists of a header, compressed data and a trailer. The [gzip
|
||||
/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
|
||||
/// gzip members to be joined in a single stream. `MultiGzDecoder` will
|
||||
/// decode all consecutive members while `GzDecoder` will only decompress the
|
||||
/// first gzip member. The multistream format is commonly used in bioinformatics,
|
||||
/// for example when using the BGZF compressed data.
|
||||
/// This structure implements a [`Read`] interface. When read from, it reads
|
||||
/// compressed data from the underlying [`Read`] and provides the uncompressed
|
||||
/// data.
|
||||
///
|
||||
/// This structure exposes a [`Read`] interface that will consume all gzip members
|
||||
/// from the underlying reader and emit uncompressed data.
|
||||
/// A gzip file consists of a series of *members* concatenated one after another.
|
||||
/// MultiGzDecoder decodes all members of a file and returns Ok(0) once the
|
||||
/// underlying reader does.
|
||||
///
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
/// To handle members separately, see [GzDecoder] or read more
|
||||
/// [in the introduction](../index.html#about-multi-member-gzip-files).
|
||||
///
|
||||
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
|
||||
///
|
||||
/// # Examples
|
||||
///
|
||||
@@ -276,3 +295,84 @@ impl<R: Read + Write> Write for MultiGzDecoder<R> {
|
||||
self.get_mut().flush()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::io::{Cursor, ErrorKind, Read, Result, Write};
|
||||
|
||||
use super::GzDecoder;
|
||||
|
||||
//a cursor turning EOF into blocking errors
|
||||
#[derive(Debug)]
|
||||
pub struct BlockingCursor {
|
||||
pub cursor: Cursor<Vec<u8>>,
|
||||
}
|
||||
|
||||
impl BlockingCursor {
|
||||
pub fn new() -> BlockingCursor {
|
||||
BlockingCursor {
|
||||
cursor: Cursor::new(Vec::new()),
|
||||
}
|
||||
}
|
||||
|
||||
pub fn set_position(&mut self, pos: u64) {
|
||||
return self.cursor.set_position(pos);
|
||||
}
|
||||
}
|
||||
|
||||
impl Write for BlockingCursor {
|
||||
fn write(&mut self, buf: &[u8]) -> Result<usize> {
|
||||
return self.cursor.write(buf);
|
||||
}
|
||||
fn flush(&mut self) -> Result<()> {
|
||||
return self.cursor.flush();
|
||||
}
|
||||
}
|
||||
|
||||
impl Read for BlockingCursor {
|
||||
fn read(&mut self, buf: &mut [u8]) -> Result<usize> {
|
||||
//use the cursor, except it turns eof into blocking error
|
||||
let r = self.cursor.read(buf);
|
||||
match r {
|
||||
Err(ref err) => {
|
||||
if err.kind() == ErrorKind::UnexpectedEof {
|
||||
return Err(ErrorKind::WouldBlock.into());
|
||||
}
|
||||
}
|
||||
Ok(0) => {
|
||||
//regular EOF turned into blocking error
|
||||
return Err(ErrorKind::WouldBlock.into());
|
||||
}
|
||||
Ok(_n) => {}
|
||||
}
|
||||
return r;
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn blocked_partial_header_read() {
|
||||
// this is a reader which receives data afterwards
|
||||
let mut r = BlockingCursor::new();
|
||||
let data = vec![1, 2, 3];
|
||||
|
||||
match r.write_all(&data) {
|
||||
Ok(()) => {}
|
||||
_ => {
|
||||
panic!("Unexpected result for write_all");
|
||||
}
|
||||
}
|
||||
r.set_position(0);
|
||||
|
||||
// this is unused except for the buffering
|
||||
let mut decoder = GzDecoder::new(r);
|
||||
let mut out = Vec::with_capacity(7);
|
||||
match decoder.read(&mut out) {
|
||||
Err(e) => {
|
||||
assert_eq!(e.kind(), ErrorKind::WouldBlock);
|
||||
}
|
||||
_ => {
|
||||
panic!("Unexpected result for decoder.read");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
169
third_party/rust/flate2/src/gz/write.rs
vendored
169
third_party/rust/flate2/src/gz/write.rs
vendored
@@ -2,8 +2,7 @@ use std::cmp;
|
||||
use std::io;
|
||||
use std::io::prelude::*;
|
||||
|
||||
use super::bufread::{corrupt, read_gz_header};
|
||||
use super::{GzBuilder, GzHeader};
|
||||
use super::{corrupt, GzBuilder, GzHeader, GzHeaderParser};
|
||||
use crate::crc::{Crc, CrcWriter};
|
||||
use crate::zio;
|
||||
use crate::{Compress, Compression, Decompress, Status};
|
||||
@@ -167,11 +166,20 @@ impl<W: Write> Drop for GzEncoder<W> {
|
||||
}
|
||||
}
|
||||
|
||||
/// A gzip streaming decoder
|
||||
/// A decoder for a single member of a [gzip file].
|
||||
///
|
||||
/// This structure exposes a [`Write`] interface that will emit uncompressed data
|
||||
/// to the underlying writer `W`.
|
||||
/// This structure exposes a [`Write`] interface, receiving compressed data and
|
||||
/// writing uncompressed data to the underlying writer.
|
||||
///
|
||||
/// After decoding a single member of the gzip data this writer will return the number of bytes up to
|
||||
/// to the end of the gzip member and subsequent writes will return Ok(0) allowing the caller to
|
||||
/// handle any data following the gzip member.
|
||||
///
|
||||
/// To handle gzip files that may have multiple members, see [`MultiGzDecoder`]
|
||||
/// or read more
|
||||
/// [in the introduction](../index.html#about-multi-member-gzip-files).
|
||||
///
|
||||
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
|
||||
/// [`Write`]: https://doc.rust-lang.org/std/io/trait.Write.html
|
||||
///
|
||||
/// # Examples
|
||||
@@ -203,8 +211,7 @@ impl<W: Write> Drop for GzEncoder<W> {
|
||||
pub struct GzDecoder<W: Write> {
|
||||
inner: zio::Writer<CrcWriter<W>, Decompress>,
|
||||
crc_bytes: Vec<u8>,
|
||||
header: Option<GzHeader>,
|
||||
header_buf: Vec<u8>,
|
||||
header_parser: GzHeaderParser,
|
||||
}
|
||||
|
||||
const CRC_BYTES_LEN: usize = 8;
|
||||
@@ -218,14 +225,13 @@ impl<W: Write> GzDecoder<W> {
|
||||
GzDecoder {
|
||||
inner: zio::Writer::new(CrcWriter::new(w), Decompress::new(false)),
|
||||
crc_bytes: Vec::with_capacity(CRC_BYTES_LEN),
|
||||
header: None,
|
||||
header_buf: Vec::new(),
|
||||
header_parser: GzHeaderParser::new(),
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the header associated with this stream.
|
||||
pub fn header(&self) -> Option<&GzHeader> {
|
||||
self.header.as_ref()
|
||||
self.header_parser.header()
|
||||
}
|
||||
|
||||
/// Acquires a reference to the underlying writer.
|
||||
@@ -306,47 +312,24 @@ impl<W: Write> GzDecoder<W> {
|
||||
}
|
||||
}
|
||||
|
||||
struct Counter<T: Read> {
|
||||
inner: T,
|
||||
pos: usize,
|
||||
}
|
||||
|
||||
impl<T: Read> Read for Counter<T> {
|
||||
fn read(&mut self, buf: &mut [u8]) -> io::Result<usize> {
|
||||
let pos = self.inner.read(buf)?;
|
||||
self.pos += pos;
|
||||
Ok(pos)
|
||||
}
|
||||
}
|
||||
|
||||
impl<W: Write> Write for GzDecoder<W> {
|
||||
fn write(&mut self, buf: &[u8]) -> io::Result<usize> {
|
||||
if self.header.is_none() {
|
||||
// trying to avoid buffer usage
|
||||
let (res, pos) = {
|
||||
let mut counter = Counter {
|
||||
inner: self.header_buf.chain(buf),
|
||||
pos: 0,
|
||||
};
|
||||
let res = read_gz_header(&mut counter);
|
||||
(res, counter.pos)
|
||||
};
|
||||
|
||||
match res {
|
||||
fn write(&mut self, mut buf: &[u8]) -> io::Result<usize> {
|
||||
let buflen = buf.len();
|
||||
if self.header().is_none() {
|
||||
match self.header_parser.parse(&mut buf) {
|
||||
Err(err) => {
|
||||
if err.kind() == io::ErrorKind::UnexpectedEof {
|
||||
// not enough data for header, save to the buffer
|
||||
self.header_buf.extend(buf);
|
||||
Ok(buf.len())
|
||||
// all data read but header still not complete
|
||||
Ok(buflen)
|
||||
} else {
|
||||
Err(err)
|
||||
}
|
||||
}
|
||||
Ok(header) => {
|
||||
self.header = Some(header);
|
||||
let pos = pos - self.header_buf.len();
|
||||
self.header_buf.truncate(0);
|
||||
Ok(pos)
|
||||
Ok(_) => {
|
||||
debug_assert!(self.header().is_some());
|
||||
// buf now contains the unread part of the original buf
|
||||
let n = buflen - buf.len();
|
||||
Ok(n)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@@ -373,17 +356,19 @@ impl<W: Read + Write> Read for GzDecoder<W> {
|
||||
}
|
||||
}
|
||||
|
||||
/// A gzip streaming decoder that decodes all members of a multistream
|
||||
/// A gzip streaming decoder that decodes a [gzip file] with multiple members.
|
||||
///
|
||||
/// A gzip member consists of a header, compressed data and a trailer. The [gzip
|
||||
/// specification](https://tools.ietf.org/html/rfc1952), however, allows multiple
|
||||
/// gzip members to be joined in a single stream. `MultiGzDecoder` will
|
||||
/// decode all consecutive members while `GzDecoder` will only decompress
|
||||
/// the first gzip member. The multistream format is commonly used in
|
||||
/// bioinformatics, for example when using the BGZF compressed data.
|
||||
/// This structure exposes a [`Write`] interface that will consume compressed data and
|
||||
/// write uncompressed data to the underlying writer.
|
||||
///
|
||||
/// This structure exposes a [`Write`] interface that will consume all gzip members
|
||||
/// from the written buffers and write uncompressed data to the writer.
|
||||
/// A gzip file consists of a series of *members* concatenated one after another.
|
||||
/// `MultiGzDecoder` decodes all members of a file and writes them to the
|
||||
/// underlying writer one after another.
|
||||
///
|
||||
/// To handle members separately, see [GzDecoder] or read more
|
||||
/// [in the introduction](../index.html#about-multi-member-gzip-files).
|
||||
///
|
||||
/// [gzip file]: https://www.rfc-editor.org/rfc/rfc1952#page-5
|
||||
#[derive(Debug)]
|
||||
pub struct MultiGzDecoder<W: Write> {
|
||||
inner: GzDecoder<W>,
|
||||
@@ -523,6 +508,56 @@ mod tests {
|
||||
assert_eq!(return_string, STR);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_writer_partial_header_filename() {
|
||||
let filename = "test.txt";
|
||||
let mut e = GzBuilder::new()
|
||||
.filename(filename)
|
||||
.read(STR.as_bytes(), Compression::default());
|
||||
let mut bytes = Vec::new();
|
||||
e.read_to_end(&mut bytes).unwrap();
|
||||
|
||||
let mut writer = Vec::new();
|
||||
let mut decoder = GzDecoder::new(writer);
|
||||
assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12);
|
||||
let n = decoder.write(&bytes[12..]).unwrap();
|
||||
if n < bytes.len() - 12 {
|
||||
decoder.write(&bytes[n + 12..]).unwrap();
|
||||
}
|
||||
assert_eq!(
|
||||
decoder.header().unwrap().filename().unwrap(),
|
||||
filename.as_bytes()
|
||||
);
|
||||
writer = decoder.finish().unwrap();
|
||||
let return_string = String::from_utf8(writer).expect("String parsing error");
|
||||
assert_eq!(return_string, STR);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_writer_partial_header_comment() {
|
||||
let comment = "test comment";
|
||||
let mut e = GzBuilder::new()
|
||||
.comment(comment)
|
||||
.read(STR.as_bytes(), Compression::default());
|
||||
let mut bytes = Vec::new();
|
||||
e.read_to_end(&mut bytes).unwrap();
|
||||
|
||||
let mut writer = Vec::new();
|
||||
let mut decoder = GzDecoder::new(writer);
|
||||
assert_eq!(decoder.write(&bytes[..12]).unwrap(), 12);
|
||||
let n = decoder.write(&bytes[12..]).unwrap();
|
||||
if n < bytes.len() - 12 {
|
||||
decoder.write(&bytes[n + 12..]).unwrap();
|
||||
}
|
||||
assert_eq!(
|
||||
decoder.header().unwrap().comment().unwrap(),
|
||||
comment.as_bytes()
|
||||
);
|
||||
writer = decoder.finish().unwrap();
|
||||
let return_string = String::from_utf8(writer).expect("String parsing error");
|
||||
assert_eq!(return_string, STR);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn decode_writer_exact_header() {
|
||||
let mut e = GzEncoder::new(Vec::new(), Compression::default());
|
||||
@@ -575,4 +610,32 @@ mod tests {
|
||||
let expected = STR.repeat(2);
|
||||
assert_eq!(return_string, expected);
|
||||
}
|
||||
|
||||
// GzDecoder consumes one gzip member and then returns 0 for subsequent writes, allowing any
|
||||
// additional data to be consumed by the caller.
|
||||
#[test]
|
||||
fn decode_extra_data() {
|
||||
let compressed = {
|
||||
let mut e = GzEncoder::new(Vec::new(), Compression::default());
|
||||
e.write(STR.as_ref()).unwrap();
|
||||
let mut b = e.finish().unwrap();
|
||||
b.push(b'x');
|
||||
b
|
||||
};
|
||||
|
||||
let mut writer = Vec::new();
|
||||
let mut decoder = GzDecoder::new(writer);
|
||||
let mut consumed_bytes = 0;
|
||||
loop {
|
||||
let n = decoder.write(&compressed[consumed_bytes..]).unwrap();
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
consumed_bytes += n;
|
||||
}
|
||||
writer = decoder.finish().unwrap();
|
||||
let actual = String::from_utf8(writer).expect("String parsing error");
|
||||
assert_eq!(actual, STR);
|
||||
assert_eq!(&compressed[consumed_bytes..], b"x");
|
||||
}
|
||||
}
|
||||
|
||||
30
third_party/rust/flate2/src/lib.rs
vendored
30
third_party/rust/flate2/src/lib.rs
vendored
@@ -65,12 +65,30 @@
|
||||
//! `Write` trait if `T: Write`. That is, the "dual trait" is forwarded directly
|
||||
//! to the underlying object if available.
|
||||
//!
|
||||
//! # About multi-member Gzip files
|
||||
//!
|
||||
//! While most `gzip` files one encounters will have a single *member* that can be read
|
||||
//! with the [`GzDecoder`], there may be some files which have multiple members.
|
||||
//!
|
||||
//! A [`GzDecoder`] will only read the first member of gzip data, which may unexpectedly
|
||||
//! provide partial results when a multi-member gzip file is encountered. `GzDecoder` is appropriate
|
||||
//! for data that is designed to be read as single members from a multi-member file. `bufread::GzDecoder`
|
||||
//! and `write::GzDecoder` also allow non-gzip data following gzip data to be handled.
|
||||
//!
|
||||
//! The [`MultiGzDecoder`] on the other hand will decode all members of a `gzip` file
|
||||
//! into one consecutive stream of bytes, which hides the underlying *members* entirely.
|
||||
//! If a file contains contains non-gzip data after the gzip data, MultiGzDecoder will
|
||||
//! emit an error after decoding the gzip data. This behavior matches the `gzip`,
|
||||
//! `gunzip`, and `zcat` command line tools.
|
||||
//!
|
||||
//! [`read`]: read/index.html
|
||||
//! [`bufread`]: bufread/index.html
|
||||
//! [`write`]: write/index.html
|
||||
//! [read]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
//! [write]: https://doc.rust-lang.org/std/io/trait.Write.html
|
||||
//! [bufread]: https://doc.rust-lang.org/std/io/trait.BufRead.html
|
||||
//! [`GzDecoder`]: read/struct.GzDecoder.html
|
||||
//! [`MultiGzDecoder`]: read/struct.MultiGzDecoder.html
|
||||
#![doc(html_root_url = "https://docs.rs/flate2/0.2")]
|
||||
#![deny(missing_docs)]
|
||||
#![deny(missing_debug_implementations)]
|
||||
@@ -78,6 +96,9 @@
|
||||
#![cfg_attr(test, deny(warnings))]
|
||||
#![cfg_attr(docsrs, feature(doc_auto_cfg))]
|
||||
|
||||
#[cfg(not(feature = "any_impl",))]
|
||||
compile_error!("You need to choose a zlib backend");
|
||||
|
||||
pub use crate::crc::{Crc, CrcReader, CrcWriter};
|
||||
pub use crate::gz::GzBuilder;
|
||||
pub use crate::gz::GzHeader;
|
||||
@@ -96,7 +117,14 @@ mod zlib;
|
||||
/// Types which operate over [`Read`] streams, both encoders and decoders for
|
||||
/// various formats.
|
||||
///
|
||||
/// Note that the `read` decoder types may read past the end of the compressed
|
||||
/// data while decoding. If the caller requires subsequent reads to start
|
||||
/// immediately following the compressed data wrap the `Read` type in a
|
||||
/// [`BufReader`] and use the `BufReader` with the equivalent decoder from the
|
||||
/// `bufread` module and also for the subsequent reads.
|
||||
///
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
/// [`BufReader`]: https://doc.rust-lang.org/std/io/struct.BufReader.html
|
||||
pub mod read {
|
||||
pub use crate::deflate::read::DeflateDecoder;
|
||||
pub use crate::deflate::read::DeflateEncoder;
|
||||
@@ -154,7 +182,7 @@ fn _assert_send_sync() {
|
||||
}
|
||||
|
||||
/// When compressing data, the compression level can be specified by a value in
|
||||
/// this enum.
|
||||
/// this struct.
|
||||
#[derive(Copy, Clone, PartialEq, Eq, Debug)]
|
||||
pub struct Compression(u32);
|
||||
|
||||
|
||||
88
third_party/rust/flate2/src/mem.rs
vendored
88
third_party/rust/flate2/src/mem.rs
vendored
@@ -1,7 +1,6 @@
|
||||
use std::error::Error;
|
||||
use std::fmt;
|
||||
use std::io;
|
||||
use std::slice;
|
||||
|
||||
use crate::ffi::{self, Backend, Deflate, DeflateBackend, ErrorMessage, Inflate, InflateBackend};
|
||||
use crate::Compression;
|
||||
@@ -266,16 +265,19 @@ impl Compress {
|
||||
/// Returns the Adler-32 checksum of the dictionary.
|
||||
#[cfg(feature = "any_zlib")]
|
||||
pub fn set_dictionary(&mut self, dictionary: &[u8]) -> Result<u32, CompressError> {
|
||||
let stream = &mut *self.inner.inner.stream_wrapper;
|
||||
stream.msg = std::ptr::null_mut();
|
||||
// SAFETY: The field `inner` must always be accessed as a raw pointer,
|
||||
// since it points to a cyclic structure. No copies of `inner` can be
|
||||
// retained for longer than the lifetime of `self.inner.inner.stream_wrapper`.
|
||||
let stream = self.inner.inner.stream_wrapper.inner;
|
||||
let rc = unsafe {
|
||||
(*stream).msg = std::ptr::null_mut();
|
||||
assert!(dictionary.len() < ffi::uInt::MAX as usize);
|
||||
ffi::deflateSetDictionary(stream, dictionary.as_ptr(), dictionary.len() as ffi::uInt)
|
||||
};
|
||||
|
||||
match rc {
|
||||
ffi::MZ_STREAM_ERROR => compress_failed(self.inner.inner.msg()),
|
||||
ffi::MZ_OK => Ok(stream.adler as u32),
|
||||
ffi::MZ_OK => Ok(unsafe { (*stream).adler } as u32),
|
||||
c => panic!("unknown return code: {}", c),
|
||||
}
|
||||
}
|
||||
@@ -300,9 +302,13 @@ impl Compress {
|
||||
#[cfg(feature = "any_zlib")]
|
||||
pub fn set_level(&mut self, level: Compression) -> Result<(), CompressError> {
|
||||
use std::os::raw::c_int;
|
||||
let stream = &mut *self.inner.inner.stream_wrapper;
|
||||
stream.msg = std::ptr::null_mut();
|
||||
|
||||
// SAFETY: The field `inner` must always be accessed as a raw pointer,
|
||||
// since it points to a cyclic structure. No copies of `inner` can be
|
||||
// retained for longer than the lifetime of `self.inner.inner.stream_wrapper`.
|
||||
let stream = self.inner.inner.stream_wrapper.inner;
|
||||
unsafe {
|
||||
(*stream).msg = std::ptr::null_mut();
|
||||
}
|
||||
let rc = unsafe { ffi::deflateParams(stream, level.0 as c_int, ffi::MZ_DEFAULT_STRATEGY) };
|
||||
|
||||
match rc {
|
||||
@@ -342,19 +348,12 @@ impl Compress {
|
||||
output: &mut Vec<u8>,
|
||||
flush: FlushCompress,
|
||||
) -> Result<Status, CompressError> {
|
||||
let cap = output.capacity();
|
||||
let len = output.len();
|
||||
|
||||
unsafe {
|
||||
write_to_spare_capacity_of_vec(output, |out| {
|
||||
let before = self.total_out();
|
||||
let ret = {
|
||||
let ptr = output.as_mut_ptr().add(len);
|
||||
let out = slice::from_raw_parts_mut(ptr, cap - len);
|
||||
self.compress(input, out, flush)
|
||||
};
|
||||
output.set_len((self.total_out() - before) as usize + len);
|
||||
ret
|
||||
}
|
||||
let ret = self.compress(input, out, flush);
|
||||
let bytes_written = self.total_out() - before;
|
||||
(bytes_written as usize, ret)
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -473,35 +472,31 @@ impl Decompress {
|
||||
output: &mut Vec<u8>,
|
||||
flush: FlushDecompress,
|
||||
) -> Result<Status, DecompressError> {
|
||||
let cap = output.capacity();
|
||||
let len = output.len();
|
||||
|
||||
unsafe {
|
||||
write_to_spare_capacity_of_vec(output, |out| {
|
||||
let before = self.total_out();
|
||||
let ret = {
|
||||
let ptr = output.as_mut_ptr().add(len);
|
||||
let out = slice::from_raw_parts_mut(ptr, cap - len);
|
||||
self.decompress(input, out, flush)
|
||||
};
|
||||
output.set_len((self.total_out() - before) as usize + len);
|
||||
ret
|
||||
}
|
||||
let ret = self.decompress(input, out, flush);
|
||||
let bytes_written = self.total_out() - before;
|
||||
(bytes_written as usize, ret)
|
||||
})
|
||||
}
|
||||
|
||||
/// Specifies the decompression dictionary to use.
|
||||
#[cfg(feature = "any_zlib")]
|
||||
pub fn set_dictionary(&mut self, dictionary: &[u8]) -> Result<u32, DecompressError> {
|
||||
let stream = &mut *self.inner.inner.stream_wrapper;
|
||||
stream.msg = std::ptr::null_mut();
|
||||
// SAFETY: The field `inner` must always be accessed as a raw pointer,
|
||||
// since it points to a cyclic structure. No copies of `inner` can be
|
||||
// retained for longer than the lifetime of `self.inner.inner.stream_wrapper`.
|
||||
let stream = self.inner.inner.stream_wrapper.inner;
|
||||
let rc = unsafe {
|
||||
(*stream).msg = std::ptr::null_mut();
|
||||
assert!(dictionary.len() < ffi::uInt::MAX as usize);
|
||||
ffi::inflateSetDictionary(stream, dictionary.as_ptr(), dictionary.len() as ffi::uInt)
|
||||
};
|
||||
|
||||
match rc {
|
||||
ffi::MZ_STREAM_ERROR => decompress_failed(self.inner.inner.msg()),
|
||||
ffi::MZ_DATA_ERROR => decompress_need_dict(stream.adler as u32),
|
||||
ffi::MZ_OK => Ok(stream.adler as u32),
|
||||
ffi::MZ_DATA_ERROR => decompress_need_dict(unsafe { (*stream).adler } as u32),
|
||||
ffi::MZ_OK => Ok(unsafe { (*stream).adler } as u32),
|
||||
c => panic!("unknown return code: {}", c),
|
||||
}
|
||||
}
|
||||
@@ -574,6 +569,29 @@ impl fmt::Display for CompressError {
|
||||
}
|
||||
}
|
||||
|
||||
/// Allows `writer` to write data into the spare capacity of the `output` vector.
|
||||
/// This will not reallocate the vector provided or attempt to grow it, so space
|
||||
/// for the `output` must be reserved by the caller before calling this
|
||||
/// function.
|
||||
///
|
||||
/// `writer` needs to return the number of bytes written (and can also return
|
||||
/// another arbitrary return value).
|
||||
fn write_to_spare_capacity_of_vec<T>(
|
||||
output: &mut Vec<u8>,
|
||||
writer: impl FnOnce(&mut [u8]) -> (usize, T),
|
||||
) -> T {
|
||||
let cap = output.capacity();
|
||||
let len = output.len();
|
||||
|
||||
output.resize(output.capacity(), 0);
|
||||
let (bytes_written, ret) = writer(&mut output[len..]);
|
||||
|
||||
let new_len = core::cmp::min(len + bytes_written, cap); // Sanitizes `bytes_written`.
|
||||
output.resize(new_len, 0 /* unused */);
|
||||
|
||||
ret
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::io::Write;
|
||||
|
||||
75
third_party/rust/flate2/src/zlib/bufread.rs
vendored
75
third_party/rust/flate2/src/zlib/bufread.rs
vendored
@@ -7,9 +7,10 @@ use crate::{Compress, Decompress};
|
||||
|
||||
/// A ZLIB encoder, or compressor.
|
||||
///
|
||||
/// This structure consumes a [`BufRead`] interface, reading uncompressed data
|
||||
/// from the underlying reader, and emitting compressed data.
|
||||
/// This structure implements a [`Read`] interface. When read from, it reads
|
||||
/// uncompressed data from the underlying [`BufRead`] and provides the compressed data.
|
||||
///
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
|
||||
///
|
||||
/// # Examples
|
||||
@@ -47,6 +48,15 @@ impl<R: BufRead> ZlibEncoder<R> {
|
||||
data: Compress::new(level, true),
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new encoder with the given `compression` settings which will
|
||||
/// read uncompressed data from the given stream `r` and emit the compressed stream.
|
||||
pub fn new_with_compress(r: R, compression: Compress) -> ZlibEncoder<R> {
|
||||
ZlibEncoder {
|
||||
obj: r,
|
||||
data: compression,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reset_encoder_data<R>(zlib: &mut ZlibEncoder<R>) {
|
||||
@@ -119,9 +129,10 @@ impl<R: BufRead + Write> Write for ZlibEncoder<R> {
|
||||
|
||||
/// A ZLIB decoder, or decompressor.
|
||||
///
|
||||
/// This structure consumes a [`BufRead`] interface, reading compressed data
|
||||
/// from the underlying reader, and emitting uncompressed data.
|
||||
/// This structure implements a [`Read`] interface. When read from, it reads
|
||||
/// compressed data from the underlying [`BufRead`] and provides the uncompressed data.
|
||||
///
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
/// [`BufRead`]: https://doc.rust-lang.org/std/io/trait.BufRead.html
|
||||
///
|
||||
/// # Examples
|
||||
@@ -165,6 +176,15 @@ impl<R: BufRead> ZlibDecoder<R> {
|
||||
data: Decompress::new(true),
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new decoder which will decompress data read from the given
|
||||
/// stream, using the given `decompression` settings.
|
||||
pub fn new_with_decompress(r: R, decompression: Decompress) -> ZlibDecoder<R> {
|
||||
ZlibDecoder {
|
||||
obj: r,
|
||||
data: decompression,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn reset_decoder_data<R>(zlib: &mut ZlibDecoder<R>) {
|
||||
@@ -231,3 +251,50 @@ impl<R: BufRead + Write> Write for ZlibDecoder<R> {
|
||||
self.get_mut().flush()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod test {
|
||||
use crate::bufread::ZlibDecoder;
|
||||
use crate::zlib::write;
|
||||
use crate::Compression;
|
||||
use std::io::{Read, Write};
|
||||
|
||||
// ZlibDecoder consumes one zlib archive and then returns 0 for subsequent reads, allowing any
|
||||
// additional data to be consumed by the caller.
|
||||
#[test]
|
||||
fn decode_extra_data() {
|
||||
let expected = "Hello World";
|
||||
|
||||
let compressed = {
|
||||
let mut e = write::ZlibEncoder::new(Vec::new(), Compression::default());
|
||||
e.write(expected.as_ref()).unwrap();
|
||||
let mut b = e.finish().unwrap();
|
||||
b.push(b'x');
|
||||
b
|
||||
};
|
||||
|
||||
let mut output = Vec::new();
|
||||
let mut decoder = ZlibDecoder::new(compressed.as_slice());
|
||||
let decoded_bytes = decoder.read_to_end(&mut output).unwrap();
|
||||
assert_eq!(decoded_bytes, output.len());
|
||||
let actual = std::str::from_utf8(&output).expect("String parsing error");
|
||||
assert_eq!(
|
||||
actual, expected,
|
||||
"after decompression we obtain the original input"
|
||||
);
|
||||
|
||||
output.clear();
|
||||
assert_eq!(
|
||||
decoder.read(&mut output).unwrap(),
|
||||
0,
|
||||
"subsequent read of decoder returns 0, but inner reader can return additional data"
|
||||
);
|
||||
let mut reader = decoder.into_inner();
|
||||
assert_eq!(
|
||||
reader.read_to_end(&mut output).unwrap(),
|
||||
1,
|
||||
"extra data is accessible in underlying buf-read"
|
||||
);
|
||||
assert_eq!(output, b"x");
|
||||
}
|
||||
}
|
||||
|
||||
51
third_party/rust/flate2/src/zlib/read.rs
vendored
51
third_party/rust/flate2/src/zlib/read.rs
vendored
@@ -3,11 +3,12 @@ use std::io::prelude::*;
|
||||
|
||||
use super::bufread;
|
||||
use crate::bufreader::BufReader;
|
||||
use crate::Decompress;
|
||||
|
||||
/// A ZLIB encoder, or compressor.
|
||||
///
|
||||
/// This structure implements a [`Read`] interface and will read uncompressed
|
||||
/// data from an underlying stream and emit a stream of compressed data.
|
||||
/// This structure implements a [`Read`] interface. When read from, it reads
|
||||
/// uncompressed data from the underlying [`Read`] and provides the compressed data.
|
||||
///
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
///
|
||||
@@ -24,9 +25,9 @@ use crate::bufreader::BufReader;
|
||||
/// # fn open_hello_world() -> std::io::Result<Vec<u8>> {
|
||||
/// let f = File::open("examples/hello_world.txt")?;
|
||||
/// let mut z = ZlibEncoder::new(f, Compression::fast());
|
||||
/// let mut buffer = [0;50];
|
||||
/// let byte_count = z.read(&mut buffer)?;
|
||||
/// # Ok(buffer[0..byte_count].to_vec())
|
||||
/// let mut buffer = Vec::new();
|
||||
/// z.read_to_end(&mut buffer)?;
|
||||
/// # Ok(buffer)
|
||||
/// # }
|
||||
/// ```
|
||||
#[derive(Debug)]
|
||||
@@ -42,6 +43,14 @@ impl<R: Read> ZlibEncoder<R> {
|
||||
inner: bufread::ZlibEncoder::new(BufReader::new(r), level),
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new encoder with the given `compression` settings which will
|
||||
/// read uncompressed data from the given stream `r` and emit the compressed stream.
|
||||
pub fn new_with_compress(r: R, compression: crate::Compress) -> ZlibEncoder<R> {
|
||||
ZlibEncoder {
|
||||
inner: bufread::ZlibEncoder::new_with_compress(BufReader::new(r), compression),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> ZlibEncoder<R> {
|
||||
@@ -117,8 +126,8 @@ impl<W: Read + Write> Write for ZlibEncoder<W> {
|
||||
|
||||
/// A ZLIB decoder, or decompressor.
|
||||
///
|
||||
/// This structure implements a [`Read`] interface and takes a stream of
|
||||
/// compressed data as input, providing the decompressed data when read from.
|
||||
/// This structure implements a [`Read`] interface. When read from, it reads
|
||||
/// compressed data from the underlying [`Read`] and provides the uncompressed data.
|
||||
///
|
||||
/// [`Read`]: https://doc.rust-lang.org/std/io/trait.Read.html
|
||||
///
|
||||
@@ -160,7 +169,8 @@ impl<R: Read> ZlibDecoder<R> {
|
||||
ZlibDecoder::new_with_buf(r, vec![0; 32 * 1024])
|
||||
}
|
||||
|
||||
/// Same as `new`, but the intermediate buffer for data is specified.
|
||||
/// Creates a new decoder which will decompress data read from the given
|
||||
/// stream `r`, using `buf` as backing to speed up reading.
|
||||
///
|
||||
/// Note that the specified buffer will only be used up to its current
|
||||
/// length. The buffer's capacity will also not grow over time.
|
||||
@@ -169,6 +179,31 @@ impl<R: Read> ZlibDecoder<R> {
|
||||
inner: bufread::ZlibDecoder::new(BufReader::with_buf(buf, r)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new decoder which will decompress data read from the given
|
||||
/// stream `r`, along with `decompression` settings.
|
||||
pub fn new_with_decompress(r: R, decompression: Decompress) -> ZlibDecoder<R> {
|
||||
ZlibDecoder::new_with_decompress_and_buf(r, vec![0; 32 * 1024], decompression)
|
||||
}
|
||||
|
||||
/// Creates a new decoder which will decompress data read from the given
|
||||
/// stream `r`, using `buf` as backing to speed up reading,
|
||||
/// along with `decompression` settings to configure decoder.
|
||||
///
|
||||
/// Note that the specified buffer will only be used up to its current
|
||||
/// length. The buffer's capacity will also not grow over time.
|
||||
pub fn new_with_decompress_and_buf(
|
||||
r: R,
|
||||
buf: Vec<u8>,
|
||||
decompression: Decompress,
|
||||
) -> ZlibDecoder<R> {
|
||||
ZlibDecoder {
|
||||
inner: bufread::ZlibDecoder::new_with_decompress(
|
||||
BufReader::with_buf(buf, r),
|
||||
decompression,
|
||||
),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<R> ZlibDecoder<R> {
|
||||
|
||||
59
third_party/rust/flate2/src/zlib/write.rs
vendored
59
third_party/rust/flate2/src/zlib/write.rs
vendored
@@ -44,6 +44,14 @@ impl<W: Write> ZlibEncoder<W> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new encoder which will write compressed data to the stream
|
||||
/// `w` with the given `compression` settings.
|
||||
pub fn new_with_compress(w: W, compression: Compress) -> ZlibEncoder<W> {
|
||||
ZlibEncoder {
|
||||
inner: zio::Writer::new(w, compression),
|
||||
}
|
||||
}
|
||||
|
||||
/// Acquires a reference to the underlying writer.
|
||||
pub fn get_ref(&self) -> &W {
|
||||
self.inner.get_ref()
|
||||
@@ -218,6 +226,17 @@ impl<W: Write> ZlibDecoder<W> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Creates a new decoder which will write uncompressed data to the stream `w`
|
||||
/// using the given `decompression` settings.
|
||||
///
|
||||
/// When this decoder is dropped or unwrapped the final pieces of data will
|
||||
/// be flushed.
|
||||
pub fn new_with_decompress(w: W, decompression: Decompress) -> ZlibDecoder<W> {
|
||||
ZlibDecoder {
|
||||
inner: zio::Writer::new(w, decompression),
|
||||
}
|
||||
}
|
||||
|
||||
/// Acquires a reference to the underlying writer.
|
||||
pub fn get_ref(&self) -> &W {
|
||||
self.inner.get_ref()
|
||||
@@ -319,3 +338,43 @@ impl<W: Read + Write> Read for ZlibDecoder<W> {
|
||||
self.inner.get_mut().read(buf)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
use crate::Compression;
|
||||
|
||||
const STR: &str = "Hello World Hello World Hello World Hello World Hello World \
|
||||
Hello World Hello World Hello World Hello World Hello World \
|
||||
Hello World Hello World Hello World Hello World Hello World \
|
||||
Hello World Hello World Hello World Hello World Hello World \
|
||||
Hello World Hello World Hello World Hello World Hello World";
|
||||
|
||||
// ZlibDecoder consumes one zlib archive and then returns 0 for subsequent writes, allowing any
|
||||
// additional data to be consumed by the caller.
|
||||
#[test]
|
||||
fn decode_extra_data() {
|
||||
let compressed = {
|
||||
let mut e = ZlibEncoder::new(Vec::new(), Compression::default());
|
||||
e.write(STR.as_ref()).unwrap();
|
||||
let mut b = e.finish().unwrap();
|
||||
b.push(b'x');
|
||||
b
|
||||
};
|
||||
|
||||
let mut writer = Vec::new();
|
||||
let mut decoder = ZlibDecoder::new(writer);
|
||||
let mut consumed_bytes = 0;
|
||||
loop {
|
||||
let n = decoder.write(&compressed[consumed_bytes..]).unwrap();
|
||||
if n == 0 {
|
||||
break;
|
||||
}
|
||||
consumed_bytes += n;
|
||||
}
|
||||
writer = decoder.finish().unwrap();
|
||||
let actual = String::from_utf8(writer).expect("String parsing error");
|
||||
assert_eq!(actual, STR);
|
||||
assert_eq!(&compressed[consumed_bytes..], b"x");
|
||||
}
|
||||
}
|
||||
|
||||
2
third_party/rust/memchr/.cargo-checksum.json
vendored
2
third_party/rust/memchr/.cargo-checksum.json
vendored
@@ -1 +1 @@
|
||||
{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"fdeda7d32fa12e4a1589d13c74ae5fd4f1065d0219ba73f8492e28248d84d146","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"51d941627e004588863b137918e908e34c4d599d12e03afd3e489e2bb61e3704","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","build.rs":"5638d9b60d40f44db96767ce32246de42158571364cce92531a85307ac7eda6c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","scripts/make-byte-frequency-table":"21d1ded41fe5a780507bb88e1910d471b4081cc626a48891a408712e45b7b2bf","src/cow.rs":"a23c3b009e5215b5c3ac46627a5dd844235bef0136d76b3fc1eeeb744565c125","src/lib.rs":"9430cd37b13399df8f8c27a752ccdf6422a563e24171d1b4802424f9193a8f37","src/memchr/c.rs":"34f7caf79316f4b03908832fdbd4aff367f2bc30eae291478cc5a0a108ce6e76","src/memchr/fallback.rs":"48764f18b7ff1f00a9ac1c4ed8ec96ad11f7b09b2d062a8ed3fe81160add627d","src/memchr/iter.rs":"61463e7fa22ca8f212c2cbfb882af0c87b0fb1bc6b4676678a4822a581ec1037","src/memchr/mod.rs":"d5bfc881c7c089e1a0825209a4d21c3f792f38c6f16f3bc715d0d539477376b6","src/memchr/naive.rs":"c7453bc99cc4e58eb37cf5a50c88688833e50a270ee1849baefddb8acc0ccd94","src/memchr/x86/avx.rs":"3c2750174ce7ff033daa4096e7961bbee9a2da898068266b27dee22ef8cfddad","src/memchr/x86/mod.rs":"a642d5aefdb7452ead4ab7946b5c6cfb6cc6df636dcd0ebbd6f5e6e1ac8305c0","src/memchr/x86/sse2.rs":"79ede1aba71a655e86eb5873d682c5da26933bffa4fffd7042a2313f18cf4675","src/memchr/x86/sse42.rs":"de4c6f354dbfec170876cddb8d9157b35928f96ed2339a0c5d094cc953a2f52d","src/memmem/byte_frequencies.rs":"2fb85b381c038c1e44ce94294531cdcd339dca48b1e61f41455666e802cbbc9e","src/memmem/genericsimd.rs":"9ce7283db0994438eb6df2bea6ad984e80512b6f643ebae7ae7d82eb5d39fa11","src/memmem/mod.rs":"949fb8e11a23030d59b34fd8c7c196150f133e909a8448705c77a751c436907d","src/memmem/prefilter/fallback.rs":"d32248c41aa09701c2410c52f948bbe009dd1b13a01b4
44ce0fb8c4b4e404ede","src/memmem/prefilter/genericsimd.rs":"57d5523cf0299b37ef1dd1b351e3d387d5070f2f7ecffc9a9ca66528101ebd3f","src/memmem/prefilter/mod.rs":"ad8b4ac72c025f11d6b641c5fc0888468112758dcdc6bb72b43f932d2005ea4e","src/memmem/prefilter/wasm.rs":"14f684412fca35445a94760a6973d772dfd22d329ebae3b52b525d2a1f3acd63","src/memmem/prefilter/x86/avx.rs":"e344cae36a88b59c07a1c1d395edeb9c636a399e1528ce69b2bc7c94d8d8bb0b","src/memmem/prefilter/x86/mod.rs":"df2d84b23b22574383c281d33671a121b5faf7b1a48dd6f67c3085cd02cd4498","src/memmem/prefilter/x86/sse.rs":"daa648fc2a90d37299803a80d632e8a47a30ce8719d0ac2a2ea2cde3b30b6fef","src/memmem/rabinkarp.rs":"9b44eb092524a51792eba4deaca6c6d3cbc51db98cb548ea4fa7e5d8988cc71a","src/memmem/rarebytes.rs":"571082c71fc3dca5e4304171d41fb3c44e241df6dcd88bac4d7a15b52f9521e0","src/memmem/twoway.rs":"102f8bbb29696d5656cd2f5a1769a3af96d044fb09972881455cfb6424d6b50a","src/memmem/util.rs":"0194d40b912137e2352863af9cc1c0273baf97fdf6b27799628680846c06febd","src/memmem/vector.rs":"96e6f45f8ad11a822c4f18393839225d7f40f898ad657e109ba1b3288af0ef8f","src/memmem/wasm.rs":"87da03c964f054db30cc972d07a74e8902ec1248e2338ecd1dbac430f43fffc2","src/memmem/x86/avx.rs":"de85dbc415603c844baf94fbc92d676a738dd4b99246be468bd5f7be5921b25f","src/memmem/x86/mod.rs":"5012fca41b91caf229278aa221e8dd514ede497fe4938d64562d03fef2fc46e6","src/memmem/x86/sse.rs":"148a40c0952aca8b16d9eb3e724a5b9b60693bc7b2bcc5209bcc43c94faf560a","src/tests/memchr/iter.rs":"b68c7ecdb6222c5dbf61212e6863f78f98ad343868a74cb8612692fc790240b2","src/tests/memchr/memchr.rs":"09589c5899324c9b26ea4513c80389a2ffdf6ddc460031e2ca8da43bd493ae3f","src/tests/memchr/mod.rs":"29e0855f946c7babf603b3d610a29235a56a26a4c867fef0768542388eac4c95","src/tests/memchr/simple.rs":"b9997903ede972272c01c1750522a20692a28488cc7c5cf745ea83ff96d65fe3","src/tests/memchr/testdata.rs":"3e34377fe60eca3687d1ebc66127bd631af27ceaccc8f08806a293199b69a83f","src/tests/mod.rs":"9054a2a2f9af140f305ca29155d942fafbac9fb0874067611adc8a5990546be4
","src/tests/x86_64-soft_float.json":"c0e416487fe9b4809534edb7db2a9eff3453dc40d9f1e23362c37f45a77ec717"},"package":"2dffe52ecf27772e601905b7522cb4ef790d2cc203488bbd0e2fe85fcb74566d"}
|
||||
{"files":{"COPYING":"01c266bced4a434da0051174d6bee16a4c82cf634e2679b6155d40d75012390f","Cargo.toml":"8333f270d28547eec87c63083418550a6d9d1de14e9adbcba94ebe0f2a40db61","LICENSE-MIT":"0f96a83840e146e43c0ec96a22ec1f392e0680e6c1226e6f3ba87e0740af850f","README.md":"92a74aaffe011bdaa06fbc34a01686a6eba58ca1322e976759417a547fddf734","UNLICENSE":"7e12e5df4bae12cb21581ba157ced20e1986a0508dd10d0e8a4ab9a4cf94e85c","rustfmt.toml":"1ca600239a27401c4a43f363cf3f38183a212affc1f31bff3ae93234bbaec228","src/arch/aarch64/memchr.rs":"5bb70f915084e629d940dbc322f5b9096b2e658cf63fea8a2f6e7550412e73a0","src/arch/aarch64/mod.rs":"44cd1a614bd66f1e66fc86c541d3c3b8d3a14a644c13e8bf816df3f555eac2d4","src/arch/aarch64/neon/memchr.rs":"e8c00b8fb2c7e2711832ae3cedefe59f32ebedd7dfa4d0ec6de2a566c979daea","src/arch/aarch64/neon/mod.rs":"eab6d56c2b2354db4ee395f40282cd49f97e2ab853547be5de6e65fbe1b2f634","src/arch/aarch64/neon/packedpair.rs":"fbdfdbfaf7b76b234db261fbe55a55c4479d32cdc65a654d60417c2d1c237849","src/arch/all/memchr.rs":"029f77c622e59116d2ce011e8b069118c23e21c37e1561c35c82448d4ad4c430","src/arch/all/mod.rs":"05f3fc2b069682eb1545fc6366d167bb620a454365dac8b8dd6cde6cd64de18a","src/arch/all/packedpair/default_rank.rs":"abffd1b5b8b7a3be95c03dd1105b905c246a379854dc56f1e846ea7c4408f2c7","src/arch/all/packedpair/mod.rs":"292b66042c5b5c78bba33db6526aeae6904db803d601fcdd29032b87b3eb3754","src/arch/all/rabinkarp.rs":"236f69c04b90c14c253ae6c8d9b78150b4a56df75bb50af6d63b15145668b7cc","src/arch/all/shiftor.rs":"0d79117f52a1e4795843603a3bb0b45397df4ad5e4184bbc923658dab9dc3b5f","src/arch/all/twoway.rs":"47c97a265bfbafde90a618946643d3e97dfd9a85f01aa4ac758cd4c1573a450d","src/arch/generic/memchr.rs":"88290761bab740878401e914d71866da6501cdcef53d1249ec6fda4c7f9c12ae","src/arch/generic/mod.rs":"1dd75f61e0ea2563b8205a08aaa7b55500130aa331d18b9e9f995724b66c7a39","src/arch/generic/packedpair.rs":"a4a6efb29877ced9cf4c4e5ae9f36a79f019a16b831f2b9424899a1513d458ad","src/arch/mod.rs":"ca3960b7e2ed28d1b3c121710a870430531aad792
f64d4dcb4ca4709d6cbda30","src/arch/wasm32/memchr.rs":"d88ac79f891d8530f505f5035062d3da274a05d66c611480c75430d52709d052","src/arch/wasm32/mod.rs":"a20377aa8fe07d68594879101dc73061e4f51d9c8d812b593b1f376e3c8add79","src/arch/wasm32/simd128/memchr.rs":"bac2c4c43fe710c83a6f2b1118fede043be89dd821d4b532907f129f09fdb5cf","src/arch/wasm32/simd128/mod.rs":"c157b373faedbfd65323be432e25bc411d97aa1b7bc58e76048614c7b2bf3bf6","src/arch/wasm32/simd128/packedpair.rs":"288ba6e5eee6a7a8e5e45c64cff1aa5d72d996c2a6bc228be372c75789f08e45","src/arch/x86_64/avx2/memchr.rs":"576ec0c30f49874f7fd9f6caeb490d56132c0fbbaa4d877b1aa532cafce19323","src/arch/x86_64/avx2/mod.rs":"0033d1b712d0b10f0f273ef9aa8caa53e05e49f4c56a64f39af0b9df97eec584","src/arch/x86_64/avx2/packedpair.rs":"87b69cb4301815906127db4f6370f572c7c5d5dad35c0946c00ad888dbcaec8c","src/arch/x86_64/memchr.rs":"99a1dbe4156d498e6f910d06d3d3b31e7f6d06dff7d13a4c51b33a02b7e2fba9","src/arch/x86_64/mod.rs":"61b2aa876942fd3e78714c2ae21e356c8634545c06995020f443fa50218df027","src/arch/x86_64/sse2/memchr.rs":"68fc3b8f9eddf82192979c3aa11e5141f085cbb993c49c340558719a904679dc","src/arch/x86_64/sse2/mod.rs":"38b70ae52a64ec974dbb91d04d6ca8013d9e06d1fe4af852206bbc2faf1c59aa","src/arch/x86_64/sse2/packedpair.rs":"241ea981d8eea6024769f1c9375f726a9bb9700160c5857781d4befd9f5ef55d","src/cow.rs":"34eddd02cb82cc2d5a2c640891d64efe332dabcc1eea5115764200d8f46b66f7","src/ext.rs":"210f89d1e32211bc64414cbd56e97b4f56ce8a8832d321d77a9fe519634e27ea","src/lib.rs":"614f778a41e88a29ea0ceb8e92c839dbb6b5a61c967f8bfd962975e18f932c71","src/macros.rs":"3e4b39252bfa471fad384160a43f113ebfec7bec46a85d16f006622881dd2081","src/memchr.rs":"6ae779ec5d00f443075316e0105edf30b489a38e2e96325bec14ccecd014145b","src/memmem/mod.rs":"1b0a9d6a681fd0887c677c4fc8d4c8f9719ddde250bdd5ea545365c1a7fb9094","src/memmem/searcher.rs":"7763472d43c66df596ca0697c07db0b4666d38a6a14f64f9f298aaf756c4a715","src/tests/memchr/mod.rs":"269f8e4b4f7f5ea458f27a3c174eb1020ffb2484eeba9464170beb51747df69b","src/tests/
memchr/naive.rs":"6a0bee033e5edfb5b1d5769a5fa1c78388f7e9ff7bb91cb67f0ad029289e00e7","src/tests/memchr/prop.rs":"7bf7435087fbf08c5014c216b76575349735590d6b1d0e448921a1dc17bc0ea7","src/tests/mod.rs":"7cec8f809e279310a465c6a7725087970f219a676cc76c83de30c695bb490740","src/tests/packedpair.rs":"b02ec4fbb61a8653cb5f2268c31bc9168b8043347f2abdcc74081acf83b98e15","src/tests/substring/mod.rs":"c7660d10749363ac4687e7da2b5fda60768230425df8ba416c0c28b8d56a5c74","src/tests/substring/naive.rs":"df6f55d165382b8a53762ba4c324926cac13ebc62cde1805f4ce08740b326483","src/tests/substring/prop.rs":"38c15992609b5681a95d838ae6f2933e00a1219f2c971bfba245f96e0729fcdc","src/vector.rs":"3b15d5cb9715f26e655598eacbb8bbba74cbe8ddb2fb969d13aa75f216a118dd"},"package":"78ca9ab1a0babb1e7d5695e3530886289c18cf2f87ec19a575a0abdce112e3a3"}
|
||||
49
third_party/rust/memchr/Cargo.toml
vendored
49
third_party/rust/memchr/Cargo.toml
vendored
@@ -10,44 +10,59 @@
|
||||
# See Cargo.toml.orig for the original contents.
|
||||
|
||||
[package]
|
||||
edition = "2018"
|
||||
edition = "2021"
|
||||
rust-version = "1.61"
|
||||
name = "memchr"
|
||||
version = "2.5.0"
|
||||
version = "2.7.4"
|
||||
authors = [
|
||||
"Andrew Gallant <jamslam@gmail.com>",
|
||||
"bluss",
|
||||
]
|
||||
build = false
|
||||
exclude = [
|
||||
"/bench",
|
||||
"/.github",
|
||||
"/benchmarks",
|
||||
"/fuzz",
|
||||
"/scripts",
|
||||
"/tmp",
|
||||
]
|
||||
description = "Safe interface to memchr."
|
||||
autobins = false
|
||||
autoexamples = false
|
||||
autotests = false
|
||||
autobenches = false
|
||||
description = """
|
||||
Provides extremely fast (uses SIMD on x86_64, aarch64 and wasm32) routines for
|
||||
1, 2 or 3 byte search and single substring search.
|
||||
"""
|
||||
homepage = "https://github.com/BurntSushi/memchr"
|
||||
documentation = "https://docs.rs/memchr/"
|
||||
readme = "README.md"
|
||||
keywords = [
|
||||
"memchr",
|
||||
"char",
|
||||
"scan",
|
||||
"strchr",
|
||||
"string",
|
||||
"memmem",
|
||||
"substring",
|
||||
"find",
|
||||
"search",
|
||||
]
|
||||
license = "Unlicense/MIT"
|
||||
license = "Unlicense OR MIT"
|
||||
repository = "https://github.com/BurntSushi/memchr"
|
||||
|
||||
[package.metadata.docs.rs]
|
||||
rustdoc-args = ["--generate-link-to-definition"]
|
||||
|
||||
[profile.bench]
|
||||
debug = true
|
||||
debug = 2
|
||||
|
||||
[profile.release]
|
||||
debug = true
|
||||
debug = 2
|
||||
|
||||
[profile.test]
|
||||
opt-level = 3
|
||||
debug = true
|
||||
debug = 2
|
||||
|
||||
[lib]
|
||||
name = "memchr"
|
||||
path = "src/lib.rs"
|
||||
bench = false
|
||||
|
||||
[dependencies.compiler_builtins]
|
||||
@@ -59,20 +74,22 @@ version = "1.0.0"
|
||||
optional = true
|
||||
package = "rustc-std-workspace-core"
|
||||
|
||||
[dependencies.libc]
|
||||
version = "0.2.18"
|
||||
[dependencies.log]
|
||||
version = "0.4.20"
|
||||
optional = true
|
||||
default-features = false
|
||||
|
||||
[dev-dependencies.quickcheck]
|
||||
version = "1.0.3"
|
||||
default-features = false
|
||||
|
||||
[features]
|
||||
alloc = []
|
||||
default = ["std"]
|
||||
libc = []
|
||||
logging = ["dep:log"]
|
||||
rustc-dep-of-std = [
|
||||
"core",
|
||||
"compiler_builtins",
|
||||
]
|
||||
std = []
|
||||
std = ["alloc"]
|
||||
use_std = ["std"]
|
||||
|
||||
125
third_party/rust/memchr/README.md
vendored
125
third_party/rust/memchr/README.md
vendored
@@ -35,30 +35,19 @@ memchr links to the standard library by default, but you can disable the
|
||||
memchr = { version = "2", default-features = false }
|
||||
```
|
||||
|
||||
On x86 platforms, when the `std` feature is disabled, the SSE2 accelerated
|
||||
implementations will be used. When `std` is enabled, AVX accelerated
|
||||
On `x86_64` platforms, when the `std` feature is disabled, the SSE2 accelerated
|
||||
implementations will be used. When `std` is enabled, AVX2 accelerated
|
||||
implementations will be used if the CPU is determined to support it at runtime.
|
||||
|
||||
### Using libc
|
||||
|
||||
`memchr` is a routine that is part of libc, although this crate does not use
|
||||
libc by default. Instead, it uses its own routines, which are either vectorized
|
||||
or generic fallback routines. In general, these should be competitive with
|
||||
what's in libc, although this has not been tested for all architectures. If
|
||||
using `memchr` from libc is desirable and a vectorized routine is not otherwise
|
||||
available in this crate, then enabling the `libc` feature will use libc's
|
||||
version of `memchr`.
|
||||
|
||||
The rest of the functions in this crate, e.g., `memchr2` or `memrchr3` and the
|
||||
substring search routines, will always use the implementations in this crate.
|
||||
One exception to this is `memrchr`, which is an extension in `libc` found on
|
||||
Linux. On Linux, `memrchr` is used in precisely the same scenario as `memchr`,
|
||||
as described above.
|
||||
SIMD accelerated routines are also available on the `wasm32` and `aarch64`
|
||||
targets. The `std` feature is not required to use them.
|
||||
|
||||
When a SIMD version is not available, then this crate falls back to
|
||||
[SWAR](https://en.wikipedia.org/wiki/SWAR) techniques.
|
||||
|
||||
### Minimum Rust version policy
|
||||
|
||||
This crate's minimum supported `rustc` version is `1.41.1`.
|
||||
This crate's minimum supported `rustc` version is `1.61.0`.
|
||||
|
||||
The current policy is that the minimum Rust version required to use this crate
|
||||
can be increased in minor version updates. For example, if `crate 1.0` requires
|
||||
@@ -105,3 +94,103 @@ has a few different algorithms to choose from depending on the situation.
|
||||
is used. If possible, a prefilter based on the "Generic SIMD" algorithm
|
||||
linked above is used to find candidates quickly. A dynamic heuristic is used
|
||||
to detect if the prefilter is ineffective, and if so, disables it.
|
||||
|
||||
|
||||
### Why is the standard library's substring search so much slower?
|
||||
|
||||
We'll start by establishing what the difference in performance actually
|
||||
is. There are two relevant benchmark classes to consider: `prebuilt` and
|
||||
`oneshot`. The `prebuilt` benchmarks are designed to measure---to the extent
|
||||
possible---search time only. That is, the benchmark first starts by building a
|
||||
searcher and then only tracking the time for _using_ the searcher:
|
||||
|
||||
```
|
||||
$ rebar rank benchmarks/record/x86_64/2023-08-26.csv --intersection -e memchr/memmem/prebuilt -e std/memmem/prebuilt
|
||||
Engine Version Geometric mean of speed ratios Benchmark count
|
||||
------ ------- ------------------------------ ---------------
|
||||
rust/memchr/memmem/prebuilt 2.5.0 1.03 53
|
||||
rust/std/memmem/prebuilt 1.73.0-nightly 180dffba1 6.50 53
|
||||
```
|
||||
|
||||
Conversely, the `oneshot` benchmark class measures the time it takes to both
|
||||
build the searcher _and_ use it:
|
||||
|
||||
```
|
||||
$ rebar rank benchmarks/record/x86_64/2023-08-26.csv --intersection -e memchr/memmem/oneshot -e std/memmem/oneshot
|
||||
Engine Version Geometric mean of speed ratios Benchmark count
|
||||
------ ------- ------------------------------ ---------------
|
||||
rust/memchr/memmem/oneshot 2.5.0 1.04 53
|
||||
rust/std/memmem/oneshot 1.73.0-nightly 180dffba1 5.26 53
|
||||
```
|
||||
|
||||
**NOTE:** Replace `rebar rank` with `rebar cmp` in the above commands to
|
||||
explore the specific benchmarks and their differences.
|
||||
|
||||
So in both cases, this crate is quite a bit faster over a broad sampling of
|
||||
benchmarks regardless of whether you measure only search time or search time
|
||||
plus construction time. The difference is a little smaller when you include
|
||||
construction time in your measurements.
|
||||
|
||||
These two different types of benchmark classes make for a nice segue into
|
||||
one reason why the standard library's substring search can be slower: API
|
||||
design. In the standard library, the only APIs available to you require
|
||||
one to re-construct the searcher for every search. While you can benefit
|
||||
from building a searcher once and iterating over all matches in a single
|
||||
string, you cannot reuse that searcher to search other strings. This might
|
||||
come up when, for example, searching a file one line at a time. You'll need
|
||||
to re-build the searcher for every line searched, and this can [really
|
||||
matter][burntsushi-bstr-blog].
|
||||
|
||||
**NOTE:** The `prebuilt` benchmark for the standard library can't actually
|
||||
avoid measuring searcher construction at some level, because there is no API
|
||||
for it. Instead, the benchmark consists of building the searcher once and then
|
||||
finding all matches in a single string via an iterator. This tends to
|
||||
approximate a benchmark where searcher construction isn't measured, but it
|
||||
isn't perfect. While this means the comparison is not strictly
|
||||
apples-to-apples, it does reflect what is maximally possible with the standard
|
||||
library, and thus reflects the best that one could do in a real world scenario.
|
||||
|
||||
While there is more to the story than just API design here, it's important to
|
||||
point out that even if the standard library's substring search were a precise
|
||||
clone of this crate internally, it would still be at a disadvantage in some
|
||||
workloads because of its API. (The same also applies to C's standard library
|
||||
`memmem` function. There is no way to amortize construction of the searcher.
|
||||
You need to pay for it on every call.)
|
||||
|
||||
The other reason for the difference in performance is that
|
||||
the standard library has trouble using SIMD. In particular, substring search
|
||||
is implemented in the `core` library, where platform specific code generally
|
||||
can't exist. That's an issue because in order to utilize SIMD beyond SSE2
|
||||
while maintaining portable binaries, one needs to use [dynamic CPU feature
|
||||
detection][dynamic-cpu], and that in turn requires platform specific code.
|
||||
While there is [an RFC for enabling target feature detection in
|
||||
`core`][core-feature], it doesn't yet exist.
|
||||
|
||||
The bottom line here is that `core`'s substring search implementation is
|
||||
limited to making use of SSE2, but not AVX.
|
||||
|
||||
Still though, this crate does accelerate substring search even when only SSE2
|
||||
is available. The standard library could therefore adopt the techniques in this
|
||||
crate just for SSE2. The reason why that hasn't happened yet isn't totally
|
||||
clear to me. It likely needs a champion to push it through. The standard
|
||||
library tends to be more conservative in these things. With that said, the
|
||||
standard library does use some [SSE2 acceleration on `x86-64`][std-sse2] added
|
||||
in [this PR][std-sse2-pr]. However, at the time of writing, it is only used
|
||||
for short needles and doesn't use the frequency based heuristics found in this
|
||||
crate.
|
||||
|
||||
**NOTE:** Another thing worth mentioning is that the standard library's
|
||||
substring search routine requires that both the needle and haystack have type
|
||||
`&str`. Unless you can assume that your data is valid UTF-8, building a `&str`
|
||||
will come with the overhead of UTF-8 validation. This may in turn result in
|
||||
overall slower searching depending on your workload. In contrast, the `memchr`
|
||||
crate permits both the needle and the haystack to have type `&[u8]`, where
|
||||
`&[u8]` can be created from a `&str` with zero cost. Therefore, the substring
|
||||
search in this crate is strictly more flexible than what the standard library
|
||||
provides.
|
||||
|
||||
[burntsushi-bstr-blog]: https://blog.burntsushi.net/bstr/#motivation-based-on-performance
|
||||
[dynamic-cpu]: https://doc.rust-lang.org/std/arch/index.html#dynamic-cpu-feature-detection
|
||||
[core-feature]: https://github.com/rust-lang/rfcs/pull/3469
|
||||
[std-sse2]: https://github.com/rust-lang/rust/blob/bf9229a2e366b4c311f059014a4aa08af16de5d8/library/core/src/str/pattern.rs#L1719-L1857
|
||||
[std-sse2-pr]: https://github.com/rust-lang/rust/pull/103779
|
||||
|
||||
88
third_party/rust/memchr/build.rs
vendored
88
third_party/rust/memchr/build.rs
vendored
@@ -1,88 +0,0 @@
|
||||
use std::env;
|
||||
|
||||
fn main() {
|
||||
enable_simd_optimizations();
|
||||
enable_libc();
|
||||
}
|
||||
|
||||
// This adds various simd cfgs if this compiler and target support it.
|
||||
//
|
||||
// This can be disabled with RUSTFLAGS="--cfg memchr_disable_auto_simd", but
|
||||
// this is generally only intended for testing.
|
||||
//
|
||||
// On targets which don't feature SSE2, this is disabled, as LLVM wouln't know
|
||||
// how to work with SSE2 operands. Enabling SSE4.2 and AVX on SSE2-only targets
|
||||
// is not a problem. In that case, the fastest option will be chosen at
|
||||
// runtime.
|
||||
fn enable_simd_optimizations() {
|
||||
if is_env_set("CARGO_CFG_MEMCHR_DISABLE_AUTO_SIMD") {
|
||||
return;
|
||||
}
|
||||
let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap();
|
||||
match &arch[..] {
|
||||
"x86_64" => {
|
||||
if !target_has_feature("sse2") {
|
||||
return;
|
||||
}
|
||||
println!("cargo:rustc-cfg=memchr_runtime_simd");
|
||||
println!("cargo:rustc-cfg=memchr_runtime_sse2");
|
||||
println!("cargo:rustc-cfg=memchr_runtime_sse42");
|
||||
println!("cargo:rustc-cfg=memchr_runtime_avx");
|
||||
}
|
||||
"wasm32" | "wasm64" => {
|
||||
if !target_has_feature("simd128") {
|
||||
return;
|
||||
}
|
||||
println!("cargo:rustc-cfg=memchr_runtime_simd");
|
||||
println!("cargo:rustc-cfg=memchr_runtime_wasm128");
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
}
|
||||
|
||||
// This adds a `memchr_libc` cfg if and only if libc can be used, if no other
|
||||
// better option is available.
|
||||
//
|
||||
// This could be performed in the source code, but it's simpler to do it once
|
||||
// here and consolidate it into one cfg knob.
|
||||
//
|
||||
// Basically, we use libc only if its enabled and if we aren't targeting a
|
||||
// known bad platform. For example, wasm32 doesn't have a libc and the
|
||||
// performance of memchr on Windows is seemingly worse than the fallback
|
||||
// implementation.
|
||||
fn enable_libc() {
|
||||
const NO_ARCH: &'static [&'static str] = &["wasm32", "windows"];
|
||||
const NO_ENV: &'static [&'static str] = &["sgx"];
|
||||
|
||||
if !is_feature_set("LIBC") {
|
||||
return;
|
||||
}
|
||||
|
||||
let arch = match env::var("CARGO_CFG_TARGET_ARCH") {
|
||||
Err(_) => return,
|
||||
Ok(arch) => arch,
|
||||
};
|
||||
let env = match env::var("CARGO_CFG_TARGET_ENV") {
|
||||
Err(_) => return,
|
||||
Ok(env) => env,
|
||||
};
|
||||
if NO_ARCH.contains(&&*arch) || NO_ENV.contains(&&*env) {
|
||||
return;
|
||||
}
|
||||
|
||||
println!("cargo:rustc-cfg=memchr_libc");
|
||||
}
|
||||
|
||||
fn is_feature_set(name: &str) -> bool {
|
||||
is_env_set(&format!("CARGO_FEATURE_{}", name))
|
||||
}
|
||||
|
||||
fn is_env_set(name: &str) -> bool {
|
||||
env::var_os(name).is_some()
|
||||
}
|
||||
|
||||
fn target_has_feature(feature: &str) -> bool {
|
||||
env::var("CARGO_CFG_TARGET_FEATURE")
|
||||
.map(|features| features.contains(feature))
|
||||
.unwrap_or(false)
|
||||
}
|
||||
@@ -1,74 +0,0 @@
|
||||
#!/usr/bin/env python
|
||||
|
||||
# This does simple normalized frequency analysis on UTF-8 encoded text. The
|
||||
# result of the analysis is translated to a ranked list, where every byte is
|
||||
# assigned a rank. This list is written to src/freqs.rs.
|
||||
#
|
||||
# Currently, the frequencies are generated from the following corpuses:
|
||||
#
|
||||
# * The CIA world fact book
|
||||
# * The source code of rustc
|
||||
# * Septuaginta
|
||||
|
||||
from __future__ import absolute_import, division, print_function
|
||||
|
||||
import argparse
|
||||
from collections import Counter
|
||||
import sys
|
||||
|
||||
preamble = '''
|
||||
// NOTE: The following code was generated by "scripts/frequencies.py", do not
|
||||
// edit directly
|
||||
'''.lstrip()
|
||||
|
||||
|
||||
def eprint(*args, **kwargs):
|
||||
kwargs['file'] = sys.stderr
|
||||
print(*args, **kwargs)
|
||||
|
||||
|
||||
def main():
|
||||
p = argparse.ArgumentParser()
|
||||
p.add_argument('corpus', metavar='FILE', nargs='+')
|
||||
args = p.parse_args()
|
||||
|
||||
# Get frequency counts of each byte.
|
||||
freqs = Counter()
|
||||
for i in range(0, 256):
|
||||
freqs[i] = 0
|
||||
|
||||
eprint('reading entire corpus into memory')
|
||||
corpus = []
|
||||
for fpath in args.corpus:
|
||||
corpus.append(open(fpath, 'rb').read())
|
||||
|
||||
eprint('computing byte frequencies')
|
||||
for c in corpus:
|
||||
for byte in c:
|
||||
freqs[byte] += 1.0 / float(len(c))
|
||||
|
||||
eprint('writing Rust code')
|
||||
# Get the rank of each byte. A lower rank => lower relative frequency.
|
||||
rank = [0] * 256
|
||||
for i, (byte, _) in enumerate(freqs.most_common()):
|
||||
# print(byte)
|
||||
rank[byte] = 255 - i
|
||||
|
||||
# Forcefully set the highest rank possible for bytes that start multi-byte
|
||||
# UTF-8 sequences. The idea here is that a continuation byte will be more
|
||||
# discerning in a homogenous haystack.
|
||||
for byte in range(0xC0, 0xFF + 1):
|
||||
rank[byte] = 255
|
||||
|
||||
# Now write Rust.
|
||||
olines = ['pub const BYTE_FREQUENCIES: [u8; 256] = [']
|
||||
for byte in range(256):
|
||||
olines.append(' %3d, // %r' % (rank[byte], chr(byte)))
|
||||
olines.append('];')
|
||||
|
||||
print(preamble)
|
||||
print('\n'.join(olines))
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
137
third_party/rust/memchr/src/arch/aarch64/memchr.rs
vendored
Normal file
137
third_party/rust/memchr/src/arch/aarch64/memchr.rs
vendored
Normal file
@@ -0,0 +1,137 @@
|
||||
/*!
|
||||
Wrapper routines for `memchr` and friends.
|
||||
|
||||
These routines choose the best implementation at compile time. (This is
|
||||
different from `x86_64` because it is expected that `neon` is almost always
|
||||
available for `aarch64` targets.)
|
||||
*/
|
||||
|
||||
macro_rules! defraw {
|
||||
($ty:ident, $find:ident, $start:ident, $end:ident, $($needles:ident),+) => {{
|
||||
#[cfg(target_feature = "neon")]
|
||||
{
|
||||
use crate::arch::aarch64::neon::memchr::$ty;
|
||||
|
||||
debug!("chose neon for {}", stringify!($ty));
|
||||
debug_assert!($ty::is_available());
|
||||
// SAFETY: We know that wasm memchr is always available whenever
|
||||
// code is compiled for `aarch64` with the `neon` target feature
|
||||
// enabled.
|
||||
$ty::new_unchecked($($needles),+).$find($start, $end)
|
||||
}
|
||||
#[cfg(not(target_feature = "neon"))]
|
||||
{
|
||||
use crate::arch::all::memchr::$ty;
|
||||
|
||||
debug!(
|
||||
"no neon feature available, using fallback for {}",
|
||||
stringify!($ty),
|
||||
);
|
||||
$ty::new($($needles),+).$find($start, $end)
|
||||
}
|
||||
}}
|
||||
}
|
||||
|
||||
/// memchr, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `One::find_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn memchr_raw(
|
||||
n1: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
defraw!(One, find_raw, start, end, n1)
|
||||
}
|
||||
|
||||
/// memrchr, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `One::rfind_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn memrchr_raw(
|
||||
n1: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
defraw!(One, rfind_raw, start, end, n1)
|
||||
}
|
||||
|
||||
/// memchr2, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `Two::find_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn memchr2_raw(
|
||||
n1: u8,
|
||||
n2: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
defraw!(Two, find_raw, start, end, n1, n2)
|
||||
}
|
||||
|
||||
/// memrchr2, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `Two::rfind_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn memrchr2_raw(
|
||||
n1: u8,
|
||||
n2: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
defraw!(Two, rfind_raw, start, end, n1, n2)
|
||||
}
|
||||
|
||||
/// memchr3, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `Three::find_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn memchr3_raw(
|
||||
n1: u8,
|
||||
n2: u8,
|
||||
n3: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
defraw!(Three, find_raw, start, end, n1, n2, n3)
|
||||
}
|
||||
|
||||
/// memrchr3, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `Three::rfind_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn memrchr3_raw(
|
||||
n1: u8,
|
||||
n2: u8,
|
||||
n3: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
defraw!(Three, rfind_raw, start, end, n1, n2, n3)
|
||||
}
|
||||
|
||||
/// Count all matching bytes, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `One::count_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn count_raw(
|
||||
n1: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> usize {
|
||||
defraw!(One, count_raw, start, end, n1)
|
||||
}
|
||||
7
third_party/rust/memchr/src/arch/aarch64/mod.rs
vendored
Normal file
7
third_party/rust/memchr/src/arch/aarch64/mod.rs
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
/*!
|
||||
Vector algorithms for the `aarch64` target.
|
||||
*/
|
||||
|
||||
pub mod neon;
|
||||
|
||||
pub(crate) mod memchr;
|
||||
1031
third_party/rust/memchr/src/arch/aarch64/neon/memchr.rs
vendored
Normal file
1031
third_party/rust/memchr/src/arch/aarch64/neon/memchr.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6
third_party/rust/memchr/src/arch/aarch64/neon/mod.rs
vendored
Normal file
6
third_party/rust/memchr/src/arch/aarch64/neon/mod.rs
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
/*!
|
||||
Algorithms for the `aarch64` target using 128-bit vectors via NEON.
|
||||
*/
|
||||
|
||||
pub mod memchr;
|
||||
pub mod packedpair;
|
||||
236
third_party/rust/memchr/src/arch/aarch64/neon/packedpair.rs
vendored
Normal file
236
third_party/rust/memchr/src/arch/aarch64/neon/packedpair.rs
vendored
Normal file
@@ -0,0 +1,236 @@
|
||||
/*!
|
||||
A 128-bit vector implementation of the "packed pair" SIMD algorithm.
|
||||
|
||||
The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
|
||||
difference is that it (by default) uses a background distribution of byte
|
||||
frequencies to heuristically select the pair of bytes to search for.
|
||||
|
||||
[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
|
||||
*/
|
||||
|
||||
use core::arch::aarch64::uint8x16_t;
|
||||
|
||||
use crate::arch::{all::packedpair::Pair, generic::packedpair};
|
||||
|
||||
/// A "packed pair" finder that uses 128-bit vector operations.
|
||||
///
|
||||
/// This finder picks two bytes that it believes have high predictive power
|
||||
/// for indicating an overall match of a needle. Depending on whether
|
||||
/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets
|
||||
/// where the needle matches or could match. In the prefilter case, candidates
|
||||
/// are reported whenever the [`Pair`] of bytes given matches.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct Finder(packedpair::Finder<uint8x16_t>);
|
||||
|
||||
/// A "packed pair" finder that uses 128-bit vector operations.
|
||||
///
|
||||
/// This finder picks two bytes that it believes have high predictive power
|
||||
/// for indicating an overall match of a needle. Depending on whether
|
||||
/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets
|
||||
/// where the needle matches or could match. In the prefilter case, candidates
|
||||
/// are reported whenever the [`Pair`] of bytes given matches.
|
||||
impl Finder {
|
||||
/// Create a new pair searcher. The searcher returned can either report
|
||||
/// exact matches of `needle` or act as a prefilter and report candidate
|
||||
/// positions of `needle`.
|
||||
///
|
||||
/// If neon is unavailable in the current environment or if a [`Pair`]
|
||||
/// could not be constructed from the needle given, then `None` is
|
||||
/// returned.
|
||||
#[inline]
|
||||
pub fn new(needle: &[u8]) -> Option<Finder> {
|
||||
Finder::with_pair(needle, Pair::new(needle)?)
|
||||
}
|
||||
|
||||
/// Create a new "packed pair" finder using the pair of bytes given.
|
||||
///
|
||||
/// This constructor permits callers to control precisely which pair of
|
||||
/// bytes is used as a predicate.
|
||||
///
|
||||
/// If neon is unavailable in the current environment, then `None` is
|
||||
/// returned.
|
||||
#[inline]
|
||||
pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
|
||||
if Finder::is_available() {
|
||||
// SAFETY: we check that sse2 is available above. We are also
|
||||
// guaranteed to have needle.len() > 1 because we have a valid
|
||||
// Pair.
|
||||
unsafe { Some(Finder::with_pair_impl(needle, pair)) }
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `Finder` specific to neon vectors and routines.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Same as the safety for `packedpair::Finder::new`, and callers must also
|
||||
/// ensure that neon is available.
|
||||
#[target_feature(enable = "neon")]
|
||||
#[inline]
|
||||
unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder {
|
||||
let finder = packedpair::Finder::<uint8x16_t>::new(needle, pair);
|
||||
Finder(finder)
|
||||
}
|
||||
|
||||
/// Returns true when this implementation is available in the current
|
||||
/// environment.
|
||||
///
|
||||
/// When this is true, it is guaranteed that [`Finder::with_pair`] will
|
||||
/// return a `Some` value. Similarly, when it is false, it is guaranteed
|
||||
/// that `Finder::with_pair` will return a `None` value. Notice that this
|
||||
/// does not guarantee that [`Finder::new`] will return a `Finder`. Namely,
|
||||
/// even when `Finder::is_available` is true, it is not guaranteed that a
|
||||
/// valid [`Pair`] can be found from the needle given.
|
||||
///
|
||||
/// Note also that for the lifetime of a single program, if this returns
|
||||
/// true then it will always return true.
|
||||
#[inline]
|
||||
pub fn is_available() -> bool {
|
||||
#[cfg(target_feature = "neon")]
|
||||
{
|
||||
true
|
||||
}
|
||||
#[cfg(not(target_feature = "neon"))]
|
||||
{
|
||||
false
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute a search using neon vectors and routines.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
#[inline]
|
||||
pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
|
||||
// SAFETY: Building a `Finder` means it's safe to call 'neon' routines.
|
||||
unsafe { self.find_impl(haystack, needle) }
|
||||
}
|
||||
|
||||
/// Execute a search using neon vectors and routines.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
#[inline]
|
||||
pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
|
||||
// SAFETY: Building a `Finder` means it's safe to call 'neon' routines.
|
||||
unsafe { self.find_prefilter_impl(haystack) }
|
||||
}
|
||||
|
||||
/// Execute a search using neon vectors and routines.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// (The target feature safety obligation is automatically fulfilled by
|
||||
/// virtue of being a method on `Finder`, which can only be constructed
|
||||
/// when it is safe to call `neon` routines.)
|
||||
#[target_feature(enable = "neon")]
|
||||
#[inline]
|
||||
unsafe fn find_impl(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
) -> Option<usize> {
|
||||
self.0.find(haystack, needle)
|
||||
}
|
||||
|
||||
/// Execute a prefilter search using neon vectors and routines.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// (The target feature safety obligation is automatically fulfilled by
|
||||
/// virtue of being a method on `Finder`, which can only be constructed
|
||||
/// when it is safe to call `neon` routines.)
|
||||
#[target_feature(enable = "neon")]
|
||||
#[inline]
|
||||
unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> {
|
||||
self.0.find_prefilter(haystack)
|
||||
}
|
||||
|
||||
/// Returns the pair of offsets (into the needle) used to check as a
|
||||
/// predicate before confirming whether a needle exists at a particular
|
||||
/// position.
|
||||
#[inline]
|
||||
pub fn pair(&self) -> &Pair {
|
||||
self.0.pair()
|
||||
}
|
||||
|
||||
/// Returns the minimum haystack length that this `Finder` can search.
|
||||
///
|
||||
/// Using a haystack with length smaller than this in a search will result
|
||||
/// in a panic. The reason for this restriction is that this finder is
|
||||
/// meant to be a low-level component that is part of a larger substring
|
||||
/// strategy. In that sense, it avoids trying to handle all cases and
|
||||
/// instead only handles the cases that it can handle very well.
|
||||
#[inline]
|
||||
pub fn min_haystack_len(&self) -> usize {
|
||||
self.0.min_haystack_len()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> {
|
||||
let f = Finder::new(needle)?;
|
||||
if haystack.len() < f.min_haystack_len() {
|
||||
return None;
|
||||
}
|
||||
Some(f.find(haystack, needle))
|
||||
}
|
||||
|
||||
define_substring_forward_quickcheck!(find);
|
||||
|
||||
#[test]
|
||||
fn forward_substring() {
|
||||
crate::tests::substring::Runner::new().fwd(find).run()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_packedpair() {
|
||||
fn find(
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
index1: u8,
|
||||
index2: u8,
|
||||
) -> Option<Option<usize>> {
|
||||
let pair = Pair::with_indices(needle, index1, index2)?;
|
||||
let f = Finder::with_pair(needle, pair)?;
|
||||
if haystack.len() < f.min_haystack_len() {
|
||||
return None;
|
||||
}
|
||||
Some(f.find(haystack, needle))
|
||||
}
|
||||
crate::tests::packedpair::Runner::new().fwd(find).run()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_packedpair_prefilter() {
|
||||
fn find(
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
index1: u8,
|
||||
index2: u8,
|
||||
) -> Option<Option<usize>> {
|
||||
let pair = Pair::with_indices(needle, index1, index2)?;
|
||||
let f = Finder::with_pair(needle, pair)?;
|
||||
if haystack.len() < f.min_haystack_len() {
|
||||
return None;
|
||||
}
|
||||
Some(f.find_prefilter(haystack))
|
||||
}
|
||||
crate::tests::packedpair::Runner::new().fwd(find).run()
|
||||
}
|
||||
}
|
||||
1022
third_party/rust/memchr/src/arch/all/memchr.rs
vendored
Normal file
1022
third_party/rust/memchr/src/arch/all/memchr.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
234
third_party/rust/memchr/src/arch/all/mod.rs
vendored
Normal file
234
third_party/rust/memchr/src/arch/all/mod.rs
vendored
Normal file
@@ -0,0 +1,234 @@
|
||||
/*!
|
||||
Contains architecture independent routines.
|
||||
|
||||
These routines are often used as a "fallback" implementation when the more
|
||||
specialized architecture dependent routines are unavailable.
|
||||
*/
|
||||
|
||||
pub mod memchr;
|
||||
pub mod packedpair;
|
||||
pub mod rabinkarp;
|
||||
#[cfg(feature = "alloc")]
|
||||
pub mod shiftor;
|
||||
pub mod twoway;
|
||||
|
||||
/// Returns true if and only if `needle` is a prefix of `haystack`.
|
||||
///
|
||||
/// This uses a latency optimized variant of `memcmp` internally which *might*
|
||||
/// make this faster for very short strings.
|
||||
///
|
||||
/// # Inlining
|
||||
///
|
||||
/// This routine is marked `inline(always)`. If you want to call this function
|
||||
/// in a way that is not always inlined, you'll need to wrap a call to it in
|
||||
/// another function that is marked as `inline(never)` or just `inline`.
|
||||
#[inline(always)]
|
||||
pub fn is_prefix(haystack: &[u8], needle: &[u8]) -> bool {
|
||||
needle.len() <= haystack.len()
|
||||
&& is_equal(&haystack[..needle.len()], needle)
|
||||
}
|
||||
|
||||
/// Returns true if and only if `needle` is a suffix of `haystack`.
|
||||
///
|
||||
/// This uses a latency optimized variant of `memcmp` internally which *might*
|
||||
/// make this faster for very short strings.
|
||||
///
|
||||
/// # Inlining
|
||||
///
|
||||
/// This routine is marked `inline(always)`. If you want to call this function
|
||||
/// in a way that is not always inlined, you'll need to wrap a call to it in
|
||||
/// another function that is marked as `inline(never)` or just `inline`.
|
||||
#[inline(always)]
|
||||
pub fn is_suffix(haystack: &[u8], needle: &[u8]) -> bool {
|
||||
needle.len() <= haystack.len()
|
||||
&& is_equal(&haystack[haystack.len() - needle.len()..], needle)
|
||||
}
|
||||
|
||||
/// Compare corresponding bytes in `x` and `y` for equality.
|
||||
///
|
||||
/// That is, this returns true if and only if `x.len() == y.len()` and
|
||||
/// `x[i] == y[i]` for all `0 <= i < x.len()`.
|
||||
///
|
||||
/// # Inlining
|
||||
///
|
||||
/// This routine is marked `inline(always)`. If you want to call this function
|
||||
/// in a way that is not always inlined, you'll need to wrap a call to it in
|
||||
/// another function that is marked as `inline(never)` or just `inline`.
|
||||
///
|
||||
/// # Motivation
|
||||
///
|
||||
/// Why not use slice equality instead? Well, slice equality usually results in
|
||||
/// a call out to the current platform's `libc` which might not be inlineable
|
||||
/// or have other overhead. This routine isn't guaranteed to be a win, but it
|
||||
/// might be in some cases.
|
||||
#[inline(always)]
|
||||
pub fn is_equal(x: &[u8], y: &[u8]) -> bool {
|
||||
if x.len() != y.len() {
|
||||
return false;
|
||||
}
|
||||
// SAFETY: Our pointers are derived directly from borrowed slices which
|
||||
// uphold all of our safety guarantees except for length. We account for
|
||||
// length with the check above.
|
||||
unsafe { is_equal_raw(x.as_ptr(), y.as_ptr(), x.len()) }
|
||||
}
|
||||
|
||||
/// Compare `n` bytes at the given pointers for equality.
///
/// This returns true if and only if `*x.add(i) == *y.add(i)` for all
/// `0 <= i < n`.
///
/// # Inlining
///
/// This routine is marked `inline(always)`. If you want to call this function
/// in a way that is not always inlined, you'll need to wrap a call to it in
/// another function that is marked as `inline(never)` or just `inline`.
///
/// # Motivation
///
/// Why not use slice equality instead? Well, slice equality usually results in
/// a call out to the current platform's `libc` which might not be inlineable
/// or have other overhead. This routine isn't guaranteed to be a win, but it
/// might be in some cases.
///
/// # Safety
///
/// * Both `x` and `y` must be valid for reads of up to `n` bytes.
/// * Both `x` and `y` must point to an initialized value.
/// * Both `x` and `y` must each point to an allocated object and
///   must either be in bounds or at most one byte past the end of the
///   allocated object. `x` and `y` do not need to point to the same allocated
///   object, but they may.
/// * Both `x` and `y` must be _derived from_ a pointer to their respective
///   allocated objects.
/// * The distance between `x` and `x+n` must not overflow `isize`. Similarly
///   for `y` and `y+n`.
/// * The distance being in bounds must not rely on "wrapping around" the
///   address space.
#[inline(always)]
pub unsafe fn is_equal_raw(
    mut a: *const u8,
    mut b: *const u8,
    mut len: usize,
) -> bool {
    // Proceed in chunks of 4 bytes using unaligned loads while enough input
    // remains.
    //
    // Why 4-byte loads instead of, say, 8-byte loads? This flavor of memcmp
    // is likely to be called with tiny needles, so wider loads would push a
    // larger share of calls onto the slower partial-load tail below. That is
    // a hypothesis only loosely supported by benchmarks; the main goal here
    // is to optimize for latency, not throughput.
    //
    // SAFETY: The caller guarantees both pointers are valid and readable for
    // at least `len` bytes. All loads are unaligned, so alignment imposes no
    // extra requirement. (This is justified by this routine being
    // specifically for short strings.)
    while len >= 4 {
        let va = a.cast::<u32>().read_unaligned();
        let vb = b.cast::<u32>().read_unaligned();
        if va != vb {
            return false;
        }
        a = a.add(4);
        b = b.add(4);
        len -= 4;
    }
    // At most 3 bytes remain. Finish with partial loads (one u16, then at
    // most one byte). A byte-at-a-time loop here previously turned out to be
    // quite a bit slower on the
    // memmem/pathological/defeat-simple-vector-alphabet benchmark.
    if len >= 2 {
        let va = a.cast::<u16>().read_unaligned();
        let vb = b.cast::<u16>().read_unaligned();
        if va != vb {
            return false;
        }
        a = a.add(2);
        b = b.add(2);
        len -= 2;
    }
    // Either nothing is left, or exactly one final byte must match.
    len == 0 || a.read() == b.read()
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    // Unit tests for the scalar byte-equality helpers defined in this
    // module.
    use super::*;

    // `is_equal` must reject slices of differing lengths before comparing
    // any bytes.
    #[test]
    fn equals_different_lengths() {
        assert!(!is_equal(b"", b"a"));
        assert!(!is_equal(b"a", b""));
        assert!(!is_equal(b"ab", b"a"));
        assert!(!is_equal(b"a", b"ab"));
    }

    // A single trailing mismatch at every length from 1 through 14, in both
    // argument orders. This exercises the 4-byte, 2-byte and 1-byte tail
    // paths of `is_equal_raw` at every possible remainder.
    #[test]
    fn equals_mismatch() {
        let one_mismatch = [
            (&b"a"[..], &b"x"[..]),
            (&b"ab"[..], &b"ax"[..]),
            (&b"abc"[..], &b"abx"[..]),
            (&b"abcd"[..], &b"abcx"[..]),
            (&b"abcde"[..], &b"abcdx"[..]),
            (&b"abcdef"[..], &b"abcdex"[..]),
            (&b"abcdefg"[..], &b"abcdefx"[..]),
            (&b"abcdefgh"[..], &b"abcdefgx"[..]),
            (&b"abcdefghi"[..], &b"abcdefghx"[..]),
            (&b"abcdefghij"[..], &b"abcdefghix"[..]),
            (&b"abcdefghijk"[..], &b"abcdefghijx"[..]),
            (&b"abcdefghijkl"[..], &b"abcdefghijkx"[..]),
            (&b"abcdefghijklm"[..], &b"abcdefghijklx"[..]),
            (&b"abcdefghijklmn"[..], &b"abcdefghijklmx"[..]),
        ];
        for (x, y) in one_mismatch {
            assert_eq!(x.len(), y.len(), "lengths should match");
            assert!(!is_equal(x, y));
            assert!(!is_equal(y, x));
        }
    }

    // Matching inputs across lengths 0 through 9.
    #[test]
    fn equals_yes() {
        assert!(is_equal(b"", b""));
        assert!(is_equal(b"a", b"a"));
        assert!(is_equal(b"ab", b"ab"));
        assert!(is_equal(b"abc", b"abc"));
        assert!(is_equal(b"abcd", b"abcd"));
        assert!(is_equal(b"abcde", b"abcde"));
        assert!(is_equal(b"abcdef", b"abcdef"));
        assert!(is_equal(b"abcdefg", b"abcdefg"));
        assert!(is_equal(b"abcdefgh", b"abcdefgh"));
        assert!(is_equal(b"abcdefghi", b"abcdefghi"));
    }

    // Note the argument order: the needle is the second argument, so the
    // empty needle is a prefix of everything.
    #[test]
    fn prefix() {
        assert!(is_prefix(b"", b""));
        assert!(is_prefix(b"a", b""));
        assert!(is_prefix(b"ab", b""));
        assert!(is_prefix(b"foo", b"foo"));
        assert!(is_prefix(b"foobar", b"foo"));

        assert!(!is_prefix(b"foo", b"fob"));
        assert!(!is_prefix(b"foobar", b"fob"));
    }

    // As with `prefix` above: needle second, and the empty needle is a
    // suffix of everything.
    #[test]
    fn suffix() {
        assert!(is_suffix(b"", b""));
        assert!(is_suffix(b"a", b""));
        assert!(is_suffix(b"ab", b""));
        assert!(is_suffix(b"foo", b"foo"));
        assert!(is_suffix(b"foobar", b"bar"));

        assert!(!is_suffix(b"foo", b"goo"));
        assert!(!is_suffix(b"foobar", b"gar"));
    }
}
|
||||
@@ -1,4 +1,4 @@
|
||||
pub const BYTE_FREQUENCIES: [u8; 256] = [
|
||||
pub(crate) const RANK: [u8; 256] = [
|
||||
55, // '\x00'
|
||||
52, // '\x01'
|
||||
51, // '\x02'
|
||||
359
third_party/rust/memchr/src/arch/all/packedpair/mod.rs
vendored
Normal file
359
third_party/rust/memchr/src/arch/all/packedpair/mod.rs
vendored
Normal file
@@ -0,0 +1,359 @@
|
||||
/*!
|
||||
Provides an architecture independent implementation of the "packed pair"
|
||||
algorithm.
|
||||
|
||||
The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
|
||||
difference is that it (by default) uses a background distribution of byte
|
||||
frequencies to heuristically select the pair of bytes to search for. Note that
|
||||
this module provides an architecture independent version that doesn't do as
|
||||
good of a job keeping the search for candidates inside a SIMD hot path. It
|
||||
however can be good enough in many circumstances.
|
||||
|
||||
[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
|
||||
*/
|
||||
|
||||
use crate::memchr;
|
||||
|
||||
mod default_rank;
|
||||
|
||||
/// An architecture independent "packed pair" finder.
///
/// This finder picks two bytes that it believes have high predictive power for
/// indicating an overall match of a needle. At search time, it reports offsets
/// where the needle could match based on whether the pair of bytes it chose
/// match.
///
/// This is architecture independent because it utilizes `memchr` to find the
/// occurrence of one of the bytes in the pair, and then checks whether the
/// second byte matches. If it does, in the case of [`Finder::find_prefilter`],
/// the location at which the needle could match is returned.
///
/// It is generally preferred to use architecture specific routines for a
/// "packed pair" prefilter, but this can be a useful fallback when the
/// architecture independent routines are unavailable.
#[derive(Clone, Copy, Debug)]
pub struct Finder {
    /// The pair of needle offsets used as the match predicate.
    pair: Pair,
    /// The needle byte at offset `pair.index1()`.
    byte1: u8,
    /// The needle byte at offset `pair.index2()`.
    byte2: u8,
}
|
||||
|
||||
impl Finder {
    /// Create a new prefilter that reports possible locations where the given
    /// needle matches.
    ///
    /// Returns `None` when no pair could be chosen for the needle (see
    /// [`Pair::new`]).
    #[inline]
    pub fn new(needle: &[u8]) -> Option<Finder> {
        Finder::with_pair(needle, Pair::new(needle)?)
    }

    /// Create a new prefilter using the pair given.
    ///
    /// If the prefilter could not be constructed, then `None` is returned.
    ///
    /// This constructor permits callers to control precisely which pair of
    /// bytes is used as a predicate.
    ///
    /// NOTE(review): the indexing below assumes `pair` was built for (or is
    /// otherwise valid for) this `needle`; an out-of-range offset panics —
    /// confirm callers uphold this.
    #[inline]
    pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
        let byte1 = needle[usize::from(pair.index1())];
        let byte2 = needle[usize::from(pair.index2())];
        // Currently this can never fail so we could just return a Finder,
        // but it's conceivable this could change.
        Some(Finder { pair, byte1, byte2 })
    }

    /// Run this finder on the given haystack as a prefilter.
    ///
    /// If a candidate match is found, then an offset where the needle *could*
    /// begin in the haystack is returned.
    #[inline]
    pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
        let mut i = 0;
        let index1 = usize::from(self.pair.index1());
        let index2 = usize::from(self.pair.index2());
        loop {
            // Use a fast vectorized implementation to skip to the next
            // occurrence of the rarest byte (heuristically chosen) in the
            // needle.
            i += memchr(self.byte1, &haystack[i..])?;
            let found = i;
            i += 1;

            // If we can't align our first byte match with the haystack, then a
            // match is impossible. (`aligned1` is the haystack offset at which
            // the needle would have to start for `byte1` to land at `found`.)
            let aligned1 = match found.checked_sub(index1) {
                None => continue,
                Some(aligned1) => aligned1,
            };

            // Now align the second byte match with the haystack. A mismatch
            // means that a match is impossible.
            let aligned2 = match aligned1.checked_add(index2) {
                None => continue,
                Some(aligned_index2) => aligned_index2,
            };
            // `get` handles the case where `aligned2` falls past the end of
            // the haystack: that also rules out a match here.
            if haystack.get(aligned2).map_or(true, |&b| b != self.byte2) {
                continue;
            }

            // We've done what we can. There might be a match here.
            return Some(aligned1);
        }
    }

    /// Returns the pair of offsets (into the needle) used to check as a
    /// predicate before confirming whether a needle exists at a particular
    /// position.
    #[inline]
    pub fn pair(&self) -> &Pair {
        &self.pair
    }
}
|
||||
|
||||
/// A pair of byte offsets into a needle to use as a predicate.
///
/// This pair is used as a predicate to quickly filter out positions in a
/// haystack in which a needle cannot match. In some cases, this pair can even
/// be used in vector algorithms such that the vector algorithm only switches
/// over to scalar code once this pair has been found.
///
/// A pair of offsets can be used in both substring search implementations and
/// in prefilters. The former will report matches of a needle in a haystack
/// where as the latter will only report possible matches of a needle.
///
/// The offsets are limited each to a maximum of 255 to keep memory usage low.
/// Moreover, it's rarely advantageous to create a predicate using offsets
/// greater than 255 anyway.
///
/// The only guarantee enforced on the pair of offsets is that they are not
/// equivalent. It is not necessarily the case that `index1 < index2` for
/// example. By convention, `index1` corresponds to the byte in the needle
/// that is believed to be most the predictive. Note also that because of the
/// requirement that the indices be both valid for the needle used to build
/// the pair and not equal, it follows that a pair can only be constructed for
/// needles with length at least 2.
#[derive(Clone, Copy, Debug)]
pub struct Pair {
    /// Offset of the (heuristically) most predictive needle byte.
    index1: u8,
    /// Offset of the second predicate byte. Always distinct from `index1`.
    index2: u8,
}
|
||||
|
||||
impl Pair {
    /// Create a new pair of offsets from the given needle.
    ///
    /// If a pair could not be created (for example, if the needle is too
    /// short), then `None` is returned.
    ///
    /// This chooses the pair in the needle that is believed to be as
    /// predictive of an overall match of the needle as possible.
    #[inline]
    pub fn new(needle: &[u8]) -> Option<Pair> {
        Pair::with_ranker(needle, DefaultFrequencyRank)
    }

    /// Create a new pair of offsets from the given needle and ranker.
    ///
    /// This permits the caller to choose a background frequency distribution
    /// with which bytes are selected. The idea is to select a pair of bytes
    /// that is believed to strongly predict a match in the haystack. This
    /// usually means selecting bytes that occur rarely in a haystack.
    ///
    /// If a pair could not be created (for example, if the needle is too
    /// short), then `None` is returned.
    #[inline]
    pub fn with_ranker<R: HeuristicFrequencyRank>(
        needle: &[u8],
        ranker: R,
    ) -> Option<Pair> {
        // A pair requires two distinct indices, so needles shorter than 2
        // bytes cannot produce one.
        if needle.len() <= 1 {
            return None;
        }
        // Find the rarest two bytes. We make them distinct indices by
        // construction. (The actual byte value may be the same in degenerate
        // cases, but that's OK.)
        //
        // Seed with the first two bytes, ordered so that `rare1` is the
        // lower-ranked (rarer) of the two.
        let (mut rare1, mut index1) = (needle[0], 0);
        let (mut rare2, mut index2) = (needle[1], 1);
        if ranker.rank(rare2) < ranker.rank(rare1) {
            core::mem::swap(&mut rare1, &mut rare2);
            core::mem::swap(&mut index1, &mut index2);
        }
        // Note the adaptor order: `take(max)` before `skip(2)` restricts the
        // scan to indices `2..255`, which keeps every candidate index
        // representable as a `u8` (so the `try_from` calls below cannot
        // fail).
        let max = usize::from(core::u8::MAX);
        for (i, &b) in needle.iter().enumerate().take(max).skip(2) {
            if ranker.rank(b) < ranker.rank(rare1) {
                // New rarest byte: demote the old rarest to second place.
                rare2 = rare1;
                index2 = index1;
                rare1 = b;
                index1 = u8::try_from(i).unwrap();
            } else if b != rare1 && ranker.rank(b) < ranker.rank(rare2) {
                rare2 = b;
                index2 = u8::try_from(i).unwrap();
            }
        }
        // While not strictly required for how a Pair is normally used, we
        // really don't want these to be equivalent. If they were, it would
        // reduce the effectiveness of candidate searching using these rare
        // bytes by increasing the rate of false positives.
        assert_ne!(index1, index2);
        Some(Pair { index1, index2 })
    }

    /// Create a new pair using the offsets given for the needle given.
    ///
    /// This bypasses any sort of heuristic process for choosing the offsets
    /// and permits the caller to choose the offsets themselves.
    ///
    /// Indices are limited to valid `u8` values so that a `Pair` uses less
    /// memory. It is not possible to create a `Pair` with offsets bigger than
    /// `u8::MAX`. It's likely that such a thing is not needed, but if it is,
    /// it's suggested to build your own bespoke algorithm because you're
    /// likely working on a very niche case. (File an issue if this suggestion
    /// does not make sense to you.)
    ///
    /// If a pair could not be created (for example, if the needle is too
    /// short), then `None` is returned.
    #[inline]
    pub fn with_indices(
        needle: &[u8],
        index1: u8,
        index2: u8,
    ) -> Option<Pair> {
        // While not strictly required for how a Pair is normally used, we
        // really don't want these to be equivalent. If they were, it would
        // reduce the effectiveness of candidate searching using these rare
        // bytes by increasing the rate of false positives.
        if index1 == index2 {
            return None;
        }
        // Similarly, invalid indices means the Pair is invalid too.
        if usize::from(index1) >= needle.len() {
            return None;
        }
        if usize::from(index2) >= needle.len() {
            return None;
        }
        Some(Pair { index1, index2 })
    }

    /// Returns the first offset of the pair.
    #[inline]
    pub fn index1(&self) -> u8 {
        self.index1
    }

    /// Returns the second offset of the pair.
    #[inline]
    pub fn index2(&self) -> u8 {
        self.index2
    }
}
|
||||
|
||||
/// This trait allows the user to customize the heuristic used to determine the
/// relative frequency of a given byte in the dataset being searched.
///
/// The use of this trait can have a dramatic impact on performance depending
/// on the type of data being searched. The details of why are explained in the
/// docs of [`crate::memmem::Prefilter`]. To summarize, the core algorithm uses
/// a prefilter to quickly identify candidate matches that are later verified
/// more slowly. This prefilter is implemented in terms of trying to find
/// `rare` bytes at specific offsets that will occur less frequently in the
/// dataset. While the concept of a `rare` byte is similar for most datasets,
/// there are some specific datasets (like binary executables) that have
/// dramatically different byte distributions. For these datasets customizing
/// the byte frequency heuristic can have a massive impact on performance, and
/// might even need to be done at runtime.
///
/// The default implementation of `HeuristicFrequencyRank` reads from the
/// static frequency table defined in `src/memmem/byte_frequencies.rs`. This
/// is optimal for most inputs, so if you are unsure of the impact of using a
/// custom `HeuristicFrequencyRank` you should probably just use the default.
///
/// # Example
///
/// ```
/// use memchr::{
///     arch::all::packedpair::HeuristicFrequencyRank,
///     memmem::FinderBuilder,
/// };
///
/// /// A byte-frequency table that is good for scanning binary executables.
/// struct Binary;
///
/// impl HeuristicFrequencyRank for Binary {
///     fn rank(&self, byte: u8) -> u8 {
///         const TABLE: [u8; 256] = [
///             255, 128, 61, 43, 50, 41, 27, 28, 57, 15, 21, 13, 24, 17, 17,
///             89, 58, 16, 11, 7, 14, 23, 7, 6, 24, 9, 6, 5, 9, 4, 7, 16,
///             68, 11, 9, 6, 88, 7, 4, 4, 23, 9, 4, 8, 8, 5, 10, 4, 30, 11,
///             9, 24, 11, 5, 5, 5, 19, 11, 6, 17, 9, 9, 6, 8,
///             48, 58, 11, 14, 53, 40, 9, 9, 254, 35, 3, 6, 52, 23, 6, 6, 27,
///             4, 7, 11, 14, 13, 10, 11, 11, 5, 2, 10, 16, 12, 6, 19,
///             19, 20, 5, 14, 16, 31, 19, 7, 14, 20, 4, 4, 19, 8, 18, 20, 24,
///             1, 25, 19, 58, 29, 10, 5, 15, 20, 2, 2, 9, 4, 3, 5,
///             51, 11, 4, 53, 23, 39, 6, 4, 13, 81, 4, 186, 5, 67, 3, 2, 15,
///             0, 0, 1, 3, 2, 0, 0, 5, 0, 0, 0, 2, 0, 0, 0,
///             12, 2, 1, 1, 3, 1, 1, 1, 6, 1, 2, 1, 3, 1, 1, 2, 9, 1, 1, 0,
///             2, 2, 4, 4, 11, 6, 7, 3, 6, 9, 4, 5,
///             46, 18, 8, 18, 17, 3, 8, 20, 16, 10, 3, 7, 175, 4, 6, 7, 13,
///             3, 7, 3, 3, 1, 3, 3, 10, 3, 1, 5, 2, 0, 1, 2,
///             16, 3, 5, 1, 6, 1, 1, 2, 58, 20, 3, 14, 12, 2, 1, 3, 16, 3, 5,
///             8, 3, 1, 8, 6, 17, 6, 5, 3, 8, 6, 13, 175,
///         ];
///         TABLE[byte as usize]
///     }
/// }
/// // Create a new finder with the custom heuristic.
/// let finder = FinderBuilder::new()
///     .build_forward_with_ranker(Binary, b"\x00\x00\xdd\xdd");
/// // Find needle with custom heuristic.
/// assert!(finder.find(b"\x00\x00\x00\xdd\xdd").is_some());
/// ```
pub trait HeuristicFrequencyRank {
    /// Return the heuristic frequency rank of the given byte. A lower rank
    /// means the byte is believed to occur less frequently in the haystack.
    ///
    /// Some uses of this heuristic may treat arbitrary absolute rank values as
    /// significant. For example, an implementation detail in this crate may
    /// determine that heuristic prefilters are inappropriate if every byte in
    /// the needle has a "high" rank.
    ///
    /// NOTE(review): implementations presumably should be pure functions of
    /// `byte` (stable across calls), since callers compare ranks of different
    /// bytes against each other — confirm before relying on anything weaker.
    fn rank(&self, byte: u8) -> u8;
}
|
||||
|
||||
/// The default byte frequency heuristic that is good for most haystacks.
pub(crate) struct DefaultFrequencyRank;

impl HeuristicFrequencyRank for DefaultFrequencyRank {
    fn rank(&self, byte: u8) -> u8 {
        // Look up the precomputed rank in the static table; per the trait
        // contract, a lower value means the byte is believed to be rarer.
        self::default_rank::RANK[usize::from(byte)]
    }
}
|
||||
|
||||
/// This permits passing any implementation of `HeuristicFrequencyRank` as a
|
||||
/// borrowed version of itself.
|
||||
impl<'a, R> HeuristicFrequencyRank for &'a R
|
||||
where
|
||||
R: HeuristicFrequencyRank,
|
||||
{
|
||||
fn rank(&self, byte: u8) -> u8 {
|
||||
(**self).rank(byte)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    // Drive the shared packed-pair test harness with this module's
    // architecture independent `Finder`.
    #[test]
    fn forward_packedpair() {
        fn find(
            haystack: &[u8],
            needle: &[u8],
            _index1: u8,
            _index2: u8,
        ) -> Option<Option<usize>> {
            // We ignore the index positions requested since it winds up making
            // this test too slow overall.
            let f = Finder::new(needle)?;
            Some(f.find_prefilter(haystack))
        }
        crate::tests::packedpair::Runner::new().fwd(find).run()
    }
}
|
||||
390
third_party/rust/memchr/src/arch/all/rabinkarp.rs
vendored
Normal file
390
third_party/rust/memchr/src/arch/all/rabinkarp.rs
vendored
Normal file
@@ -0,0 +1,390 @@
|
||||
/*!
|
||||
An implementation of the [Rabin-Karp substring search algorithm][rabinkarp].
|
||||
|
||||
Rabin-Karp works by creating a hash of the needle provided and then computing
|
||||
a rolling hash for each needle sized window in the haystack. When the rolling
|
||||
hash matches the hash of the needle, a byte-wise comparison is done to check
|
||||
if a match exists. The worst case time complexity of Rabin-Karp is `O(m *
|
||||
n)` where `m ~ len(needle)` and `n ~ len(haystack)`. Its worst case space
|
||||
complexity is constant.
|
||||
|
||||
The main utility of Rabin-Karp is that the searcher can be constructed very
|
||||
quickly with very little memory. This makes it especially useful when searching
|
||||
for small needles in small haystacks, as it might finish its search before a
|
||||
beefier algorithm (like Two-Way) even starts.
|
||||
|
||||
[rabinkarp]: https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
|
||||
*/
|
||||
|
||||
/*
|
||||
(This was the comment I wrote for this module originally when it was not
|
||||
exposed. The comment still looks useful, but it's a bit in the weeds, so it's
|
||||
not public itself.)
|
||||
|
||||
This module implements the classical Rabin-Karp substring search algorithm,
|
||||
with no extra frills. While its use would seem to break our time complexity
|
||||
guarantee of O(m+n) (RK's time complexity is O(mn)), we are careful to only
|
||||
ever use RK on a constant subset of haystacks. The main point here is that
|
||||
RK has good latency properties for small needles/haystacks. It's very quick
|
||||
to compute a needle hash and zip through the haystack when compared to
|
||||
initializing Two-Way, for example. And this is especially useful for cases
|
||||
where the haystack is just too short for vector instructions to do much good.
|
||||
|
||||
The hashing function used here is the same one recommended by ESMAJ.
|
||||
|
||||
Another choice instead of Rabin-Karp would be Shift-Or. But its latency
|
||||
isn't quite as good since its preprocessing time is a bit more expensive
|
||||
(both in practice and in theory). However, perhaps Shift-Or has a place
|
||||
somewhere else for short patterns. I think the main problem is that it
|
||||
requires space proportional to the alphabet and the needle. If we, for
|
||||
example, supported needles up to length 16, then the total table size would be
|
||||
len(alphabet)*size_of::<u16>()==512 bytes. Which isn't exactly small, and it's
|
||||
probably bad to put that on the stack. So ideally, we'd throw it on the heap,
|
||||
but we'd really like to write as much code without using alloc/std as possible.
|
||||
But maybe it's worth the special casing. It's a TODO to benchmark.
|
||||
|
||||
Wikipedia has a decent explanation, if a bit heavy on the theory:
|
||||
https://en.wikipedia.org/wiki/Rabin%E2%80%93Karp_algorithm
|
||||
|
||||
But ESMAJ provides something a bit more concrete:
|
||||
http://www-igm.univ-mlv.fr/~lecroq/string/node5.html
|
||||
|
||||
Finally, aho-corasick uses Rabin-Karp for multiple pattern match in some cases:
|
||||
https://github.com/BurntSushi/aho-corasick/blob/3852632f10587db0ff72ef29e88d58bf305a0946/src/packed/rabinkarp.rs
|
||||
*/
|
||||
|
||||
use crate::ext::Pointer;
|
||||
|
||||
/// A forward substring searcher using the Rabin-Karp algorithm.
///
/// Note that, as a lower level API, a `Finder` does not have access to the
/// needle it was constructed with. For this reason, executing a search
/// with a `Finder` requires passing both the needle and the haystack,
/// where the needle is exactly equivalent to the one given to the `Finder`
/// at construction time. This design was chosen so that callers can have
/// more precise control over where and how many times a needle is stored.
/// For example, in cases where Rabin-Karp is just one of several possible
/// substring search algorithms.
#[derive(Clone, Debug)]
pub struct Finder {
    /// The actual hash.
    hash: Hash,
    /// The factor needed to multiply a byte by in order to subtract it from
    /// the hash. It is defined to be 2^(n-1) (using wrapping exponentiation),
    /// where n is the length of the needle. This is how we "remove" a byte
    /// from the hash once the hash window rolls past it.
    hash_2pow: u32,
}
|
||||
|
||||
impl Finder {
    /// Create a new Rabin-Karp forward searcher for the given `needle`.
    ///
    /// The needle may be empty. The empty needle matches at every byte offset.
    ///
    /// Note that callers must pass the same needle to all search calls using
    /// this `Finder`.
    #[inline]
    pub fn new(needle: &[u8]) -> Finder {
        let mut s = Finder { hash: Hash::new(), hash_2pow: 1 };
        let first_byte = match needle.get(0) {
            None => return s,
            Some(&first_byte) => first_byte,
        };
        s.hash.add(first_byte);
        // `hash_2pow` is doubled once per byte after the first, ending at
        // 2^(len-1) (with wrapping), matching the struct field's contract.
        for b in needle.iter().copied().skip(1) {
            s.hash.add(b);
            s.hash_2pow = s.hash_2pow.wrapping_shl(1);
        }
        s
    }

    /// Return the first occurrence of the `needle` in the `haystack`
    /// given. If no such occurrence exists, then `None` is returned.
    ///
    /// The `needle` provided must match the needle given to this finder at
    /// construction time.
    ///
    /// The maximum value this can return is `haystack.len()`, which can only
    /// occur when the needle and haystack both have length zero. Otherwise,
    /// for non-empty haystacks, the maximum value is `haystack.len() - 1`.
    #[inline]
    pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
        // SAFETY: all pointers below are derived from live slices, so every
        // precondition of `find_raw` holds; the returned pointer lies within
        // the haystack, making `distance` valid.
        unsafe {
            let hstart = haystack.as_ptr();
            let hend = hstart.add(haystack.len());
            let nstart = needle.as_ptr();
            let nend = nstart.add(needle.len());
            let found = self.find_raw(hstart, hend, nstart, nend)?;
            Some(found.distance(hstart))
        }
    }

    /// Like `find`, but accepts and returns raw pointers.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `<= end`. The pointer returned is only ever equivalent
    /// to `end` when both the needle and haystack are empty. (That is, the
    /// empty string matches the empty string.)
    ///
    /// This routine is useful if you're already using raw pointers and would
    /// like to avoid converting back to a slice before executing a search.
    ///
    /// # Safety
    ///
    /// Note that `start` and `end` below refer to both pairs of pointers given
    /// to this routine. That is, the conditions apply to both `hstart`/`hend`
    /// and `nstart`/`nend`.
    ///
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    ///   must either be in bounds or at most one byte past the end of the
    ///   allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    ///   object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    ///   address space.
    /// * It must be the case that `start <= end`.
    #[inline]
    pub unsafe fn find_raw(
        &self,
        hstart: *const u8,
        hend: *const u8,
        nstart: *const u8,
        nend: *const u8,
    ) -> Option<*const u8> {
        let hlen = hend.distance(hstart);
        let nlen = nend.distance(nstart);
        // A needle longer than the haystack can never match.
        if nlen > hlen {
            return None;
        }
        // `end` is the last position at which a needle-sized window can
        // start; hash the initial window and roll forward from there.
        let mut cur = hstart;
        let end = hend.sub(nlen);
        let mut hash = Hash::forward(cur, cur.add(nlen));
        loop {
            // Only fall back to the byte-wise comparison when the (cheap)
            // hash comparison passes.
            if self.hash == hash && is_equal_raw(cur, nstart, nlen) {
                return Some(cur);
            }
            if cur >= end {
                return None;
            }
            // Roll the window: drop the byte at `cur`, pull in the byte just
            // past the current window. `cur < end` guarantees both reads are
            // in bounds.
            hash.roll(self, cur.read(), cur.add(nlen).read());
            cur = cur.add(1);
        }
    }
}
|
||||
|
||||
/// A reverse substring searcher using the Rabin-Karp algorithm.
///
/// Wraps the forward `Finder`'s hash state; `FinderRev::new` fills it by
/// walking the needle's bytes in reverse.
#[derive(Clone, Debug)]
pub struct FinderRev(Finder);
|
||||
|
||||
impl FinderRev {
    /// Create a new Rabin-Karp reverse searcher for the given `needle`.
    #[inline]
    pub fn new(needle: &[u8]) -> FinderRev {
        let mut s = FinderRev(Finder { hash: Hash::new(), hash_2pow: 1 });
        // Mirror of `Finder::new`: seed with the *last* byte and accumulate
        // the rest of the needle in reverse order.
        let last_byte = match needle.last() {
            None => return s,
            Some(&last_byte) => last_byte,
        };
        s.0.hash.add(last_byte);
        for b in needle.iter().rev().copied().skip(1) {
            s.0.hash.add(b);
            s.0.hash_2pow = s.0.hash_2pow.wrapping_shl(1);
        }
        s
    }

    /// Return the last occurrence of the `needle` in the `haystack`
    /// given. If no such occurrence exists, then `None` is returned.
    ///
    /// The `needle` provided must match the needle given to this finder at
    /// construction time.
    ///
    /// The maximum value this can return is `haystack.len()`, which can only
    /// occur when the needle and haystack both have length zero. Otherwise,
    /// for non-empty haystacks, the maximum value is `haystack.len() - 1`.
    #[inline]
    pub fn rfind(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
        // SAFETY: all pointers below are derived from live slices, so every
        // precondition of `rfind_raw` holds; the returned pointer lies within
        // the haystack, making `distance` valid.
        unsafe {
            let hstart = haystack.as_ptr();
            let hend = hstart.add(haystack.len());
            let nstart = needle.as_ptr();
            let nend = nstart.add(needle.len());
            let found = self.rfind_raw(hstart, hend, nstart, nend)?;
            Some(found.distance(hstart))
        }
    }

    /// Like `rfind`, but accepts and returns raw pointers.
    ///
    /// When a match is found, the pointer returned is guaranteed to be
    /// `>= start` and `<= end`. The pointer returned is only ever equivalent
    /// to `end` when both the needle and haystack are empty. (That is, the
    /// empty string matches the empty string.)
    ///
    /// This routine is useful if you're already using raw pointers and would
    /// like to avoid converting back to a slice before executing a search.
    ///
    /// # Safety
    ///
    /// Note that `start` and `end` below refer to both pairs of pointers given
    /// to this routine. That is, the conditions apply to both `hstart`/`hend`
    /// and `nstart`/`nend`.
    ///
    /// * Both `start` and `end` must be valid for reads.
    /// * Both `start` and `end` must point to an initialized value.
    /// * Both `start` and `end` must point to the same allocated object and
    ///   must either be in bounds or at most one byte past the end of the
    ///   allocated object.
    /// * Both `start` and `end` must be _derived from_ a pointer to the same
    ///   object.
    /// * The distance between `start` and `end` must not overflow `isize`.
    /// * The distance being in bounds must not rely on "wrapping around" the
    ///   address space.
    /// * It must be the case that `start <= end`.
    #[inline]
    pub unsafe fn rfind_raw(
        &self,
        hstart: *const u8,
        hend: *const u8,
        nstart: *const u8,
        nend: *const u8,
    ) -> Option<*const u8> {
        let hlen = hend.distance(hstart);
        let nlen = nend.distance(nstart);
        // A needle longer than the haystack can never match.
        if nlen > hlen {
            return None;
        }
        // Start with the right-most needle-sized window and roll leftwards.
        let mut cur = hend.sub(nlen);
        let start = hstart;
        let mut hash = Hash::reverse(cur, cur.add(nlen));
        loop {
            // Only fall back to the byte-wise comparison when the (cheap)
            // hash comparison passes.
            if self.0.hash == hash && is_equal_raw(cur, nstart, nlen) {
                return Some(cur);
            }
            if cur <= start {
                return None;
            }
            // Roll the window one byte to the left: drop the byte leaving on
            // the right, pull in the byte entering on the left.
            cur = cur.sub(1);
            hash.roll(&self.0, cur.add(nlen).read(), cur.read());
        }
    }
}
|
||||
|
||||
/// Whether RK is believed to be very fast for the given needle/haystack.
|
||||
#[inline]
|
||||
pub(crate) fn is_fast(haystack: &[u8], _needle: &[u8]) -> bool {
|
||||
haystack.len() < 16
|
||||
}
|
||||
|
||||
/// A Rabin-Karp hash. This might represent the hash of a needle, or the hash
|
||||
/// of a rolling window in the haystack.
|
||||
#[derive(Clone, Copy, Debug, Default, Eq, PartialEq)]
|
||||
struct Hash(u32);
|
||||
|
||||
impl Hash {
|
||||
/// Create a new hash that represents the empty string.
|
||||
#[inline(always)]
|
||||
fn new() -> Hash {
|
||||
Hash(0)
|
||||
}
|
||||
|
||||
/// Create a new hash from the bytes given for use in forward searches.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The given pointers must be valid to read from within their range.
|
||||
#[inline(always)]
|
||||
unsafe fn forward(mut start: *const u8, end: *const u8) -> Hash {
|
||||
let mut hash = Hash::new();
|
||||
while start < end {
|
||||
hash.add(start.read());
|
||||
start = start.add(1);
|
||||
}
|
||||
hash
|
||||
}
|
||||
|
||||
/// Create a new hash from the bytes given for use in reverse searches.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// The given pointers must be valid to read from within their range.
|
||||
#[inline(always)]
|
||||
unsafe fn reverse(start: *const u8, mut end: *const u8) -> Hash {
|
||||
let mut hash = Hash::new();
|
||||
while start < end {
|
||||
end = end.sub(1);
|
||||
hash.add(end.read());
|
||||
}
|
||||
hash
|
||||
}
|
||||
|
||||
/// Add 'new' and remove 'old' from this hash. The given needle hash should
|
||||
/// correspond to the hash computed for the needle being searched for.
|
||||
///
|
||||
/// This is meant to be used when the rolling window of the haystack is
|
||||
/// advanced.
|
||||
#[inline(always)]
|
||||
fn roll(&mut self, finder: &Finder, old: u8, new: u8) {
|
||||
self.del(finder, old);
|
||||
self.add(new);
|
||||
}
|
||||
|
||||
/// Add a byte to this hash.
|
||||
#[inline(always)]
|
||||
fn add(&mut self, byte: u8) {
|
||||
self.0 = self.0.wrapping_shl(1).wrapping_add(u32::from(byte));
|
||||
}
|
||||
|
||||
/// Remove a byte from this hash. The given needle hash should correspond
|
||||
/// to the hash computed for the needle being searched for.
|
||||
#[inline(always)]
|
||||
fn del(&mut self, finder: &Finder, byte: u8) {
|
||||
let factor = finder.hash_2pow;
|
||||
self.0 = self.0.wrapping_sub(u32::from(byte).wrapping_mul(factor));
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns true when `x[i] == y[i]` for all `0 <= i < n`.
|
||||
///
|
||||
/// We forcefully don't inline this to hint at the compiler that it is unlikely
|
||||
/// to be called. This causes the inner rabinkarp loop above to be a bit
|
||||
/// tighter and leads to some performance improvement. See the
|
||||
/// memmem/krate/prebuilt/sliceslice-words/words benchmark.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Same as `crate::arch::all::is_equal_raw`.
|
||||
#[cold]
|
||||
#[inline(never)]
|
||||
unsafe fn is_equal_raw(x: *const u8, y: *const u8, n: usize) -> bool {
|
||||
crate::arch::all::is_equal_raw(x, y, n)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
define_substring_forward_quickcheck!(|h, n| Some(
|
||||
Finder::new(n).find(h, n)
|
||||
));
|
||||
define_substring_reverse_quickcheck!(|h, n| Some(
|
||||
FinderRev::new(n).rfind(h, n)
|
||||
));
|
||||
|
||||
#[test]
|
||||
fn forward() {
|
||||
crate::tests::substring::Runner::new()
|
||||
.fwd(|h, n| Some(Finder::new(n).find(h, n)))
|
||||
.run();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reverse() {
|
||||
crate::tests::substring::Runner::new()
|
||||
.rev(|h, n| Some(FinderRev::new(n).rfind(h, n)))
|
||||
.run();
|
||||
}
|
||||
}
|
||||
89
third_party/rust/memchr/src/arch/all/shiftor.rs
vendored
Normal file
89
third_party/rust/memchr/src/arch/all/shiftor.rs
vendored
Normal file
@@ -0,0 +1,89 @@
|
||||
/*!
|
||||
An implementation of the [Shift-Or substring search algorithm][shiftor].
|
||||
|
||||
[shiftor]: https://en.wikipedia.org/wiki/Bitap_algorithm
|
||||
*/
|
||||
|
||||
use alloc::boxed::Box;
|
||||
|
||||
/// The type of our mask.
|
||||
///
|
||||
/// While we don't expose anyway to configure this in the public API, if one
|
||||
/// really needs less memory usage or support for longer needles, then it is
|
||||
/// suggested to copy the code from this module and modify it to fit your
|
||||
/// needs. The code below is written to be correct regardless of whether Mask
|
||||
/// is a u8, u16, u32, u64 or u128.
|
||||
type Mask = u16;
|
||||
|
||||
/// A forward substring searcher using the Shift-Or algorithm.
|
||||
#[derive(Debug)]
|
||||
pub struct Finder {
|
||||
masks: Box<[Mask; 256]>,
|
||||
needle_len: usize,
|
||||
}
|
||||
|
||||
impl Finder {
|
||||
const MAX_NEEDLE_LEN: usize = (Mask::BITS - 1) as usize;
|
||||
|
||||
/// Create a new Shift-Or forward searcher for the given `needle`.
|
||||
///
|
||||
/// The needle may be empty. The empty needle matches at every byte offset.
|
||||
#[inline]
|
||||
pub fn new(needle: &[u8]) -> Option<Finder> {
|
||||
let needle_len = needle.len();
|
||||
if needle_len > Finder::MAX_NEEDLE_LEN {
|
||||
// A match is found when bit 7 is set in 'result' in the search
|
||||
// routine below. So our needle can't be bigger than 7. We could
|
||||
// permit bigger needles by using u16, u32 or u64 for our mask
|
||||
// entries. But this is all we need for this example.
|
||||
return None;
|
||||
}
|
||||
let mut searcher = Finder { masks: Box::from([!0; 256]), needle_len };
|
||||
for (i, &byte) in needle.iter().enumerate() {
|
||||
searcher.masks[usize::from(byte)] &= !(1 << i);
|
||||
}
|
||||
Some(searcher)
|
||||
}
|
||||
|
||||
/// Return the first occurrence of the needle given to `Finder::new` in
|
||||
/// the `haystack` given. If no such occurrence exists, then `None` is
|
||||
/// returned.
|
||||
///
|
||||
/// Unlike most other substring search implementations in this crate, this
|
||||
/// finder does not require passing the needle at search time. A match can
|
||||
/// be determined without the needle at all since the required information
|
||||
/// is already encoded into this finder at construction time.
|
||||
///
|
||||
/// The maximum value this can return is `haystack.len()`, which can only
|
||||
/// occur when the needle and haystack both have length zero. Otherwise,
|
||||
/// for non-empty haystacks, the maximum value is `haystack.len() - 1`.
|
||||
#[inline]
|
||||
pub fn find(&self, haystack: &[u8]) -> Option<usize> {
|
||||
if self.needle_len == 0 {
|
||||
return Some(0);
|
||||
}
|
||||
let mut result = !1;
|
||||
for (i, &byte) in haystack.iter().enumerate() {
|
||||
result |= self.masks[usize::from(byte)];
|
||||
result <<= 1;
|
||||
if result & (1 << self.needle_len) == 0 {
|
||||
return Some(i + 1 - self.needle_len);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
define_substring_forward_quickcheck!(|h, n| Some(Finder::new(n)?.find(h)));
|
||||
|
||||
#[test]
|
||||
fn forward() {
|
||||
crate::tests::substring::Runner::new()
|
||||
.fwd(|h, n| Some(Finder::new(n)?.find(h)))
|
||||
.run();
|
||||
}
|
||||
}
|
||||
@@ -1,31 +1,62 @@
|
||||
/*!
|
||||
An implementation of the [Two-Way substring search algorithm][two-way].
|
||||
|
||||
[`Finder`] can be built for forward searches, while [`FinderRev`] can be built
|
||||
for reverse searches.
|
||||
|
||||
Two-Way makes for a nice general purpose substring search algorithm because of
|
||||
its time and space complexity properties. It also performs well in practice.
|
||||
Namely, with `m = len(needle)` and `n = len(haystack)`, Two-Way takes `O(m)`
|
||||
time to create a finder, `O(1)` space and `O(n)` search time. In other words,
|
||||
the preprocessing step is quick, doesn't require any heap memory and the worst
|
||||
case search time is guaranteed to be linear in the haystack regardless of the
|
||||
size of the needle.
|
||||
|
||||
While vector algorithms will usually beat Two-Way handedly, vector algorithms
|
||||
also usually have pathological or edge cases that are better handled by Two-Way.
|
||||
Moreover, not all targets support vector algorithms or implementations for them
|
||||
simply may not exist yet.
|
||||
|
||||
Two-Way can be found in the `memmem` implementations in at least [GNU libc] and
|
||||
[musl].
|
||||
|
||||
[two-way]: https://en.wikipedia.org/wiki/Two-way_string-matching_algorithm
|
||||
[GNU libc]: https://www.gnu.org/software/libc/
|
||||
[musl]: https://www.musl-libc.org/
|
||||
*/
|
||||
|
||||
use core::cmp;
|
||||
|
||||
use crate::memmem::{prefilter::Pre, util};
|
||||
use crate::{
|
||||
arch::all::{is_prefix, is_suffix},
|
||||
memmem::Pre,
|
||||
};
|
||||
|
||||
/// Two-Way search in the forward direction.
|
||||
/// A forward substring searcher that uses the Two-Way algorithm.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub(crate) struct Forward(TwoWay);
|
||||
pub struct Finder(TwoWay);
|
||||
|
||||
/// Two-Way search in the reverse direction.
|
||||
/// A reverse substring searcher that uses the Two-Way algorithm.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub(crate) struct Reverse(TwoWay);
|
||||
pub struct FinderRev(TwoWay);
|
||||
|
||||
/// An implementation of the TwoWay substring search algorithm, with heuristics
|
||||
/// for accelerating search based on frequency analysis.
|
||||
/// An implementation of the TwoWay substring search algorithm.
|
||||
///
|
||||
/// This searcher supports forward and reverse search, although not
|
||||
/// simultaneously. It runs in O(n + m) time and O(1) space, where
|
||||
/// simultaneously. It runs in `O(n + m)` time and `O(1)` space, where
|
||||
/// `n ~ len(needle)` and `m ~ len(haystack)`.
|
||||
///
|
||||
/// The implementation here roughly matches that which was developed by
|
||||
/// Crochemore and Perrin in their 1991 paper "Two-way string-matching." The
|
||||
/// changes in this implementation are 1) the use of zero-based indices, 2) a
|
||||
/// heuristic skip table based on the last byte (borrowed from Rust's standard
|
||||
/// library) and 3) the addition of heuristics for a fast skip loop. That is,
|
||||
/// (3) this will detect bytes that are believed to be rare in the needle and
|
||||
/// use fast vectorized instructions to find their occurrences quickly. The
|
||||
/// Two-Way algorithm is then used to confirm whether a match at that location
|
||||
/// occurred.
|
||||
/// library) and 3) the addition of heuristics for a fast skip loop. For (3),
|
||||
/// callers can pass any kind of prefilter they want, but usually it's one
|
||||
/// based on a heuristic that uses an approximate background frequency of bytes
|
||||
/// to choose rare bytes to quickly look for candidate match positions. Note
|
||||
/// though that currently, this prefilter functionality is not exposed directly
|
||||
/// in the public API. (File an issue if you want it and provide a use case
|
||||
/// please.)
|
||||
///
|
||||
/// The heuristic for fast skipping is automatically shut off if it's
|
||||
/// detected to be ineffective at search time. Generally, this only occurs in
|
||||
@@ -36,20 +67,20 @@ pub(crate) struct Reverse(TwoWay);
|
||||
/// likely necessary to read the Two-Way paper cited above in order to fully
|
||||
/// grok this code. The essence of it is:
|
||||
///
|
||||
/// 1) Do something to detect a "critical" position in the needle.
|
||||
/// 2) For the current position in the haystack, look if needle[critical..]
|
||||
/// matches at that position.
|
||||
/// 3) If so, look if needle[..critical] matches.
|
||||
/// 4) If a mismatch occurs, shift the search by some amount based on the
|
||||
/// critical position and a pre-computed shift.
|
||||
/// 1. Do something to detect a "critical" position in the needle.
|
||||
/// 2. For the current position in the haystack, look if `needle[critical..]`
|
||||
/// matches at that position.
|
||||
/// 3. If so, look if `needle[..critical]` matches.
|
||||
/// 4. If a mismatch occurs, shift the search by some amount based on the
|
||||
/// critical position and a pre-computed shift.
|
||||
///
|
||||
/// This type is wrapped in Forward and Reverse types that expose consistent
|
||||
/// forward or reverse APIs.
|
||||
/// This type is wrapped in the forward and reverse finders that expose
|
||||
/// consistent forward or reverse APIs.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
struct TwoWay {
|
||||
/// A small bitset used as a quick prefilter (in addition to the faster
|
||||
/// SIMD based prefilter). Namely, a bit 'i' is set if and only if b%64==i
|
||||
/// for any b in the needle.
|
||||
/// A small bitset used as a quick prefilter (in addition to any prefilter
|
||||
/// given by the caller). Namely, a bit `i` is set if and only if `b%64==i`
|
||||
/// for any `b == needle[i]`.
|
||||
///
|
||||
/// When used as a prefilter, if the last byte at the current candidate
|
||||
/// position is NOT in this set, then we can skip that entire candidate
|
||||
@@ -74,14 +105,13 @@ struct TwoWay {
|
||||
shift: Shift,
|
||||
}
|
||||
|
||||
impl Forward {
|
||||
/// Create a searcher that uses the Two-Way algorithm by searching forwards
|
||||
/// through any haystack.
|
||||
pub(crate) fn new(needle: &[u8]) -> Forward {
|
||||
if needle.is_empty() {
|
||||
return Forward(TwoWay::empty());
|
||||
}
|
||||
|
||||
impl Finder {
|
||||
/// Create a searcher that finds occurrences of the given `needle`.
|
||||
///
|
||||
/// An empty `needle` results in a match at every position in a haystack,
|
||||
/// including at `haystack.len()`.
|
||||
#[inline]
|
||||
pub fn new(needle: &[u8]) -> Finder {
|
||||
let byteset = ApproximateByteSet::new(needle);
|
||||
let min_suffix = Suffix::forward(needle, SuffixKind::Minimal);
|
||||
let max_suffix = Suffix::forward(needle, SuffixKind::Maximal);
|
||||
@@ -92,27 +122,38 @@ impl Forward {
|
||||
(max_suffix.period, max_suffix.pos)
|
||||
};
|
||||
let shift = Shift::forward(needle, period_lower_bound, critical_pos);
|
||||
Forward(TwoWay { byteset, critical_pos, shift })
|
||||
Finder(TwoWay { byteset, critical_pos, shift })
|
||||
}
|
||||
|
||||
/// Find the position of the first occurrence of this searcher's needle in
|
||||
/// the given haystack. If one does not exist, then return None.
|
||||
/// Returns the first occurrence of `needle` in the given `haystack`, or
|
||||
/// `None` if no such occurrence could be found.
|
||||
///
|
||||
/// This accepts prefilter state that is useful when using the same
|
||||
/// searcher multiple times, such as in an iterator.
|
||||
/// The `needle` given must be the same as the `needle` provided to
|
||||
/// [`Finder::new`].
|
||||
///
|
||||
/// Callers must guarantee that the needle is non-empty and its length is
|
||||
/// <= the haystack's length.
|
||||
/// An empty `needle` results in a match at every position in a haystack,
|
||||
/// including at `haystack.len()`.
|
||||
#[inline]
|
||||
pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
|
||||
self.find_with_prefilter(None, haystack, needle)
|
||||
}
|
||||
|
||||
/// This is like [`Finder::find`], but it accepts a prefilter for
|
||||
/// accelerating searches.
|
||||
///
|
||||
/// Currently this is not exposed in the public API because, at the time
|
||||
/// of writing, I didn't want to spend time thinking about how to expose
|
||||
/// the prefilter infrastructure (if at all). If you have a compelling use
|
||||
/// case for exposing this routine, please create an issue. Do *not* open
|
||||
/// a PR that just exposes `Pre` and friends. Exporting this routine will
|
||||
/// require API design.
|
||||
#[inline(always)]
|
||||
pub(crate) fn find(
|
||||
pub(crate) fn find_with_prefilter(
|
||||
&self,
|
||||
pre: Option<&mut Pre<'_>>,
|
||||
pre: Option<Pre<'_>>,
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
) -> Option<usize> {
|
||||
debug_assert!(!needle.is_empty(), "needle should not be empty");
|
||||
debug_assert!(needle.len() <= haystack.len(), "haystack too short");
|
||||
|
||||
match self.0.shift {
|
||||
Shift::Small { period } => {
|
||||
self.find_small_imp(pre, haystack, needle, period)
|
||||
@@ -123,25 +164,6 @@ impl Forward {
|
||||
}
|
||||
}
|
||||
|
||||
/// Like find, but handles the degenerate substring test cases. This is
|
||||
/// only useful for conveniently testing this substring implementation in
|
||||
/// isolation.
|
||||
#[cfg(test)]
|
||||
fn find_general(
|
||||
&self,
|
||||
pre: Option<&mut Pre<'_>>,
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
) -> Option<usize> {
|
||||
if needle.is_empty() {
|
||||
Some(0)
|
||||
} else if haystack.len() < needle.len() {
|
||||
None
|
||||
} else {
|
||||
self.find(pre, haystack, needle)
|
||||
}
|
||||
}
|
||||
|
||||
// Each of the two search implementations below can be accelerated by a
|
||||
// prefilter, but it is not always enabled. To avoid its overhead when
|
||||
// its disabled, we explicitly inline each search implementation based on
|
||||
@@ -151,19 +173,22 @@ impl Forward {
|
||||
#[inline(always)]
|
||||
fn find_small_imp(
|
||||
&self,
|
||||
mut pre: Option<&mut Pre<'_>>,
|
||||
mut pre: Option<Pre<'_>>,
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
period: usize,
|
||||
) -> Option<usize> {
|
||||
let last_byte = needle.len() - 1;
|
||||
let mut pos = 0;
|
||||
let mut shift = 0;
|
||||
let last_byte_pos = match needle.len().checked_sub(1) {
|
||||
None => return Some(pos),
|
||||
Some(last_byte) => last_byte,
|
||||
};
|
||||
while pos + needle.len() <= haystack.len() {
|
||||
let mut i = cmp::max(self.0.critical_pos, shift);
|
||||
if let Some(pre) = pre.as_mut() {
|
||||
if pre.should_call() {
|
||||
pos += pre.call(&haystack[pos..], needle)?;
|
||||
if pre.is_effective() {
|
||||
pos += pre.find(&haystack[pos..])?;
|
||||
shift = 0;
|
||||
i = self.0.critical_pos;
|
||||
if pos + needle.len() > haystack.len() {
|
||||
@@ -171,7 +196,7 @@ impl Forward {
|
||||
}
|
||||
}
|
||||
}
|
||||
if !self.0.byteset.contains(haystack[pos + last_byte]) {
|
||||
if !self.0.byteset.contains(haystack[pos + last_byte_pos]) {
|
||||
pos += needle.len();
|
||||
shift = 0;
|
||||
continue;
|
||||
@@ -200,24 +225,27 @@ impl Forward {
|
||||
#[inline(always)]
|
||||
fn find_large_imp(
|
||||
&self,
|
||||
mut pre: Option<&mut Pre<'_>>,
|
||||
mut pre: Option<Pre<'_>>,
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
shift: usize,
|
||||
) -> Option<usize> {
|
||||
let last_byte = needle.len() - 1;
|
||||
let mut pos = 0;
|
||||
let last_byte_pos = match needle.len().checked_sub(1) {
|
||||
None => return Some(pos),
|
||||
Some(last_byte) => last_byte,
|
||||
};
|
||||
'outer: while pos + needle.len() <= haystack.len() {
|
||||
if let Some(pre) = pre.as_mut() {
|
||||
if pre.should_call() {
|
||||
pos += pre.call(&haystack[pos..], needle)?;
|
||||
if pre.is_effective() {
|
||||
pos += pre.find(&haystack[pos..])?;
|
||||
if pos + needle.len() > haystack.len() {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if !self.0.byteset.contains(haystack[pos + last_byte]) {
|
||||
if !self.0.byteset.contains(haystack[pos + last_byte_pos]) {
|
||||
pos += needle.len();
|
||||
continue;
|
||||
}
|
||||
@@ -241,14 +269,13 @@ impl Forward {
|
||||
}
|
||||
}
|
||||
|
||||
impl Reverse {
|
||||
/// Create a searcher that uses the Two-Way algorithm by searching in
|
||||
/// reverse through any haystack.
|
||||
pub(crate) fn new(needle: &[u8]) -> Reverse {
|
||||
if needle.is_empty() {
|
||||
return Reverse(TwoWay::empty());
|
||||
}
|
||||
|
||||
impl FinderRev {
|
||||
/// Create a searcher that finds occurrences of the given `needle`.
|
||||
///
|
||||
/// An empty `needle` results in a match at every position in a haystack,
|
||||
/// including at `haystack.len()`.
|
||||
#[inline]
|
||||
pub fn new(needle: &[u8]) -> FinderRev {
|
||||
let byteset = ApproximateByteSet::new(needle);
|
||||
let min_suffix = Suffix::reverse(needle, SuffixKind::Minimal);
|
||||
let max_suffix = Suffix::reverse(needle, SuffixKind::Maximal);
|
||||
@@ -258,27 +285,20 @@ impl Reverse {
|
||||
} else {
|
||||
(max_suffix.period, max_suffix.pos)
|
||||
};
|
||||
// let critical_pos = needle.len() - critical_pos;
|
||||
let shift = Shift::reverse(needle, period_lower_bound, critical_pos);
|
||||
Reverse(TwoWay { byteset, critical_pos, shift })
|
||||
FinderRev(TwoWay { byteset, critical_pos, shift })
|
||||
}
|
||||
|
||||
/// Find the position of the last occurrence of this searcher's needle
|
||||
/// in the given haystack. If one does not exist, then return None.
|
||||
/// Returns the last occurrence of `needle` in the given `haystack`, or
|
||||
/// `None` if no such occurrence could be found.
|
||||
///
|
||||
/// This will automatically initialize prefilter state. This should only
|
||||
/// be used for one-off searches.
|
||||
/// The `needle` given must be the same as the `needle` provided to
|
||||
/// [`FinderRev::new`].
|
||||
///
|
||||
/// Callers must guarantee that the needle is non-empty and its length is
|
||||
/// <= the haystack's length.
|
||||
#[inline(always)]
|
||||
pub(crate) fn rfind(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
) -> Option<usize> {
|
||||
debug_assert!(!needle.is_empty(), "needle should not be empty");
|
||||
debug_assert!(needle.len() <= haystack.len(), "haystack too short");
|
||||
/// An empty `needle` results in a match at every position in a haystack,
|
||||
/// including at `haystack.len()`.
|
||||
#[inline]
|
||||
pub fn rfind(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
|
||||
// For the reverse case, we don't use a prefilter. It's plausible that
|
||||
// perhaps we should, but it's a lot of additional code to do it, and
|
||||
// it's not clear that it's actually worth it. If you have a really
|
||||
@@ -293,20 +313,6 @@ impl Reverse {
|
||||
}
|
||||
}
|
||||
|
||||
/// Like rfind, but handles the degenerate substring test cases. This is
|
||||
/// only useful for conveniently testing this substring implementation in
|
||||
/// isolation.
|
||||
#[cfg(test)]
|
||||
fn rfind_general(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
|
||||
if needle.is_empty() {
|
||||
Some(haystack.len())
|
||||
} else if haystack.len() < needle.len() {
|
||||
None
|
||||
} else {
|
||||
self.rfind(haystack, needle)
|
||||
}
|
||||
}
|
||||
|
||||
#[inline(always)]
|
||||
fn rfind_small_imp(
|
||||
&self,
|
||||
@@ -317,6 +323,10 @@ impl Reverse {
|
||||
let nlen = needle.len();
|
||||
let mut pos = haystack.len();
|
||||
let mut shift = nlen;
|
||||
let first_byte = match needle.get(0) {
|
||||
None => return Some(pos),
|
||||
Some(&first_byte) => first_byte,
|
||||
};
|
||||
while pos >= nlen {
|
||||
if !self.0.byteset.contains(haystack[pos - nlen]) {
|
||||
pos -= nlen;
|
||||
@@ -327,7 +337,7 @@ impl Reverse {
|
||||
while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] {
|
||||
i -= 1;
|
||||
}
|
||||
if i > 0 || needle[0] != haystack[pos - nlen] {
|
||||
if i > 0 || first_byte != haystack[pos - nlen] {
|
||||
pos -= self.0.critical_pos - i + 1;
|
||||
shift = nlen;
|
||||
} else {
|
||||
@@ -354,6 +364,10 @@ impl Reverse {
|
||||
) -> Option<usize> {
|
||||
let nlen = needle.len();
|
||||
let mut pos = haystack.len();
|
||||
let first_byte = match needle.get(0) {
|
||||
None => return Some(pos),
|
||||
Some(&first_byte) => first_byte,
|
||||
};
|
||||
while pos >= nlen {
|
||||
if !self.0.byteset.contains(haystack[pos - nlen]) {
|
||||
pos -= nlen;
|
||||
@@ -363,7 +377,7 @@ impl Reverse {
|
||||
while i > 0 && needle[i - 1] == haystack[pos - nlen + i - 1] {
|
||||
i -= 1;
|
||||
}
|
||||
if i > 0 || needle[0] != haystack[pos - nlen] {
|
||||
if i > 0 || first_byte != haystack[pos - nlen] {
|
||||
pos -= self.0.critical_pos - i + 1;
|
||||
} else {
|
||||
let mut j = self.0.critical_pos;
|
||||
@@ -380,16 +394,6 @@ impl Reverse {
|
||||
}
|
||||
}
|
||||
|
||||
impl TwoWay {
|
||||
fn empty() -> TwoWay {
|
||||
TwoWay {
|
||||
byteset: ApproximateByteSet::new(b""),
|
||||
critical_pos: 0,
|
||||
shift: Shift::Large { shift: 0 },
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// A representation of the amount we're allowed to shift by during Two-Way
|
||||
/// search.
|
||||
///
|
||||
@@ -444,7 +448,7 @@ impl Shift {
|
||||
}
|
||||
|
||||
let (u, v) = needle.split_at(critical_pos);
|
||||
if !util::is_suffix(&v[..period_lower_bound], u) {
|
||||
if !is_suffix(&v[..period_lower_bound], u) {
|
||||
return Shift::Large { shift: large };
|
||||
}
|
||||
Shift::Small { period: period_lower_bound }
|
||||
@@ -467,7 +471,7 @@ impl Shift {
|
||||
}
|
||||
|
||||
let (v, u) = needle.split_at(critical_pos);
|
||||
if !util::is_prefix(&v[v.len() - period_lower_bound..], u) {
|
||||
if !is_prefix(&v[v.len() - period_lower_bound..], u) {
|
||||
return Shift::Large { shift: large };
|
||||
}
|
||||
Shift::Small { period: period_lower_bound }
|
||||
@@ -494,8 +498,6 @@ struct Suffix {
|
||||
|
||||
impl Suffix {
|
||||
fn forward(needle: &[u8], kind: SuffixKind) -> Suffix {
|
||||
debug_assert!(!needle.is_empty());
|
||||
|
||||
// suffix represents our maximal (or minimal) suffix, along with
|
||||
// its period.
|
||||
let mut suffix = Suffix { pos: 0, period: 1 };
|
||||
@@ -544,14 +546,15 @@ impl Suffix {
|
||||
}
|
||||
|
||||
fn reverse(needle: &[u8], kind: SuffixKind) -> Suffix {
|
||||
debug_assert!(!needle.is_empty());
|
||||
|
||||
// See the comments in `forward` for how this works.
|
||||
let mut suffix = Suffix { pos: needle.len(), period: 1 };
|
||||
if needle.len() == 1 {
|
||||
return suffix;
|
||||
}
|
||||
let mut candidate_start = needle.len() - 1;
|
||||
let mut candidate_start = match needle.len().checked_sub(1) {
|
||||
None => return suffix,
|
||||
Some(candidate_start) => candidate_start,
|
||||
};
|
||||
let mut offset = 0;
|
||||
|
||||
while offset < candidate_start {
|
||||
@@ -665,17 +668,12 @@ impl ApproximateByteSet {
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(all(test, feature = "std", not(miri)))]
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use quickcheck::quickcheck;
|
||||
use alloc::vec::Vec;
|
||||
|
||||
use super::*;
|
||||
|
||||
define_memmem_quickcheck_tests!(
|
||||
super::simpletests::twoway_find,
|
||||
super::simpletests::twoway_rfind
|
||||
);
|
||||
|
||||
/// Convenience wrapper for computing the suffix as a byte string.
|
||||
fn get_suffix_forward(needle: &[u8], kind: SuffixKind) -> (&[u8], usize) {
|
||||
let s = Suffix::forward(needle, kind);
|
||||
@@ -710,13 +708,34 @@ mod tests {
|
||||
got
|
||||
}
|
||||
|
||||
define_substring_forward_quickcheck!(|h, n| Some(
|
||||
Finder::new(n).find(h, n)
|
||||
));
|
||||
define_substring_reverse_quickcheck!(|h, n| Some(
|
||||
FinderRev::new(n).rfind(h, n)
|
||||
));
|
||||
|
||||
#[test]
|
||||
fn forward() {
|
||||
crate::tests::substring::Runner::new()
|
||||
.fwd(|h, n| Some(Finder::new(n).find(h, n)))
|
||||
.run();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn reverse() {
|
||||
crate::tests::substring::Runner::new()
|
||||
.rev(|h, n| Some(FinderRev::new(n).rfind(h, n)))
|
||||
.run();
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn suffix_forward() {
|
||||
macro_rules! assert_suffix_min {
|
||||
($given:expr, $expected:expr, $period:expr) => {
|
||||
let (got_suffix, got_period) =
|
||||
get_suffix_forward($given.as_bytes(), SuffixKind::Minimal);
|
||||
let got_suffix = std::str::from_utf8(got_suffix).unwrap();
|
||||
let got_suffix = core::str::from_utf8(got_suffix).unwrap();
|
||||
assert_eq!(($expected, $period), (got_suffix, got_period));
|
||||
};
|
||||
}
|
||||
@@ -725,7 +744,7 @@ mod tests {
|
||||
($given:expr, $expected:expr, $period:expr) => {
|
||||
let (got_suffix, got_period) =
|
||||
get_suffix_forward($given.as_bytes(), SuffixKind::Maximal);
|
||||
let got_suffix = std::str::from_utf8(got_suffix).unwrap();
|
||||
let got_suffix = core::str::from_utf8(got_suffix).unwrap();
|
||||
assert_eq!(($expected, $period), (got_suffix, got_period));
|
||||
};
|
||||
}
|
||||
@@ -773,7 +792,7 @@ mod tests {
|
||||
($given:expr, $expected:expr, $period:expr) => {
|
||||
let (got_suffix, got_period) =
|
||||
get_suffix_reverse($given.as_bytes(), SuffixKind::Minimal);
|
||||
let got_suffix = std::str::from_utf8(got_suffix).unwrap();
|
||||
let got_suffix = core::str::from_utf8(got_suffix).unwrap();
|
||||
assert_eq!(($expected, $period), (got_suffix, got_period));
|
||||
};
|
||||
}
|
||||
@@ -782,7 +801,7 @@ mod tests {
|
||||
($given:expr, $expected:expr, $period:expr) => {
|
||||
let (got_suffix, got_period) =
|
||||
get_suffix_reverse($given.as_bytes(), SuffixKind::Maximal);
|
||||
let got_suffix = std::str::from_utf8(got_suffix).unwrap();
|
||||
let got_suffix = core::str::from_utf8(got_suffix).unwrap();
|
||||
assert_eq!(($expected, $period), (got_suffix, got_period));
|
||||
};
|
||||
}
|
||||
@@ -821,7 +840,8 @@ mod tests {
|
||||
assert_suffix_max!("aaa", "aaa", 1);
|
||||
}
|
||||
|
||||
quickcheck! {
|
||||
#[cfg(not(miri))]
|
||||
quickcheck::quickcheck! {
|
||||
fn qc_suffix_forward_maximal(bytes: Vec<u8>) -> bool {
|
||||
if bytes.is_empty() {
|
||||
return true;
|
||||
@@ -842,27 +862,6 @@ mod tests {
|
||||
expected == got
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod simpletests {
|
||||
use super::*;
|
||||
|
||||
pub(crate) fn twoway_find(
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
) -> Option<usize> {
|
||||
Forward::new(needle).find_general(None, haystack, needle)
|
||||
}
|
||||
|
||||
pub(crate) fn twoway_rfind(
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
) -> Option<usize> {
|
||||
Reverse::new(needle).rfind_general(haystack, needle)
|
||||
}
|
||||
|
||||
define_memmem_simple_tests!(twoway_find, twoway_rfind);
|
||||
|
||||
// This is a regression test caught by quickcheck that exercised a bug in
|
||||
// the reverse small period handling. The bug was that we were using 'if j
|
||||
@@ -870,7 +869,7 @@ mod simpletests {
|
||||
// j >= shift', which matches the corresponding guard in the forward impl.
|
||||
#[test]
|
||||
fn regression_rev_small_period() {
|
||||
let rfind = super::simpletests::twoway_rfind;
|
||||
let rfind = |h, n| FinderRev::new(n).rfind(h, n);
|
||||
let haystack = "ababaz";
|
||||
let needle = "abab";
|
||||
assert_eq!(Some(0), rfind(haystack.as_bytes(), needle.as_bytes()));
|
||||
1214
third_party/rust/memchr/src/arch/generic/memchr.rs
vendored
Normal file
1214
third_party/rust/memchr/src/arch/generic/memchr.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
14
third_party/rust/memchr/src/arch/generic/mod.rs
vendored
Normal file
14
third_party/rust/memchr/src/arch/generic/mod.rs
vendored
Normal file
@@ -0,0 +1,14 @@
|
||||
/*!
|
||||
This module defines "generic" routines that can be specialized to specific
|
||||
architectures.
|
||||
|
||||
We don't expose this module primarily because it would require exposing all
|
||||
of the internal infrastructure required to write these generic routines.
|
||||
That infrastructure should be treated as an implementation detail so that
|
||||
it is allowed to evolve. Instead, what we expose are architecture specific
|
||||
instantiations of these generic implementations. The generic code just lets us
|
||||
write the code once (usually).
|
||||
*/
|
||||
|
||||
pub(crate) mod memchr;
|
||||
pub(crate) mod packedpair;
|
||||
317
third_party/rust/memchr/src/arch/generic/packedpair.rs
vendored
Normal file
317
third_party/rust/memchr/src/arch/generic/packedpair.rs
vendored
Normal file
@@ -0,0 +1,317 @@
|
||||
/*!
|
||||
Generic crate-internal routines for the "packed pair" SIMD algorithm.
|
||||
|
||||
The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
|
||||
difference is that it (by default) uses a background distribution of byte
|
||||
frequencies to heuristically select the pair of bytes to search for.
|
||||
|
||||
[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
|
||||
*/
|
||||
|
||||
use crate::{
|
||||
arch::all::{is_equal_raw, packedpair::Pair},
|
||||
ext::Pointer,
|
||||
vector::{MoveMask, Vector},
|
||||
};
|
||||
|
||||
/// A generic architecture dependent "packed pair" finder.
|
||||
///
|
||||
/// This finder picks two bytes that it believes have high predictive power
|
||||
/// for indicating an overall match of a needle. Depending on whether
|
||||
/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets
|
||||
/// where the needle matches or could match. In the prefilter case, candidates
|
||||
/// are reported whenever the [`Pair`] of bytes given matches.
|
||||
///
|
||||
/// This is architecture dependent because it uses specific vector operations
|
||||
/// to look for occurrences of the pair of bytes.
|
||||
///
|
||||
/// This type is not meant to be exported and is instead meant to be used as
|
||||
/// the implementation for architecture specific facades. Why? Because it's a
|
||||
/// bit of a quirky API that requires `inline(always)` annotations. And pretty
|
||||
/// much everything has safety obligations due (at least) to the caller needing
|
||||
/// to inline calls into routines marked with
|
||||
/// `#[target_feature(enable = "...")]`.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub(crate) struct Finder<V> {
|
||||
pair: Pair,
|
||||
v1: V,
|
||||
v2: V,
|
||||
min_haystack_len: usize,
|
||||
}
|
||||
|
||||
impl<V: Vector> Finder<V> {
|
||||
/// Create a new pair searcher. The searcher returned can either report
|
||||
/// exact matches of `needle` or act as a prefilter and report candidate
|
||||
/// positions of `needle`.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Callers must ensure that whatever vector type this routine is called
|
||||
/// with is supported by the current environment.
|
||||
///
|
||||
/// Callers must also ensure that `needle.len() >= 2`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn new(needle: &[u8], pair: Pair) -> Finder<V> {
|
||||
let max_index = pair.index1().max(pair.index2());
|
||||
let min_haystack_len =
|
||||
core::cmp::max(needle.len(), usize::from(max_index) + V::BYTES);
|
||||
let v1 = V::splat(needle[usize::from(pair.index1())]);
|
||||
let v2 = V::splat(needle[usize::from(pair.index2())]);
|
||||
Finder { pair, v1, v2, min_haystack_len }
|
||||
}
|
||||
|
||||
/// Searches the given haystack for the given needle. The needle given
|
||||
/// should be the same as the needle that this finder was initialized
|
||||
/// with.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Since this is meant to be used with vector functions, callers need to
|
||||
/// specialize this inside of a function with a `target_feature` attribute.
|
||||
/// Therefore, callers must ensure that whatever target feature is being
|
||||
/// used supports the vector functions that this function is specialized
|
||||
/// for. (For the specific vector functions used, see the Vector trait
|
||||
/// implementations.)
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn find(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
) -> Option<usize> {
|
||||
assert!(
|
||||
haystack.len() >= self.min_haystack_len,
|
||||
"haystack too small, should be at least {} but got {}",
|
||||
self.min_haystack_len,
|
||||
haystack.len(),
|
||||
);
|
||||
|
||||
let all = V::Mask::all_zeros_except_least_significant(0);
|
||||
let start = haystack.as_ptr();
|
||||
let end = start.add(haystack.len());
|
||||
let max = end.sub(self.min_haystack_len);
|
||||
let mut cur = start;
|
||||
|
||||
// N.B. I did experiment with unrolling the loop to deal with size(V)
|
||||
// bytes at a time and 2*size(V) bytes at a time. The double unroll
|
||||
// was marginally faster while the quadruple unroll was unambiguously
|
||||
// slower. In the end, I decided the complexity from unrolling wasn't
|
||||
// worth it. I used the memmem/krate/prebuilt/huge-en/ benchmarks to
|
||||
// compare.
|
||||
while cur <= max {
|
||||
if let Some(chunki) = self.find_in_chunk(needle, cur, end, all) {
|
||||
return Some(matched(start, cur, chunki));
|
||||
}
|
||||
cur = cur.add(V::BYTES);
|
||||
}
|
||||
if cur < end {
|
||||
let remaining = end.distance(cur);
|
||||
debug_assert!(
|
||||
remaining < self.min_haystack_len,
|
||||
"remaining bytes should be smaller than the minimum haystack \
|
||||
length of {}, but there are {} bytes remaining",
|
||||
self.min_haystack_len,
|
||||
remaining,
|
||||
);
|
||||
if remaining < needle.len() {
|
||||
return None;
|
||||
}
|
||||
debug_assert!(
|
||||
max < cur,
|
||||
"after main loop, cur should have exceeded max",
|
||||
);
|
||||
let overlap = cur.distance(max);
|
||||
debug_assert!(
|
||||
overlap > 0,
|
||||
"overlap ({}) must always be non-zero",
|
||||
overlap,
|
||||
);
|
||||
debug_assert!(
|
||||
overlap < V::BYTES,
|
||||
"overlap ({}) cannot possibly be >= than a vector ({})",
|
||||
overlap,
|
||||
V::BYTES,
|
||||
);
|
||||
// The mask has all of its bits set except for the first N least
|
||||
// significant bits, where N=overlap. This way, any matches that
|
||||
// occur in find_in_chunk within the overlap are automatically
|
||||
// ignored.
|
||||
let mask = V::Mask::all_zeros_except_least_significant(overlap);
|
||||
cur = max;
|
||||
let m = self.find_in_chunk(needle, cur, end, mask);
|
||||
if let Some(chunki) = m {
|
||||
return Some(matched(start, cur, chunki));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Searches the given haystack for offsets that represent candidate
|
||||
/// matches of the `needle` given to this finder's constructor. The offsets
|
||||
/// returned, if they are a match, correspond to the starting offset of
|
||||
/// `needle` in the given `haystack`.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Since this is meant to be used with vector functions, callers need to
|
||||
/// specialize this inside of a function with a `target_feature` attribute.
|
||||
/// Therefore, callers must ensure that whatever target feature is being
|
||||
/// used supports the vector functions that this function is specialized
|
||||
/// for. (For the specific vector functions used, see the Vector trait
|
||||
/// implementations.)
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn find_prefilter(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
) -> Option<usize> {
|
||||
assert!(
|
||||
haystack.len() >= self.min_haystack_len,
|
||||
"haystack too small, should be at least {} but got {}",
|
||||
self.min_haystack_len,
|
||||
haystack.len(),
|
||||
);
|
||||
|
||||
let start = haystack.as_ptr();
|
||||
let end = start.add(haystack.len());
|
||||
let max = end.sub(self.min_haystack_len);
|
||||
let mut cur = start;
|
||||
|
||||
// N.B. I did experiment with unrolling the loop to deal with size(V)
|
||||
// bytes at a time and 2*size(V) bytes at a time. The double unroll
|
||||
// was marginally faster while the quadruple unroll was unambiguously
|
||||
// slower. In the end, I decided the complexity from unrolling wasn't
|
||||
// worth it. I used the memmem/krate/prebuilt/huge-en/ benchmarks to
|
||||
// compare.
|
||||
while cur <= max {
|
||||
if let Some(chunki) = self.find_prefilter_in_chunk(cur) {
|
||||
return Some(matched(start, cur, chunki));
|
||||
}
|
||||
cur = cur.add(V::BYTES);
|
||||
}
|
||||
if cur < end {
|
||||
// This routine immediately quits if a candidate match is found.
|
||||
// That means that if we're here, no candidate matches have been
|
||||
// found at or before 'ptr'. Thus, we don't need to mask anything
|
||||
// out even though we might technically search part of the haystack
|
||||
// that we've already searched (because we know it can't match).
|
||||
cur = max;
|
||||
if let Some(chunki) = self.find_prefilter_in_chunk(cur) {
|
||||
return Some(matched(start, cur, chunki));
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Search for an occurrence of our byte pair from the needle in the chunk
|
||||
/// pointed to by cur, with the end of the haystack pointed to by end.
|
||||
/// When an occurrence is found, memcmp is run to check if a match occurs
|
||||
/// at the corresponding position.
|
||||
///
|
||||
/// `mask` should have bits set corresponding the positions in the chunk
|
||||
/// in which matches are considered. This is only used for the last vector
|
||||
/// load where the beginning of the vector might have overlapped with the
|
||||
/// last load in the main loop. The mask lets us avoid visiting positions
|
||||
/// that have already been discarded as matches.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// It must be safe to do an unaligned read of size(V) bytes starting at
|
||||
/// both (cur + self.index1) and (cur + self.index2). It must also be safe
|
||||
/// to do unaligned loads on cur up to (end - needle.len()).
|
||||
#[inline(always)]
|
||||
unsafe fn find_in_chunk(
|
||||
&self,
|
||||
needle: &[u8],
|
||||
cur: *const u8,
|
||||
end: *const u8,
|
||||
mask: V::Mask,
|
||||
) -> Option<usize> {
|
||||
let index1 = usize::from(self.pair.index1());
|
||||
let index2 = usize::from(self.pair.index2());
|
||||
let chunk1 = V::load_unaligned(cur.add(index1));
|
||||
let chunk2 = V::load_unaligned(cur.add(index2));
|
||||
let eq1 = chunk1.cmpeq(self.v1);
|
||||
let eq2 = chunk2.cmpeq(self.v2);
|
||||
|
||||
let mut offsets = eq1.and(eq2).movemask().and(mask);
|
||||
while offsets.has_non_zero() {
|
||||
let offset = offsets.first_offset();
|
||||
let cur = cur.add(offset);
|
||||
if end.sub(needle.len()) < cur {
|
||||
return None;
|
||||
}
|
||||
if is_equal_raw(needle.as_ptr(), cur, needle.len()) {
|
||||
return Some(offset);
|
||||
}
|
||||
offsets = offsets.clear_least_significant_bit();
|
||||
}
|
||||
None
|
||||
}
|
||||
|
||||
/// Search for an occurrence of our byte pair from the needle in the chunk
|
||||
/// pointed to by cur, with the end of the haystack pointed to by end.
|
||||
/// When an occurrence is found, memcmp is run to check if a match occurs
|
||||
/// at the corresponding position.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// It must be safe to do an unaligned read of size(V) bytes starting at
|
||||
/// both (cur + self.index1) and (cur + self.index2). It must also be safe
|
||||
/// to do unaligned reads on cur up to (end - needle.len()).
|
||||
#[inline(always)]
|
||||
unsafe fn find_prefilter_in_chunk(&self, cur: *const u8) -> Option<usize> {
|
||||
let index1 = usize::from(self.pair.index1());
|
||||
let index2 = usize::from(self.pair.index2());
|
||||
let chunk1 = V::load_unaligned(cur.add(index1));
|
||||
let chunk2 = V::load_unaligned(cur.add(index2));
|
||||
let eq1 = chunk1.cmpeq(self.v1);
|
||||
let eq2 = chunk2.cmpeq(self.v2);
|
||||
|
||||
let offsets = eq1.and(eq2).movemask();
|
||||
if !offsets.has_non_zero() {
|
||||
return None;
|
||||
}
|
||||
Some(offsets.first_offset())
|
||||
}
|
||||
|
||||
/// Returns the pair of offsets (into the needle) used to check as a
|
||||
/// predicate before confirming whether a needle exists at a particular
|
||||
/// position.
|
||||
#[inline]
|
||||
pub(crate) fn pair(&self) -> &Pair {
|
||||
&self.pair
|
||||
}
|
||||
|
||||
/// Returns the minimum haystack length that this `Finder` can search.
|
||||
///
|
||||
/// Providing a haystack to this `Finder` shorter than this length is
|
||||
/// guaranteed to result in a panic.
|
||||
#[inline(always)]
|
||||
pub(crate) fn min_haystack_len(&self) -> usize {
|
||||
self.min_haystack_len
|
||||
}
|
||||
}
|
||||
|
||||
/// Accepts a chunk-relative offset and returns a haystack relative offset.
|
||||
///
|
||||
/// This used to be marked `#[cold]` and `#[inline(never)]`, but I couldn't
|
||||
/// observe a consistent measureable difference between that and just inlining
|
||||
/// it. So we go with inlining it.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Same at `ptr::offset_from` in addition to `cur >= start`.
|
||||
#[inline(always)]
|
||||
unsafe fn matched(start: *const u8, cur: *const u8, chunki: usize) -> usize {
|
||||
cur.distance(start) + chunki
|
||||
}
|
||||
|
||||
// If you're looking for tests, those are run for each instantiation of the
|
||||
// above code. So for example, see arch::x86_64::sse2::packedpair.
|
||||
16
third_party/rust/memchr/src/arch/mod.rs
vendored
Normal file
16
third_party/rust/memchr/src/arch/mod.rs
vendored
Normal file
@@ -0,0 +1,16 @@
|
||||
/*!
|
||||
A module with low-level architecture dependent routines.
|
||||
|
||||
These routines are useful as primitives for tasks not covered by the higher
|
||||
level crate API.
|
||||
*/
|
||||
|
||||
pub mod all;
|
||||
pub(crate) mod generic;
|
||||
|
||||
#[cfg(target_arch = "aarch64")]
|
||||
pub mod aarch64;
|
||||
#[cfg(all(target_arch = "wasm32", target_feature = "simd128"))]
|
||||
pub mod wasm32;
|
||||
#[cfg(target_arch = "x86_64")]
|
||||
pub mod x86_64;
|
||||
124
third_party/rust/memchr/src/arch/wasm32/memchr.rs
vendored
Normal file
124
third_party/rust/memchr/src/arch/wasm32/memchr.rs
vendored
Normal file
@@ -0,0 +1,124 @@
|
||||
/*!
|
||||
Wrapper routines for `memchr` and friends.
|
||||
|
||||
These routines choose the best implementation at compile time. (This is
|
||||
different from `x86_64` because it is expected that `simd128` is almost always
|
||||
available for `wasm32` targets.)
|
||||
*/
|
||||
|
||||
macro_rules! defraw {
|
||||
($ty:ident, $find:ident, $start:ident, $end:ident, $($needles:ident),+) => {{
|
||||
use crate::arch::wasm32::simd128::memchr::$ty;
|
||||
|
||||
debug!("chose simd128 for {}", stringify!($ty));
|
||||
debug_assert!($ty::is_available());
|
||||
// SAFETY: We know that wasm memchr is always available whenever
|
||||
// code is compiled for `wasm32` with the `simd128` target feature
|
||||
// enabled.
|
||||
$ty::new_unchecked($($needles),+).$find($start, $end)
|
||||
}}
|
||||
}
|
||||
|
||||
/// memchr, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `One::find_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn memchr_raw(
|
||||
n1: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
defraw!(One, find_raw, start, end, n1)
|
||||
}
|
||||
|
||||
/// memrchr, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `One::rfind_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn memrchr_raw(
|
||||
n1: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
defraw!(One, rfind_raw, start, end, n1)
|
||||
}
|
||||
|
||||
/// memchr2, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `Two::find_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn memchr2_raw(
|
||||
n1: u8,
|
||||
n2: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
defraw!(Two, find_raw, start, end, n1, n2)
|
||||
}
|
||||
|
||||
/// memrchr2, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `Two::rfind_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn memrchr2_raw(
|
||||
n1: u8,
|
||||
n2: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
defraw!(Two, rfind_raw, start, end, n1, n2)
|
||||
}
|
||||
|
||||
/// memchr3, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `Three::find_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn memchr3_raw(
|
||||
n1: u8,
|
||||
n2: u8,
|
||||
n3: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
defraw!(Three, find_raw, start, end, n1, n2, n3)
|
||||
}
|
||||
|
||||
/// memrchr3, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `Three::rfind_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn memrchr3_raw(
|
||||
n1: u8,
|
||||
n2: u8,
|
||||
n3: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
defraw!(Three, rfind_raw, start, end, n1, n2, n3)
|
||||
}
|
||||
|
||||
/// Count all matching bytes, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `One::count_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) unsafe fn count_raw(
|
||||
n1: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> usize {
|
||||
defraw!(One, count_raw, start, end, n1)
|
||||
}
|
||||
7
third_party/rust/memchr/src/arch/wasm32/mod.rs
vendored
Normal file
7
third_party/rust/memchr/src/arch/wasm32/mod.rs
vendored
Normal file
@@ -0,0 +1,7 @@
|
||||
/*!
|
||||
Vector algorithms for the `wasm32` target.
|
||||
*/
|
||||
|
||||
pub mod simd128;
|
||||
|
||||
pub(crate) mod memchr;
|
||||
1020
third_party/rust/memchr/src/arch/wasm32/simd128/memchr.rs
vendored
Normal file
1020
third_party/rust/memchr/src/arch/wasm32/simd128/memchr.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6
third_party/rust/memchr/src/arch/wasm32/simd128/mod.rs
vendored
Normal file
6
third_party/rust/memchr/src/arch/wasm32/simd128/mod.rs
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
/*!
|
||||
Algorithms for the `wasm32` target using 128-bit vectors via simd128.
|
||||
*/
|
||||
|
||||
pub mod memchr;
|
||||
pub mod packedpair;
|
||||
228
third_party/rust/memchr/src/arch/wasm32/simd128/packedpair.rs
vendored
Normal file
228
third_party/rust/memchr/src/arch/wasm32/simd128/packedpair.rs
vendored
Normal file
@@ -0,0 +1,228 @@
|
||||
/*!
|
||||
A 128-bit vector implementation of the "packed pair" SIMD algorithm.
|
||||
|
||||
The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
|
||||
difference is that it (by default) uses a background distribution of byte
|
||||
frequencies to heuristically select the pair of bytes to search for.
|
||||
|
||||
[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
|
||||
*/
|
||||
|
||||
use core::arch::wasm32::v128;
|
||||
|
||||
use crate::arch::{all::packedpair::Pair, generic::packedpair};
|
||||
|
||||
/// A "packed pair" finder that uses 128-bit vector operations.
|
||||
///
|
||||
/// This finder picks two bytes that it believes have high predictive power
|
||||
/// for indicating an overall match of a needle. Depending on whether
|
||||
/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets
|
||||
/// where the needle matches or could match. In the prefilter case, candidates
|
||||
/// are reported whenever the [`Pair`] of bytes given matches.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct Finder(packedpair::Finder<v128>);
|
||||
|
||||
impl Finder {
|
||||
/// Create a new pair searcher. The searcher returned can either report
|
||||
/// exact matches of `needle` or act as a prefilter and report candidate
|
||||
/// positions of `needle`.
|
||||
///
|
||||
/// If simd128 is unavailable in the current environment or if a [`Pair`]
|
||||
/// could not be constructed from the needle given, then `None` is
|
||||
/// returned.
|
||||
#[inline]
|
||||
pub fn new(needle: &[u8]) -> Option<Finder> {
|
||||
Finder::with_pair(needle, Pair::new(needle)?)
|
||||
}
|
||||
|
||||
/// Create a new "packed pair" finder using the pair of bytes given.
|
||||
///
|
||||
/// This constructor permits callers to control precisely which pair of
|
||||
/// bytes is used as a predicate.
|
||||
///
|
||||
/// If simd128 is unavailable in the current environment, then `None` is
|
||||
/// returned.
|
||||
#[inline]
|
||||
pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
|
||||
if Finder::is_available() {
|
||||
// SAFETY: we check that simd128 is available above. We are also
|
||||
// guaranteed to have needle.len() > 1 because we have a valid
|
||||
// Pair.
|
||||
unsafe { Some(Finder::with_pair_impl(needle, pair)) }
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `Finder` specific to simd128 vectors and routines.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Same as the safety for `packedpair::Finder::new`, and callers must also
|
||||
/// ensure that simd128 is available.
|
||||
#[target_feature(enable = "simd128")]
|
||||
#[inline]
|
||||
unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder {
|
||||
let finder = packedpair::Finder::<v128>::new(needle, pair);
|
||||
Finder(finder)
|
||||
}
|
||||
|
||||
/// Returns true when this implementation is available in the current
|
||||
/// environment.
|
||||
///
|
||||
/// When this is true, it is guaranteed that [`Finder::with_pair`] will
|
||||
/// return a `Some` value. Similarly, when it is false, it is guaranteed
|
||||
/// that `Finder::with_pair` will return a `None` value. Notice that this
|
||||
/// does not guarantee that [`Finder::new`] will return a `Finder`. Namely,
|
||||
/// even when `Finder::is_available` is true, it is not guaranteed that a
|
||||
/// valid [`Pair`] can be found from the needle given.
|
||||
///
|
||||
/// Note also that for the lifetime of a single program, if this returns
|
||||
/// true then it will always return true.
|
||||
#[inline]
|
||||
pub fn is_available() -> bool {
|
||||
// We used to gate on `cfg(target_feature = "simd128")` here, but
|
||||
// we've since required the feature to be enabled at compile time to
|
||||
// even include this module at all. Therefore, it is always enabled
|
||||
// in this context. See the linked issue for why this was changed.
|
||||
//
|
||||
// Ref: https://github.com/BurntSushi/memchr/issues/144
|
||||
true
|
||||
}
|
||||
|
||||
/// Execute a search using wasm32 v128 vectors and routines.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
#[inline]
|
||||
pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
|
||||
self.find_impl(haystack, needle)
|
||||
}
|
||||
|
||||
/// Execute a search using wasm32 v128 vectors and routines.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
#[inline]
|
||||
pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
|
||||
self.find_prefilter_impl(haystack)
|
||||
}
|
||||
|
||||
/// Execute a search using wasm32 v128 vectors and routines.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// (The target feature safety obligation is automatically fulfilled by
|
||||
/// virtue of being a method on `Finder`, which can only be constructed
|
||||
/// when it is safe to call `simd128` routines.)
|
||||
#[target_feature(enable = "simd128")]
|
||||
#[inline]
|
||||
fn find_impl(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
|
||||
// SAFETY: The target feature safety obligation is automatically
|
||||
// fulfilled by virtue of being a method on `Finder`, which can only be
|
||||
// constructed when it is safe to call `simd128` routines.
|
||||
unsafe { self.0.find(haystack, needle) }
|
||||
}
|
||||
|
||||
/// Execute a prefilter search using wasm32 v128 vectors and routines.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// (The target feature safety obligation is automatically fulfilled by
|
||||
/// virtue of being a method on `Finder`, which can only be constructed
|
||||
/// when it is safe to call `simd128` routines.)
|
||||
#[target_feature(enable = "simd128")]
|
||||
#[inline]
|
||||
fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> {
|
||||
// SAFETY: The target feature safety obligation is automatically
|
||||
// fulfilled by virtue of being a method on `Finder`, which can only be
|
||||
// constructed when it is safe to call `simd128` routines.
|
||||
unsafe { self.0.find_prefilter(haystack) }
|
||||
}
|
||||
|
||||
/// Returns the pair of offsets (into the needle) used to check as a
|
||||
/// predicate before confirming whether a needle exists at a particular
|
||||
/// position.
|
||||
#[inline]
|
||||
pub fn pair(&self) -> &Pair {
|
||||
self.0.pair()
|
||||
}
|
||||
|
||||
/// Returns the minimum haystack length that this `Finder` can search.
|
||||
///
|
||||
/// Using a haystack with length smaller than this in a search will result
|
||||
/// in a panic. The reason for this restriction is that this finder is
|
||||
/// meant to be a low-level component that is part of a larger substring
|
||||
/// strategy. In that sense, it avoids trying to handle all cases and
|
||||
/// instead only handles the cases that it can handle very well.
|
||||
#[inline]
|
||||
pub fn min_haystack_len(&self) -> usize {
|
||||
self.0.min_haystack_len()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> {
|
||||
let f = Finder::new(needle)?;
|
||||
if haystack.len() < f.min_haystack_len() {
|
||||
return None;
|
||||
}
|
||||
Some(f.find(haystack, needle))
|
||||
}
|
||||
|
||||
define_substring_forward_quickcheck!(find);
|
||||
|
||||
#[test]
|
||||
fn forward_substring() {
|
||||
crate::tests::substring::Runner::new().fwd(find).run()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_packedpair() {
|
||||
fn find(
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
index1: u8,
|
||||
index2: u8,
|
||||
) -> Option<Option<usize>> {
|
||||
let pair = Pair::with_indices(needle, index1, index2)?;
|
||||
let f = Finder::with_pair(needle, pair)?;
|
||||
if haystack.len() < f.min_haystack_len() {
|
||||
return None;
|
||||
}
|
||||
Some(f.find(haystack, needle))
|
||||
}
|
||||
crate::tests::packedpair::Runner::new().fwd(find).run()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_packedpair_prefilter() {
|
||||
fn find(
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
index1: u8,
|
||||
index2: u8,
|
||||
) -> Option<Option<usize>> {
|
||||
let pair = Pair::with_indices(needle, index1, index2)?;
|
||||
let f = Finder::with_pair(needle, pair)?;
|
||||
if haystack.len() < f.min_haystack_len() {
|
||||
return None;
|
||||
}
|
||||
Some(f.find_prefilter(haystack))
|
||||
}
|
||||
crate::tests::packedpair::Runner::new().fwd(find).run()
|
||||
}
|
||||
}
|
||||
1352
third_party/rust/memchr/src/arch/x86_64/avx2/memchr.rs
vendored
Normal file
1352
third_party/rust/memchr/src/arch/x86_64/avx2/memchr.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6
third_party/rust/memchr/src/arch/x86_64/avx2/mod.rs
vendored
Normal file
6
third_party/rust/memchr/src/arch/x86_64/avx2/mod.rs
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
/*!
|
||||
Algorithms for the `x86_64` target using 256-bit vectors via AVX2.
|
||||
*/
|
||||
|
||||
pub mod memchr;
|
||||
pub mod packedpair;
|
||||
272
third_party/rust/memchr/src/arch/x86_64/avx2/packedpair.rs
vendored
Normal file
272
third_party/rust/memchr/src/arch/x86_64/avx2/packedpair.rs
vendored
Normal file
@@ -0,0 +1,272 @@
|
||||
/*!
|
||||
A 256-bit vector implementation of the "packed pair" SIMD algorithm.
|
||||
|
||||
The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
|
||||
difference is that it (by default) uses a background distribution of byte
|
||||
frequencies to heuristically select the pair of bytes to search for.
|
||||
|
||||
[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
|
||||
*/
|
||||
|
||||
use core::arch::x86_64::{__m128i, __m256i};
|
||||
|
||||
use crate::arch::{all::packedpair::Pair, generic::packedpair};
|
||||
|
||||
/// A "packed pair" finder that uses 256-bit vector operations.
|
||||
///
|
||||
/// This finder picks two bytes that it believes have high predictive power
|
||||
/// for indicating an overall match of a needle. Depending on whether
|
||||
/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets
|
||||
/// where the needle matches or could match. In the prefilter case, candidates
|
||||
/// are reported whenever the [`Pair`] of bytes given matches.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct Finder {
|
||||
sse2: packedpair::Finder<__m128i>,
|
||||
avx2: packedpair::Finder<__m256i>,
|
||||
}
|
||||
|
||||
impl Finder {
|
||||
/// Create a new pair searcher. The searcher returned can either report
|
||||
/// exact matches of `needle` or act as a prefilter and report candidate
|
||||
/// positions of `needle`.
|
||||
///
|
||||
/// If AVX2 is unavailable in the current environment or if a [`Pair`]
|
||||
/// could not be constructed from the needle given, then `None` is
|
||||
/// returned.
|
||||
#[inline]
|
||||
pub fn new(needle: &[u8]) -> Option<Finder> {
|
||||
Finder::with_pair(needle, Pair::new(needle)?)
|
||||
}
|
||||
|
||||
/// Create a new "packed pair" finder using the pair of bytes given.
|
||||
///
|
||||
/// This constructor permits callers to control precisely which pair of
|
||||
/// bytes is used as a predicate.
|
||||
///
|
||||
/// If AVX2 is unavailable in the current environment, then `None` is
|
||||
/// returned.
|
||||
#[inline]
|
||||
pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
|
||||
if Finder::is_available() {
|
||||
// SAFETY: we check that sse2/avx2 is available above. We are also
|
||||
// guaranteed to have needle.len() > 1 because we have a valid
|
||||
// Pair.
|
||||
unsafe { Some(Finder::with_pair_impl(needle, pair)) }
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `Finder` specific to SSE2 vectors and routines.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Same as the safety for `packedpair::Finder::new`, and callers must also
|
||||
/// ensure that both SSE2 and AVX2 are available.
|
||||
#[target_feature(enable = "sse2", enable = "avx2")]
|
||||
#[inline]
|
||||
unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder {
|
||||
let sse2 = packedpair::Finder::<__m128i>::new(needle, pair);
|
||||
let avx2 = packedpair::Finder::<__m256i>::new(needle, pair);
|
||||
Finder { sse2, avx2 }
|
||||
}
|
||||
|
||||
/// Returns true when this implementation is available in the current
|
||||
/// environment.
|
||||
///
|
||||
/// When this is true, it is guaranteed that [`Finder::with_pair`] will
|
||||
/// return a `Some` value. Similarly, when it is false, it is guaranteed
|
||||
/// that `Finder::with_pair` will return a `None` value. Notice that this
|
||||
/// does not guarantee that [`Finder::new`] will return a `Finder`. Namely,
|
||||
/// even when `Finder::is_available` is true, it is not guaranteed that a
|
||||
/// valid [`Pair`] can be found from the needle given.
|
||||
///
|
||||
/// Note also that for the lifetime of a single program, if this returns
|
||||
/// true then it will always return true.
|
||||
#[inline]
|
||||
pub fn is_available() -> bool {
|
||||
#[cfg(not(target_feature = "sse2"))]
|
||||
{
|
||||
false
|
||||
}
|
||||
#[cfg(target_feature = "sse2")]
|
||||
{
|
||||
#[cfg(target_feature = "avx2")]
|
||||
{
|
||||
true
|
||||
}
|
||||
#[cfg(not(target_feature = "avx2"))]
|
||||
{
|
||||
#[cfg(feature = "std")]
|
||||
{
|
||||
std::is_x86_feature_detected!("avx2")
|
||||
}
|
||||
#[cfg(not(feature = "std"))]
|
||||
{
|
||||
false
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute a search using AVX2 vectors and routines.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
#[inline]
|
||||
pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
|
||||
// SAFETY: Building a `Finder` means it's safe to call 'sse2' routines.
|
||||
unsafe { self.find_impl(haystack, needle) }
|
||||
}
|
||||
|
||||
/// Run this finder on the given haystack as a prefilter.
|
||||
///
|
||||
/// If a candidate match is found, then an offset where the needle *could*
|
||||
/// begin in the haystack is returned.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
#[inline]
|
||||
pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
|
||||
// SAFETY: Building a `Finder` means it's safe to call 'sse2' routines.
|
||||
unsafe { self.find_prefilter_impl(haystack) }
|
||||
}
|
||||
|
||||
/// Execute a search using AVX2 vectors and routines.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// (The target feature safety obligation is automatically fulfilled by
|
||||
/// virtue of being a method on `Finder`, which can only be constructed
|
||||
/// when it is safe to call `sse2` and `avx2` routines.)
|
||||
#[target_feature(enable = "sse2", enable = "avx2")]
|
||||
#[inline]
|
||||
unsafe fn find_impl(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
) -> Option<usize> {
|
||||
if haystack.len() < self.avx2.min_haystack_len() {
|
||||
self.sse2.find(haystack, needle)
|
||||
} else {
|
||||
self.avx2.find(haystack, needle)
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute a prefilter search using AVX2 vectors and routines.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// (The target feature safety obligation is automatically fulfilled by
|
||||
/// virtue of being a method on `Finder`, which can only be constructed
|
||||
/// when it is safe to call `sse2` and `avx2` routines.)
|
||||
#[target_feature(enable = "sse2", enable = "avx2")]
|
||||
#[inline]
|
||||
unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> {
|
||||
if haystack.len() < self.avx2.min_haystack_len() {
|
||||
self.sse2.find_prefilter(haystack)
|
||||
} else {
|
||||
self.avx2.find_prefilter(haystack)
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the pair of offsets (into the needle) used to check as a
|
||||
/// predicate before confirming whether a needle exists at a particular
|
||||
/// position.
|
||||
#[inline]
|
||||
pub fn pair(&self) -> &Pair {
|
||||
self.avx2.pair()
|
||||
}
|
||||
|
||||
/// Returns the minimum haystack length that this `Finder` can search.
|
||||
///
|
||||
/// Using a haystack with length smaller than this in a search will result
|
||||
/// in a panic. The reason for this restriction is that this finder is
|
||||
/// meant to be a low-level component that is part of a larger substring
|
||||
/// strategy. In that sense, it avoids trying to handle all cases and
|
||||
/// instead only handles the cases that it can handle very well.
|
||||
#[inline]
|
||||
pub fn min_haystack_len(&self) -> usize {
|
||||
// The caller doesn't need to care about AVX2's min_haystack_len
|
||||
// since this implementation will automatically switch to the SSE2
|
||||
// implementation if the haystack is too short for AVX2. Therefore, the
|
||||
// caller only needs to care about SSE2's min_haystack_len.
|
||||
//
|
||||
// This does assume that SSE2's min_haystack_len is less than or
|
||||
// equal to AVX2's min_haystack_len. In practice, this is true and
|
||||
// there is no way it could be false based on how this Finder is
|
||||
// implemented. Namely, both SSE2 and AVX2 use the same `Pair`. If
|
||||
// they used different pairs, then it's possible (although perhaps
|
||||
// pathological) for SSE2's min_haystack_len to be bigger than AVX2's.
|
||||
self.sse2.min_haystack_len()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> {
|
||||
let f = Finder::new(needle)?;
|
||||
if haystack.len() < f.min_haystack_len() {
|
||||
return None;
|
||||
}
|
||||
Some(f.find(haystack, needle))
|
||||
}
|
||||
|
||||
define_substring_forward_quickcheck!(find);
|
||||
|
||||
#[test]
|
||||
fn forward_substring() {
|
||||
crate::tests::substring::Runner::new().fwd(find).run()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_packedpair() {
|
||||
fn find(
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
index1: u8,
|
||||
index2: u8,
|
||||
) -> Option<Option<usize>> {
|
||||
let pair = Pair::with_indices(needle, index1, index2)?;
|
||||
let f = Finder::with_pair(needle, pair)?;
|
||||
if haystack.len() < f.min_haystack_len() {
|
||||
return None;
|
||||
}
|
||||
Some(f.find(haystack, needle))
|
||||
}
|
||||
crate::tests::packedpair::Runner::new().fwd(find).run()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_packedpair_prefilter() {
|
||||
fn find(
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
index1: u8,
|
||||
index2: u8,
|
||||
) -> Option<Option<usize>> {
|
||||
if !cfg!(target_feature = "sse2") {
|
||||
return None;
|
||||
}
|
||||
let pair = Pair::with_indices(needle, index1, index2)?;
|
||||
let f = Finder::with_pair(needle, pair)?;
|
||||
if haystack.len() < f.min_haystack_len() {
|
||||
return None;
|
||||
}
|
||||
Some(f.find_prefilter(haystack))
|
||||
}
|
||||
crate::tests::packedpair::Runner::new().fwd(find).run()
|
||||
}
|
||||
}
|
||||
335
third_party/rust/memchr/src/arch/x86_64/memchr.rs
vendored
Normal file
335
third_party/rust/memchr/src/arch/x86_64/memchr.rs
vendored
Normal file
@@ -0,0 +1,335 @@
|
||||
/*!
|
||||
Wrapper routines for `memchr` and friends.
|
||||
|
||||
These routines efficiently dispatch to the best implementation based on what
|
||||
the CPU supports.
|
||||
*/
|
||||
|
||||
/// Provides a way to run a memchr-like function while amortizing the cost of
|
||||
/// runtime CPU feature detection.
|
||||
///
|
||||
/// This works by loading a function pointer from an atomic global. Initially,
|
||||
/// this global is set to a function that does CPU feature detection. For
|
||||
/// example, if AVX2 is enabled, then the AVX2 implementation is used.
|
||||
/// Otherwise, at least on x86_64, the SSE2 implementation is used. (And
|
||||
/// in some niche cases, if SSE2 isn't available, then the architecture
|
||||
/// independent fallback implementation is used.)
|
||||
///
|
||||
/// After the first call to this function, the atomic global is replaced with
|
||||
/// the specific AVX2, SSE2 or fallback routine chosen. Subsequent calls then
|
||||
/// will directly call the chosen routine instead of needing to go through the
|
||||
/// CPU feature detection branching again.
|
||||
///
|
||||
/// This particular macro is specifically written to provide the implementation
|
||||
/// of functions with the following signature:
|
||||
///
|
||||
/// ```ignore
|
||||
/// fn memchr(needle1: u8, start: *const u8, end: *const u8) -> Option<usize>;
|
||||
/// ```
|
||||
///
|
||||
/// Where you can also have `memchr2` and `memchr3`, but with `needle2` and
|
||||
/// `needle3`, respectively. The `start` and `end` parameters correspond to the
|
||||
/// start and end of the haystack, respectively.
|
||||
///
|
||||
/// We use raw pointers here instead of the more obvious `haystack: &[u8]` so
|
||||
/// that the function is compatible with our lower level iterator logic that
|
||||
/// operates on raw pointers. We use this macro to implement "raw" memchr
|
||||
/// routines with the signature above, and then define memchr routines using
|
||||
/// regular slices on top of them.
|
||||
///
|
||||
/// Note that we use `#[cfg(target_feature = "sse2")]` below even though
|
||||
/// it shouldn't be strictly necessary because without it, it seems to
|
||||
/// cause the compiler to blow up. I guess it can't handle a function
|
||||
/// pointer being created with a sse target feature? Dunno. See the
|
||||
/// `build-for-x86-64-but-non-sse-target` CI job if you want to experiment with
|
||||
/// this.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Primarily callers must that `$fnty` is a correct function pointer type and
|
||||
/// not something else.
|
||||
///
|
||||
/// Callers must also ensure that `$memchrty::$memchrfind` corresponds to a
|
||||
/// routine that returns a valid function pointer when a match is found. That
|
||||
/// is, a pointer that is `>= start` and `< end`.
|
||||
///
|
||||
/// Callers must also ensure that the `$hay_start` and `$hay_end` identifiers
|
||||
/// correspond to valid pointers.
|
||||
macro_rules! unsafe_ifunc {
|
||||
(
|
||||
$memchrty:ident,
|
||||
$memchrfind:ident,
|
||||
$fnty:ty,
|
||||
$retty:ty,
|
||||
$hay_start:ident,
|
||||
$hay_end:ident,
|
||||
$($needle:ident),+
|
||||
) => {{
|
||||
#![allow(unused_unsafe)]
|
||||
|
||||
use core::sync::atomic::{AtomicPtr, Ordering};
|
||||
|
||||
type Fn = *mut ();
|
||||
type RealFn = $fnty;
|
||||
static FN: AtomicPtr<()> = AtomicPtr::new(detect as Fn);
|
||||
|
||||
#[cfg(target_feature = "sse2")]
|
||||
#[target_feature(enable = "sse2", enable = "avx2")]
|
||||
unsafe fn find_avx2(
|
||||
$($needle: u8),+,
|
||||
$hay_start: *const u8,
|
||||
$hay_end: *const u8,
|
||||
) -> $retty {
|
||||
use crate::arch::x86_64::avx2::memchr::$memchrty;
|
||||
$memchrty::new_unchecked($($needle),+)
|
||||
.$memchrfind($hay_start, $hay_end)
|
||||
}
|
||||
|
||||
#[cfg(target_feature = "sse2")]
|
||||
#[target_feature(enable = "sse2")]
|
||||
unsafe fn find_sse2(
|
||||
$($needle: u8),+,
|
||||
$hay_start: *const u8,
|
||||
$hay_end: *const u8,
|
||||
) -> $retty {
|
||||
use crate::arch::x86_64::sse2::memchr::$memchrty;
|
||||
$memchrty::new_unchecked($($needle),+)
|
||||
.$memchrfind($hay_start, $hay_end)
|
||||
}
|
||||
|
||||
unsafe fn find_fallback(
|
||||
$($needle: u8),+,
|
||||
$hay_start: *const u8,
|
||||
$hay_end: *const u8,
|
||||
) -> $retty {
|
||||
use crate::arch::all::memchr::$memchrty;
|
||||
$memchrty::new($($needle),+).$memchrfind($hay_start, $hay_end)
|
||||
}
|
||||
|
||||
unsafe fn detect(
|
||||
$($needle: u8),+,
|
||||
$hay_start: *const u8,
|
||||
$hay_end: *const u8,
|
||||
) -> $retty {
|
||||
let fun = {
|
||||
#[cfg(not(target_feature = "sse2"))]
|
||||
{
|
||||
debug!(
|
||||
"no sse2 feature available, using fallback for {}",
|
||||
stringify!($memchrty),
|
||||
);
|
||||
find_fallback as RealFn
|
||||
}
|
||||
#[cfg(target_feature = "sse2")]
|
||||
{
|
||||
use crate::arch::x86_64::{sse2, avx2};
|
||||
if avx2::memchr::$memchrty::is_available() {
|
||||
debug!("chose AVX2 for {}", stringify!($memchrty));
|
||||
find_avx2 as RealFn
|
||||
} else if sse2::memchr::$memchrty::is_available() {
|
||||
debug!("chose SSE2 for {}", stringify!($memchrty));
|
||||
find_sse2 as RealFn
|
||||
} else {
|
||||
debug!("chose fallback for {}", stringify!($memchrty));
|
||||
find_fallback as RealFn
|
||||
}
|
||||
}
|
||||
};
|
||||
FN.store(fun as Fn, Ordering::Relaxed);
|
||||
// SAFETY: The only thing we need to uphold here is the
|
||||
// `#[target_feature]` requirements. Since we check is_available
|
||||
// above before using the corresponding implementation, we are
|
||||
// guaranteed to only call code that is supported on the current
|
||||
// CPU.
|
||||
fun($($needle),+, $hay_start, $hay_end)
|
||||
}
|
||||
|
||||
// SAFETY: By virtue of the caller contract, RealFn is a function
|
||||
// pointer, which is always safe to transmute with a *mut (). Also,
|
||||
// since we use $memchrty::is_available, it is guaranteed to be safe
|
||||
// to call $memchrty::$memchrfind.
|
||||
unsafe {
|
||||
let fun = FN.load(Ordering::Relaxed);
|
||||
core::mem::transmute::<Fn, RealFn>(fun)(
|
||||
$($needle),+,
|
||||
$hay_start,
|
||||
$hay_end,
|
||||
)
|
||||
}
|
||||
}};
|
||||
}
|
||||
|
||||
// The routines below dispatch to AVX2, SSE2 or a fallback routine based on
|
||||
// what's available in the current environment. The secret sauce here is that
|
||||
// we only check for which one to use approximately once, and then "cache" that
|
||||
// choice into a global function pointer. Subsequent invocations then just call
|
||||
// the appropriate function directly.
|
||||
|
||||
/// memchr, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `One::find_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) fn memchr_raw(
|
||||
n1: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
// SAFETY: We provide a valid function pointer type.
|
||||
unsafe_ifunc!(
|
||||
One,
|
||||
find_raw,
|
||||
unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>,
|
||||
Option<*const u8>,
|
||||
start,
|
||||
end,
|
||||
n1
|
||||
)
|
||||
}
|
||||
|
||||
/// memrchr, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `One::rfind_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) fn memrchr_raw(
|
||||
n1: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
// SAFETY: We provide a valid function pointer type.
|
||||
unsafe_ifunc!(
|
||||
One,
|
||||
rfind_raw,
|
||||
unsafe fn(u8, *const u8, *const u8) -> Option<*const u8>,
|
||||
Option<*const u8>,
|
||||
start,
|
||||
end,
|
||||
n1
|
||||
)
|
||||
}
|
||||
|
||||
/// memchr2, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `Two::find_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) fn memchr2_raw(
|
||||
n1: u8,
|
||||
n2: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
// SAFETY: We provide a valid function pointer type.
|
||||
unsafe_ifunc!(
|
||||
Two,
|
||||
find_raw,
|
||||
unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>,
|
||||
Option<*const u8>,
|
||||
start,
|
||||
end,
|
||||
n1,
|
||||
n2
|
||||
)
|
||||
}
|
||||
|
||||
/// memrchr2, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `Two::rfind_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) fn memrchr2_raw(
|
||||
n1: u8,
|
||||
n2: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
// SAFETY: We provide a valid function pointer type.
|
||||
unsafe_ifunc!(
|
||||
Two,
|
||||
rfind_raw,
|
||||
unsafe fn(u8, u8, *const u8, *const u8) -> Option<*const u8>,
|
||||
Option<*const u8>,
|
||||
start,
|
||||
end,
|
||||
n1,
|
||||
n2
|
||||
)
|
||||
}
|
||||
|
||||
/// memchr3, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `Three::find_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) fn memchr3_raw(
|
||||
n1: u8,
|
||||
n2: u8,
|
||||
n3: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
// SAFETY: We provide a valid function pointer type.
|
||||
unsafe_ifunc!(
|
||||
Three,
|
||||
find_raw,
|
||||
unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>,
|
||||
Option<*const u8>,
|
||||
start,
|
||||
end,
|
||||
n1,
|
||||
n2,
|
||||
n3
|
||||
)
|
||||
}
|
||||
|
||||
/// memrchr3, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `Three::rfind_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) fn memrchr3_raw(
|
||||
n1: u8,
|
||||
n2: u8,
|
||||
n3: u8,
|
||||
start: *const u8,
|
||||
end: *const u8,
|
||||
) -> Option<*const u8> {
|
||||
// SAFETY: We provide a valid function pointer type.
|
||||
unsafe_ifunc!(
|
||||
Three,
|
||||
rfind_raw,
|
||||
unsafe fn(u8, u8, u8, *const u8, *const u8) -> Option<*const u8>,
|
||||
Option<*const u8>,
|
||||
start,
|
||||
end,
|
||||
n1,
|
||||
n2,
|
||||
n3
|
||||
)
|
||||
}
|
||||
|
||||
/// Count all matching bytes, but using raw pointers to represent the haystack.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Pointers must be valid. See `One::count_raw`.
|
||||
#[inline(always)]
|
||||
pub(crate) fn count_raw(n1: u8, start: *const u8, end: *const u8) -> usize {
|
||||
// SAFETY: We provide a valid function pointer type.
|
||||
unsafe_ifunc!(
|
||||
One,
|
||||
count_raw,
|
||||
unsafe fn(u8, *const u8, *const u8) -> usize,
|
||||
usize,
|
||||
start,
|
||||
end,
|
||||
n1
|
||||
)
|
||||
}
|
||||
8
third_party/rust/memchr/src/arch/x86_64/mod.rs
vendored
Normal file
8
third_party/rust/memchr/src/arch/x86_64/mod.rs
vendored
Normal file
@@ -0,0 +1,8 @@
|
||||
/*!
|
||||
Vector algorithms for the `x86_64` target.
|
||||
*/
|
||||
|
||||
pub mod avx2;
|
||||
pub mod sse2;
|
||||
|
||||
pub(crate) mod memchr;
|
||||
1077
third_party/rust/memchr/src/arch/x86_64/sse2/memchr.rs
vendored
Normal file
1077
third_party/rust/memchr/src/arch/x86_64/sse2/memchr.rs
vendored
Normal file
File diff suppressed because it is too large
Load Diff
6
third_party/rust/memchr/src/arch/x86_64/sse2/mod.rs
vendored
Normal file
6
third_party/rust/memchr/src/arch/x86_64/sse2/mod.rs
vendored
Normal file
@@ -0,0 +1,6 @@
|
||||
/*!
|
||||
Algorithms for the `x86_64` target using 128-bit vectors via SSE2.
|
||||
*/
|
||||
|
||||
pub mod memchr;
|
||||
pub mod packedpair;
|
||||
232
third_party/rust/memchr/src/arch/x86_64/sse2/packedpair.rs
vendored
Normal file
232
third_party/rust/memchr/src/arch/x86_64/sse2/packedpair.rs
vendored
Normal file
@@ -0,0 +1,232 @@
|
||||
/*!
|
||||
A 128-bit vector implementation of the "packed pair" SIMD algorithm.
|
||||
|
||||
The "packed pair" algorithm is based on the [generic SIMD] algorithm. The main
|
||||
difference is that it (by default) uses a background distribution of byte
|
||||
frequencies to heuristically select the pair of bytes to search for.
|
||||
|
||||
[generic SIMD]: http://0x80.pl/articles/simd-strfind.html#first-and-last
|
||||
*/
|
||||
|
||||
use core::arch::x86_64::__m128i;
|
||||
|
||||
use crate::arch::{all::packedpair::Pair, generic::packedpair};
|
||||
|
||||
/// A "packed pair" finder that uses 128-bit vector operations.
|
||||
///
|
||||
/// This finder picks two bytes that it believes have high predictive power
|
||||
/// for indicating an overall match of a needle. Depending on whether
|
||||
/// `Finder::find` or `Finder::find_prefilter` is used, it reports offsets
|
||||
/// where the needle matches or could match. In the prefilter case, candidates
|
||||
/// are reported whenever the [`Pair`] of bytes given matches.
|
||||
#[derive(Clone, Copy, Debug)]
|
||||
pub struct Finder(packedpair::Finder<__m128i>);
|
||||
|
||||
impl Finder {
|
||||
/// Create a new pair searcher. The searcher returned can either report
|
||||
/// exact matches of `needle` or act as a prefilter and report candidate
|
||||
/// positions of `needle`.
|
||||
///
|
||||
/// If SSE2 is unavailable in the current environment or if a [`Pair`]
|
||||
/// could not be constructed from the needle given, then `None` is
|
||||
/// returned.
|
||||
#[inline]
|
||||
pub fn new(needle: &[u8]) -> Option<Finder> {
|
||||
Finder::with_pair(needle, Pair::new(needle)?)
|
||||
}
|
||||
|
||||
/// Create a new "packed pair" finder using the pair of bytes given.
|
||||
///
|
||||
/// This constructor permits callers to control precisely which pair of
|
||||
/// bytes is used as a predicate.
|
||||
///
|
||||
/// If SSE2 is unavailable in the current environment, then `None` is
|
||||
/// returned.
|
||||
#[inline]
|
||||
pub fn with_pair(needle: &[u8], pair: Pair) -> Option<Finder> {
|
||||
if Finder::is_available() {
|
||||
// SAFETY: we check that sse2 is available above. We are also
|
||||
// guaranteed to have needle.len() > 1 because we have a valid
|
||||
// Pair.
|
||||
unsafe { Some(Finder::with_pair_impl(needle, pair)) }
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Create a new `Finder` specific to SSE2 vectors and routines.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Same as the safety for `packedpair::Finder::new`, and callers must also
|
||||
/// ensure that SSE2 is available.
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[inline]
|
||||
unsafe fn with_pair_impl(needle: &[u8], pair: Pair) -> Finder {
|
||||
let finder = packedpair::Finder::<__m128i>::new(needle, pair);
|
||||
Finder(finder)
|
||||
}
|
||||
|
||||
/// Returns true when this implementation is available in the current
|
||||
/// environment.
|
||||
///
|
||||
/// When this is true, it is guaranteed that [`Finder::with_pair`] will
|
||||
/// return a `Some` value. Similarly, when it is false, it is guaranteed
|
||||
/// that `Finder::with_pair` will return a `None` value. Notice that this
|
||||
/// does not guarantee that [`Finder::new`] will return a `Finder`. Namely,
|
||||
/// even when `Finder::is_available` is true, it is not guaranteed that a
|
||||
/// valid [`Pair`] can be found from the needle given.
|
||||
///
|
||||
/// Note also that for the lifetime of a single program, if this returns
|
||||
/// true then it will always return true.
|
||||
#[inline]
|
||||
pub fn is_available() -> bool {
|
||||
#[cfg(not(target_feature = "sse2"))]
|
||||
{
|
||||
false
|
||||
}
|
||||
#[cfg(target_feature = "sse2")]
|
||||
{
|
||||
true
|
||||
}
|
||||
}
|
||||
|
||||
/// Execute a search using SSE2 vectors and routines.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
#[inline]
|
||||
pub fn find(&self, haystack: &[u8], needle: &[u8]) -> Option<usize> {
|
||||
// SAFETY: Building a `Finder` means it's safe to call 'sse2' routines.
|
||||
unsafe { self.find_impl(haystack, needle) }
|
||||
}
|
||||
|
||||
/// Run this finder on the given haystack as a prefilter.
|
||||
///
|
||||
/// If a candidate match is found, then an offset where the needle *could*
|
||||
/// begin in the haystack is returned.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
#[inline]
|
||||
pub fn find_prefilter(&self, haystack: &[u8]) -> Option<usize> {
|
||||
// SAFETY: Building a `Finder` means it's safe to call 'sse2' routines.
|
||||
unsafe { self.find_prefilter_impl(haystack) }
|
||||
}
|
||||
|
||||
/// Execute a search using SSE2 vectors and routines.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// (The target feature safety obligation is automatically fulfilled by
|
||||
/// virtue of being a method on `Finder`, which can only be constructed
|
||||
/// when it is safe to call `sse2` routines.)
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[inline]
|
||||
unsafe fn find_impl(
|
||||
&self,
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
) -> Option<usize> {
|
||||
self.0.find(haystack, needle)
|
||||
}
|
||||
|
||||
/// Execute a prefilter search using SSE2 vectors and routines.
|
||||
///
|
||||
/// # Panics
|
||||
///
|
||||
/// When `haystack.len()` is less than [`Finder::min_haystack_len`].
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// (The target feature safety obligation is automatically fulfilled by
|
||||
/// virtue of being a method on `Finder`, which can only be constructed
|
||||
/// when it is safe to call `sse2` routines.)
|
||||
#[target_feature(enable = "sse2")]
|
||||
#[inline]
|
||||
unsafe fn find_prefilter_impl(&self, haystack: &[u8]) -> Option<usize> {
|
||||
self.0.find_prefilter(haystack)
|
||||
}
|
||||
|
||||
/// Returns the pair of offsets (into the needle) used to check as a
|
||||
/// predicate before confirming whether a needle exists at a particular
|
||||
/// position.
|
||||
#[inline]
|
||||
pub fn pair(&self) -> &Pair {
|
||||
self.0.pair()
|
||||
}
|
||||
|
||||
/// Returns the minimum haystack length that this `Finder` can search.
|
||||
///
|
||||
/// Using a haystack with length smaller than this in a search will result
|
||||
/// in a panic. The reason for this restriction is that this finder is
|
||||
/// meant to be a low-level component that is part of a larger substring
|
||||
/// strategy. In that sense, it avoids trying to handle all cases and
|
||||
/// instead only handles the cases that it can handle very well.
|
||||
#[inline]
|
||||
pub fn min_haystack_len(&self) -> usize {
|
||||
self.0.min_haystack_len()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use super::*;
|
||||
|
||||
fn find(haystack: &[u8], needle: &[u8]) -> Option<Option<usize>> {
|
||||
let f = Finder::new(needle)?;
|
||||
if haystack.len() < f.min_haystack_len() {
|
||||
return None;
|
||||
}
|
||||
Some(f.find(haystack, needle))
|
||||
}
|
||||
|
||||
define_substring_forward_quickcheck!(find);
|
||||
|
||||
#[test]
|
||||
fn forward_substring() {
|
||||
crate::tests::substring::Runner::new().fwd(find).run()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_packedpair() {
|
||||
fn find(
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
index1: u8,
|
||||
index2: u8,
|
||||
) -> Option<Option<usize>> {
|
||||
let pair = Pair::with_indices(needle, index1, index2)?;
|
||||
let f = Finder::with_pair(needle, pair)?;
|
||||
if haystack.len() < f.min_haystack_len() {
|
||||
return None;
|
||||
}
|
||||
Some(f.find(haystack, needle))
|
||||
}
|
||||
crate::tests::packedpair::Runner::new().fwd(find).run()
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn forward_packedpair_prefilter() {
|
||||
fn find(
|
||||
haystack: &[u8],
|
||||
needle: &[u8],
|
||||
index1: u8,
|
||||
index2: u8,
|
||||
) -> Option<Option<usize>> {
|
||||
let pair = Pair::with_indices(needle, index1, index2)?;
|
||||
let f = Finder::with_pair(needle, pair)?;
|
||||
if haystack.len() < f.min_haystack_len() {
|
||||
return None;
|
||||
}
|
||||
Some(f.find_prefilter(haystack))
|
||||
}
|
||||
crate::tests::packedpair::Runner::new().fwd(find).run()
|
||||
}
|
||||
}
|
||||
70
third_party/rust/memchr/src/cow.rs
vendored
70
third_party/rust/memchr/src/cow.rs
vendored
@@ -4,22 +4,23 @@ use core::ops;
|
||||
///
|
||||
/// The purpose of this type is to permit usage of a "borrowed or owned
|
||||
/// byte string" in a way that keeps std/no-std compatibility. That is, in
|
||||
/// no-std mode, this type devolves into a simple &[u8] with no owned variant
|
||||
/// available. We can't just use a plain Cow because Cow is not in core.
|
||||
/// no-std/alloc mode, this type devolves into a simple &[u8] with no owned
|
||||
/// variant available. We can't just use a plain Cow because Cow is not in
|
||||
/// core.
|
||||
#[derive(Clone, Debug)]
|
||||
pub struct CowBytes<'a>(Imp<'a>);
|
||||
|
||||
// N.B. We don't use std::borrow::Cow here since we can get away with a
|
||||
// N.B. We don't use alloc::borrow::Cow here since we can get away with a
|
||||
// Box<[u8]> for our use case, which is 1/3 smaller than the Vec<u8> that
|
||||
// a Cow<[u8]> would use.
|
||||
#[cfg(feature = "std")]
|
||||
#[cfg(feature = "alloc")]
|
||||
#[derive(Clone, Debug)]
|
||||
enum Imp<'a> {
|
||||
Borrowed(&'a [u8]),
|
||||
Owned(Box<[u8]>),
|
||||
Owned(alloc::boxed::Box<[u8]>),
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "std"))]
|
||||
#[cfg(not(feature = "alloc"))]
|
||||
#[derive(Clone, Debug)]
|
||||
struct Imp<'a>(&'a [u8]);
|
||||
|
||||
@@ -35,21 +36,21 @@ impl<'a> ops::Deref for CowBytes<'a> {
|
||||
impl<'a> CowBytes<'a> {
|
||||
/// Create a new borrowed CowBytes.
|
||||
#[inline(always)]
|
||||
pub fn new<B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> CowBytes<'a> {
|
||||
pub(crate) fn new<B: ?Sized + AsRef<[u8]>>(bytes: &'a B) -> CowBytes<'a> {
|
||||
CowBytes(Imp::new(bytes.as_ref()))
|
||||
}
|
||||
|
||||
/// Create a new owned CowBytes.
|
||||
#[cfg(feature = "std")]
|
||||
#[cfg(feature = "alloc")]
|
||||
#[inline(always)]
|
||||
pub fn new_owned(bytes: Box<[u8]>) -> CowBytes<'static> {
|
||||
fn new_owned(bytes: alloc::boxed::Box<[u8]>) -> CowBytes<'static> {
|
||||
CowBytes(Imp::Owned(bytes))
|
||||
}
|
||||
|
||||
/// Return a borrowed byte string, regardless of whether this is an owned
|
||||
/// or borrowed byte string internally.
|
||||
#[inline(always)]
|
||||
pub fn as_slice(&self) -> &[u8] {
|
||||
pub(crate) fn as_slice(&self) -> &[u8] {
|
||||
self.0.as_slice()
|
||||
}
|
||||
|
||||
@@ -57,39 +58,48 @@ impl<'a> CowBytes<'a> {
|
||||
///
|
||||
/// If this is already an owned byte string internally, then this is a
|
||||
/// no-op. Otherwise, the internal byte string is copied.
|
||||
#[cfg(feature = "std")]
|
||||
#[cfg(feature = "alloc")]
|
||||
#[inline(always)]
|
||||
pub fn into_owned(self) -> CowBytes<'static> {
|
||||
pub(crate) fn into_owned(self) -> CowBytes<'static> {
|
||||
match self.0 {
|
||||
Imp::Borrowed(b) => CowBytes::new_owned(Box::from(b)),
|
||||
Imp::Borrowed(b) => {
|
||||
CowBytes::new_owned(alloc::boxed::Box::from(b))
|
||||
}
|
||||
Imp::Owned(b) => CowBytes::new_owned(b),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Imp<'a> {
|
||||
#[cfg(feature = "std")]
|
||||
#[inline(always)]
|
||||
pub fn new(bytes: &'a [u8]) -> Imp<'a> {
|
||||
Imp::Borrowed(bytes)
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "std"))]
|
||||
#[inline(always)]
|
||||
pub fn new(bytes: &'a [u8]) -> Imp<'a> {
|
||||
Imp(bytes)
|
||||
}
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
#[inline(always)]
|
||||
pub fn as_slice(&self) -> &[u8] {
|
||||
match self {
|
||||
Imp::Owned(ref x) => x,
|
||||
Imp::Borrowed(x) => x,
|
||||
#[cfg(feature = "alloc")]
|
||||
{
|
||||
Imp::Borrowed(bytes)
|
||||
}
|
||||
#[cfg(not(feature = "alloc"))]
|
||||
{
|
||||
Imp(bytes)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "std"))]
|
||||
#[cfg(feature = "alloc")]
|
||||
#[inline(always)]
|
||||
pub fn as_slice(&self) -> &[u8] {
|
||||
#[cfg(feature = "alloc")]
|
||||
{
|
||||
match self {
|
||||
Imp::Owned(ref x) => x,
|
||||
Imp::Borrowed(x) => x,
|
||||
}
|
||||
}
|
||||
#[cfg(not(feature = "alloc"))]
|
||||
{
|
||||
self.0
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(not(feature = "alloc"))]
|
||||
#[inline(always)]
|
||||
pub fn as_slice(&self) -> &[u8] {
|
||||
self.0
|
||||
|
||||
54
third_party/rust/memchr/src/ext.rs
vendored
Normal file
54
third_party/rust/memchr/src/ext.rs
vendored
Normal file
@@ -0,0 +1,54 @@
|
||||
/// A trait for adding some helper routines to pointers.
|
||||
pub(crate) trait Pointer {
|
||||
/// Returns the distance, in units of `T`, between `self` and `origin`.
|
||||
///
|
||||
/// # Safety
|
||||
///
|
||||
/// Same as `ptr::offset_from` in addition to `self >= origin`.
|
||||
unsafe fn distance(self, origin: Self) -> usize;
|
||||
|
||||
/// Casts this pointer to `usize`.
|
||||
///
|
||||
/// Callers should not convert the `usize` back to a pointer if at all
|
||||
/// possible. (And if you believe it's necessary, open an issue to discuss
|
||||
/// why. Otherwise, it has the potential to violate pointer provenance.)
|
||||
/// The purpose of this function is just to be able to do arithmetic, i.e.,
|
||||
/// computing offsets or alignments.
|
||||
fn as_usize(self) -> usize;
|
||||
}
|
||||
|
||||
impl<T> Pointer for *const T {
|
||||
unsafe fn distance(self, origin: *const T) -> usize {
|
||||
// TODO: Replace with `ptr::sub_ptr` once stabilized.
|
||||
usize::try_from(self.offset_from(origin)).unwrap_unchecked()
|
||||
}
|
||||
|
||||
fn as_usize(self) -> usize {
|
||||
self as usize
|
||||
}
|
||||
}
|
||||
|
||||
impl<T> Pointer for *mut T {
|
||||
unsafe fn distance(self, origin: *mut T) -> usize {
|
||||
(self as *const T).distance(origin as *const T)
|
||||
}
|
||||
|
||||
fn as_usize(self) -> usize {
|
||||
(self as *const T).as_usize()
|
||||
}
|
||||
}
|
||||
|
||||
/// A trait for adding some helper routines to raw bytes.
|
||||
#[cfg(test)]
|
||||
pub(crate) trait Byte {
|
||||
/// Converts this byte to a `char` if it's ASCII. Otherwise panics.
|
||||
fn to_char(self) -> char;
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
impl Byte for u8 {
|
||||
fn to_char(self) -> char {
|
||||
assert!(self.is_ascii());
|
||||
char::from(self)
|
||||
}
|
||||
}
|
||||
90
third_party/rust/memchr/src/lib.rs
vendored
90
third_party/rust/memchr/src/lib.rs
vendored
@@ -113,9 +113,9 @@ solution presented above, however, its throughput can easily be over an
|
||||
order of magnitude faster. This is a good general purpose trade off to make.
|
||||
You rarely lose, but often gain big.
|
||||
|
||||
**NOTE:** The name `memchr` comes from the corresponding routine in libc. A key
|
||||
advantage of using this library is that its performance is not tied to its
|
||||
quality of implementation in the libc you happen to be using, which can vary
|
||||
**NOTE:** The name `memchr` comes from the corresponding routine in `libc`. A
|
||||
key advantage of using this library is that its performance is not tied to its
|
||||
quality of implementation in the `libc` you happen to be using, which can vary
|
||||
greatly from platform to platform.
|
||||
|
||||
But what about substring search? This one is a bit more complicated. The
|
||||
@@ -131,32 +131,58 @@ implementation in the standard library, even if only for searching on UTF-8?
|
||||
The reason is that the implementation details for using SIMD in the standard
|
||||
library haven't quite been worked out yet.
|
||||
|
||||
**NOTE:** Currently, only `x86_64` targets have highly accelerated
|
||||
implementations of substring search. For `memchr`, all targets have
|
||||
somewhat-accelerated implementations, while only `x86_64` targets have highly
|
||||
accelerated implementations. This limitation is expected to be lifted once the
|
||||
standard library exposes a platform independent SIMD API.
|
||||
**NOTE:** Currently, only `x86_64`, `wasm32` and `aarch64` targets have vector
|
||||
accelerated implementations of `memchr` (and friends) and `memmem`.
|
||||
|
||||
# Crate features
|
||||
|
||||
* **std** - When enabled (the default), this will permit this crate to use
|
||||
features specific to the standard library. Currently, the only thing used
|
||||
from the standard library is runtime SIMD CPU feature detection. This means
|
||||
that this feature must be enabled to get AVX accelerated routines. When
|
||||
`std` is not enabled, this crate will still attempt to use SSE2 accelerated
|
||||
routines on `x86_64`.
|
||||
* **libc** - When enabled (**not** the default), this library will use your
|
||||
platform's libc implementation of `memchr` (and `memrchr` on Linux). This
|
||||
can be useful on non-`x86_64` targets where the fallback implementation in
|
||||
this crate is not as good as the one found in your libc. All other routines
|
||||
(e.g., `memchr[23]` and substring search) unconditionally use the
|
||||
implementation in this crate.
|
||||
* **std** - When enabled (the default), this will permit features specific to
|
||||
the standard library. Currently, the only thing used from the standard library
|
||||
is runtime SIMD CPU feature detection. This means that this feature must be
|
||||
enabled to get AVX2 accelerated routines on `x86_64` targets without enabling
|
||||
the `avx2` feature at compile time, for example. When `std` is not enabled,
|
||||
this crate will still attempt to use SSE2 accelerated routines on `x86_64`. It
|
||||
will also use AVX2 accelerated routines when the `avx2` feature is enabled at
|
||||
compile time. In general, enable this feature if you can.
|
||||
* **alloc** - When enabled (the default), APIs in this crate requiring some
|
||||
kind of allocation will become available. For example, the
|
||||
[`memmem::Finder::into_owned`](crate::memmem::Finder::into_owned) API and the
|
||||
[`arch::all::shiftor`](crate::arch::all::shiftor) substring search
|
||||
implementation. Otherwise, this crate is designed from the ground up to be
|
||||
usable in core-only contexts, so the `alloc` feature doesn't add much
|
||||
currently. Notably, disabling `std` but enabling `alloc` will **not** result
|
||||
in the use of AVX2 on `x86_64` targets unless the `avx2` feature is enabled
|
||||
at compile time. (With `std` enabled, AVX2 can be used even without the `avx2`
|
||||
feature enabled at compile time by way of runtime CPU feature detection.)
|
||||
* **logging** - When enabled (disabled by default), the `log` crate is used
|
||||
to emit log messages about what kinds of `memchr` and `memmem` algorithms
|
||||
are used. Namely, both `memchr` and `memmem` have a number of different
|
||||
implementation choices depending on the target and CPU, and the log messages
|
||||
can help show what specific implementations are being used. Generally, this is
|
||||
useful for debugging performance issues.
|
||||
* **libc** - **DEPRECATED**. Previously, this enabled the use of the target's
|
||||
`memchr` function from whatever `libc` was linked into the program. This
|
||||
feature is now a no-op because this crate's implementation of `memchr` should
|
||||
now be sufficiently fast on a number of platforms that `libc` should no longer
|
||||
be needed. (This feature is somewhat of a holdover from this crate's origins.
|
||||
Originally, this crate was literally just a safe wrapper function around the
|
||||
`memchr` function from `libc`.)
|
||||
*/
|
||||
|
||||
#![deny(missing_docs)]
|
||||
#![cfg_attr(not(feature = "std"), no_std)]
|
||||
// It's not worth trying to gate all code on just miri, so turn off relevant
|
||||
// dead code warnings.
|
||||
#![no_std]
|
||||
// It's just not worth trying to squash all dead code warnings. Pretty
|
||||
// unfortunate IMO. Not really sure how to fix this other than to either
|
||||
// live with it or sprinkle a whole mess of `cfg` annotations everywhere.
|
||||
#![cfg_attr(
|
||||
not(any(
|
||||
all(target_arch = "x86_64", target_feature = "sse2"),
|
||||
all(target_arch = "wasm32", target_feature = "simd128"),
|
||||
target_arch = "aarch64",
|
||||
)),
|
||||
allow(dead_code)
|
||||
)]
|
||||
// Same deal for miri.
|
||||
#![cfg_attr(miri, allow(dead_code, unused_macros))]
|
||||
|
||||
// Supporting 8-bit (or others) would be fine. If you need it, please submit a
|
||||
@@ -168,14 +194,28 @@ standard library exposes a platform independent SIMD API.
|
||||
)))]
|
||||
compile_error!("memchr currently not supported on non-{16,32,64}");
|
||||
|
||||
#[cfg(any(test, feature = "std"))]
|
||||
extern crate std;
|
||||
|
||||
#[cfg(any(test, feature = "alloc"))]
|
||||
extern crate alloc;
|
||||
|
||||
pub use crate::memchr::{
|
||||
memchr, memchr2, memchr2_iter, memchr3, memchr3_iter, memchr_iter,
|
||||
memrchr, memrchr2, memrchr2_iter, memrchr3, memrchr3_iter, memrchr_iter,
|
||||
Memchr, Memchr2, Memchr3,
|
||||
};
|
||||
|
||||
#[macro_use]
|
||||
mod macros;
|
||||
|
||||
#[cfg(test)]
|
||||
#[macro_use]
|
||||
mod tests;
|
||||
|
||||
pub mod arch;
|
||||
mod cow;
|
||||
mod ext;
|
||||
mod memchr;
|
||||
pub mod memmem;
|
||||
#[cfg(test)]
|
||||
mod tests;
|
||||
mod vector;
|
||||
|
||||
20
third_party/rust/memchr/src/macros.rs
vendored
Normal file
20
third_party/rust/memchr/src/macros.rs
vendored
Normal file
@@ -0,0 +1,20 @@
|
||||
// Some feature combinations result in some of these macros never being used.
|
||||
// Which is fine. Just squash the warnings.
|
||||
#![allow(unused_macros)]
|
||||
|
||||
macro_rules! log {
|
||||
($($tt:tt)*) => {
|
||||
#[cfg(feature = "logging")]
|
||||
{
|
||||
$($tt)*
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
macro_rules! debug {
|
||||
($($tt:tt)*) => { log!(log::debug!($($tt)*)) }
|
||||
}
|
||||
|
||||
macro_rules! trace {
|
||||
($($tt:tt)*) => { log!(log::trace!($($tt)*)) }
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user