Bug 1932316 - update Rust shlex crate to v1.3.0 r=valentin,supply-chain-reviewers
Differential Revision: https://phabricator.services.mozilla.com/D229591
This commit is contained in:
4
Cargo.lock
generated
4
Cargo.lock
generated
@@ -5714,9 +5714,9 @@ dependencies = [
|
||||
|
||||
[[package]]
|
||||
name = "shlex"
|
||||
version = "1.1.0"
|
||||
version = "1.3.0"
|
||||
source = "registry+https://github.com/rust-lang/crates.io-index"
|
||||
checksum = "43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"
|
||||
checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
|
||||
|
||||
[[package]]
|
||||
name = "signature_cache"
|
||||
|
||||
@@ -4384,6 +4384,11 @@ who = "Simon Friedberger <simon@mozilla.com>"
|
||||
criteria = "safe-to-deploy"
|
||||
delta = "0.10.6 -> 0.10.7"
|
||||
|
||||
[[audits.shlex]]
|
||||
who = "Max Inden <mail@max-inden.de>"
|
||||
criteria = "safe-to-deploy"
|
||||
delta = "1.1.0 -> 1.3.0"
|
||||
|
||||
[[audits.slab]]
|
||||
who = "Mike Hommey <mh+mozilla@glandium.org>"
|
||||
criteria = "safe-to-deploy"
|
||||
|
||||
2
third_party/rust/shlex/.cargo-checksum.json
vendored
2
third_party/rust/shlex/.cargo-checksum.json
vendored
@@ -1 +1 @@
|
||||
{"files":{"CHANGELOG.md":"ae8160bce335d8cb67f0d522402ed7bdb47266ca774d2ba3edc661783c86bbbe","Cargo.toml":"cdc548ec58d7bcee2494dcab1de5996cdfc748622d685e1cf74a50d54edbdf34","LICENSE-APACHE":"553fffcd9b1cb158bc3e9edc35da85ca5c3b3d7d2e61c883ebcfa8a65814b583","LICENSE-MIT":"4455bf75a91154108304cb283e0fea9948c14f13e20d60887cf2552449dea3b1","README.md":"7b378c1f3f7a3c7a8a819a736a43aa6e5d984d11b412224ef25597dd1ae2fac2","src/lib.rs":"1a3880eb7688af89736e52de8deac316698e664b8b1b64f80c346bf79b18f8b8"},"package":"43b2853a4d09f215c24cc5489c992ce46052d359b5109343cbafbf26bc62f8a3"}
|
||||
{"files":{"CHANGELOG.md":"879a16b3fef6fb3251fcac516fe73414109e3b7df5eb2ec4863a7551674038a0","Cargo.toml":"d7eb8c4bce681b4dd1dfc2c98c649754390775f38f4796d491948ddbb53aa2ef","LICENSE-APACHE":"553fffcd9b1cb158bc3e9edc35da85ca5c3b3d7d2e61c883ebcfa8a65814b583","LICENSE-MIT":"4455bf75a91154108304cb283e0fea9948c14f13e20d60887cf2552449dea3b1","README.md":"082e505bba5dffc5904af5602b45d01129173e617db62c81e6c11d71c964ea71","src/bytes.rs":"eadfffcdb7846d341ba451d6118d275b9d0f14a9554984ccfcdbe9a8d77ec5ee","src/lib.rs":"44c8fb929e1443f2446d26025a9bcfca0b329811bbc309b4a6afb8ec17d7de8d","src/quoting_warning.md":"566d6509211ddcd4afbd4f1117c5234567f6b6d01f5da60acfaef011362be045"},"package":"0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"}
|
||||
4
third_party/rust/shlex/CHANGELOG.md
vendored
4
third_party/rust/shlex/CHANGELOG.md
vendored
@@ -1,3 +1,7 @@
|
||||
# 1.2.0
|
||||
|
||||
* Adds `bytes` module to support operating directly on byte strings.
|
||||
|
||||
# 1.1.0
|
||||
|
||||
* Adds the `std` feature (enabled by default)
|
||||
|
||||
27
third_party/rust/shlex/Cargo.toml
vendored
27
third_party/rust/shlex/Cargo.toml
vendored
@@ -3,19 +3,30 @@
|
||||
# When uploading crates to the registry Cargo will automatically
|
||||
# "normalize" Cargo.toml files for maximal compatibility
|
||||
# with all versions of Cargo and also rewrite `path` dependencies
|
||||
# to registry (e.g., crates.io) dependencies
|
||||
# to registry (e.g., crates.io) dependencies.
|
||||
#
|
||||
# If you believe there's an error in this file please file an
|
||||
# issue against the rust-lang/cargo repository. If you're
|
||||
# editing this file be aware that the upstream Cargo.toml
|
||||
# will likely look very different (and much more reasonable)
|
||||
# If you are reading this file be aware that the original Cargo.toml
|
||||
# will likely look very different (and much more reasonable).
|
||||
# See Cargo.toml.orig for the original contents.
|
||||
|
||||
[package]
|
||||
rust-version = "1.46.0"
|
||||
name = "shlex"
|
||||
version = "1.1.0"
|
||||
authors = ["comex <comexk@gmail.com>", "Fenhl <fenhl@fenhl.net>"]
|
||||
version = "1.3.0"
|
||||
authors = [
|
||||
"comex <comexk@gmail.com>",
|
||||
"Fenhl <fenhl@fenhl.net>",
|
||||
"Adrian Taylor <adetaylor@chromium.org>",
|
||||
"Alex Touchet <alextouchet@outlook.com>",
|
||||
"Daniel Parks <dp+git@oxidized.org>",
|
||||
"Garrett Berg <googberg@gmail.com>",
|
||||
]
|
||||
description = "Split a string into shell words, like Python's shlex."
|
||||
categories = ["command-line-interface", "parser-implementations"]
|
||||
readme = "README.md"
|
||||
categories = [
|
||||
"command-line-interface",
|
||||
"parser-implementations",
|
||||
]
|
||||
license = "MIT OR Apache-2.0"
|
||||
repository = "https://github.com/comex/rust-shlex"
|
||||
|
||||
|
||||
13
third_party/rust/shlex/README.md
vendored
13
third_party/rust/shlex/README.md
vendored
@@ -1,3 +1,11 @@
|
||||
[![ci badge]][ci link] [![crates.io badge]][crates.io link] [![docs.rs badge]][docs.rs link]
|
||||
|
||||
[crates.io badge]: https://img.shields.io/crates/v/shlex.svg?style=flat-square
|
||||
[crates.io link]: https://crates.io/crates/shlex
|
||||
[docs.rs badge]: https://img.shields.io/badge/docs-online-dddddd.svg?style=flat-square
|
||||
[docs.rs link]: https://docs.rs/shlex
|
||||
[ci badge]: https://img.shields.io/github/actions/workflow/status/comex/rust-shlex/test.yml?branch=master&style=flat-square
|
||||
[ci link]: https://github.com/comex/rust-shlex/actions
|
||||
|
||||
Same idea as (but implementation not directly based on) the Python shlex
|
||||
module. However, this implementation does not support any of the Python
|
||||
@@ -8,8 +16,9 @@ You only get the default settings of shlex.split, which mimic the POSIX shell:
|
||||
This implementation also deviates from the Python version in not treating \r
|
||||
specially, which I believe is more compliant.
|
||||
|
||||
The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate
|
||||
over the bytes directly as a micro-optimization.
|
||||
This crate can be used on either normal Rust strings, or on byte strings with
|
||||
the `bytes` module. The algorithms used are oblivious to UTF-8 high bytes, so
|
||||
internally they all work on bytes directly as a micro-optimization.
|
||||
|
||||
Disabling the `std` feature (which is enabled by default) will allow the crate
|
||||
to work in `no_std` environments, where the `alloc` crate, and a global
|
||||
|
||||
576
third_party/rust/shlex/src/bytes.rs
vendored
Normal file
576
third_party/rust/shlex/src/bytes.rs
vendored
Normal file
@@ -0,0 +1,576 @@
|
||||
// Copyright 2015 Nicholas Allegra (comex).
|
||||
// Licensed under the Apache License, Version 2.0 <https://www.apache.org/licenses/LICENSE-2.0> or
|
||||
// the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be
|
||||
// copied, modified, or distributed except according to those terms.
|
||||
|
||||
//! [`Shlex`] and friends for byte strings.
|
||||
//!
|
||||
//! This is used internally by the [outer module](crate), and may be more
|
||||
//! convenient if you are working with byte slices (`[u8]`) or types that are
|
||||
//! wrappers around bytes, such as [`OsStr`](std::ffi::OsStr):
|
||||
//!
|
||||
//! ```rust
|
||||
//! #[cfg(unix)] {
|
||||
//! use shlex::bytes::quote;
|
||||
//! use std::ffi::OsStr;
|
||||
//! use std::os::unix::ffi::OsStrExt;
|
||||
//!
|
||||
//! // `\x80` is invalid in UTF-8.
|
||||
//! let os_str = OsStr::from_bytes(b"a\x80b c");
|
||||
//! assert_eq!(quote(os_str.as_bytes()), &b"'a\x80b c'"[..]);
|
||||
//! }
|
||||
//! ```
|
||||
//!
|
||||
//! (On Windows, `OsStr` uses 16 bit wide characters so this will not work.)
|
||||
|
||||
extern crate alloc;
|
||||
use alloc::vec::Vec;
|
||||
use alloc::borrow::Cow;
|
||||
#[cfg(test)]
|
||||
use alloc::vec;
|
||||
#[cfg(test)]
|
||||
use alloc::borrow::ToOwned;
|
||||
#[cfg(all(doc, not(doctest)))]
|
||||
use crate::{self as shlex, quoting_warning};
|
||||
|
||||
use super::QuoteError;
|
||||
|
||||
/// Iterator that splits an input byte string into words, following the quoting and escaping
/// syntax of the POSIX shell.
pub struct Shlex<'a> {
    in_iter: core::slice::Iter<'a, u8>,
    /// One plus the number of newline bytes consumed so far.
    pub line_no: usize,
    /// Set to `true` when the input ends inside a quotation or directly after an unescaped
    /// backslash. `Iterator` has no channel for reporting errors, so in that situation the
    /// unfinished token is discarded and iteration stops; inspect this flag once iteration is
    /// over.
    pub had_error: bool,
}

impl<'a> Shlex<'a> {
    /// Creates a lexer over `in_bytes`, positioned on line 1 with no error recorded.
    pub fn new(in_bytes: &'a [u8]) -> Self {
        Shlex {
            had_error: false,
            line_no: 1,
            in_iter: in_bytes.iter(),
        }
    }

    /// Reads one word whose first byte is `ch`.
    ///
    /// Returns `None` (and sets `had_error`) if the input ends inside a quotation or right
    /// after a backslash.
    fn parse_word(&mut self, mut ch: u8) -> Option<Vec<u8>> {
        let mut word: Vec<u8> = Vec::new();
        loop {
            let well_formed = match ch {
                b'"' => self.parse_double(&mut word).is_ok(),
                b'\'' => self.parse_single(&mut word).is_ok(),
                b'\\' => match self.next_char() {
                    // Backslash-newline is a line continuation and contributes nothing.
                    Some(b'\n') => true,
                    Some(escaped) => {
                        word.push(escaped);
                        true
                    }
                    None => false,
                },
                // Unquoted whitespace terminates the word.
                b' ' | b'\t' | b'\n' => break,
                plain => {
                    word.push(plain);
                    true
                }
            };
            if !well_formed {
                self.had_error = true;
                return None;
            }
            match self.next_char() {
                Some(following) => ch = following,
                None => break,
            }
        }
        Some(word)
    }

    /// Consumes the remainder of a double-quoted section (the opening quote has already been
    /// read), appending its contents to `result`. Fails if the input ends first.
    fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        loop {
            match self.next_char().ok_or(())? {
                b'"' => return Ok(()),
                b'\\' => match self.next_char().ok_or(())? {
                    // \<newline> => nothing
                    b'\n' => {}
                    escaped => {
                        // \$ => $ (likewise for `, " and \); any other \x stays as \x.
                        if !matches!(escaped, b'$' | b'`' | b'"' | b'\\') {
                            result.push(b'\\');
                        }
                        result.push(escaped);
                    }
                },
                plain => result.push(plain),
            }
        }
    }

    /// Consumes the remainder of a single-quoted section (the opening quote has already been
    /// read), appending its contents verbatim to `result`. Fails if the input ends first.
    fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
        loop {
            match self.next_char() {
                None => return Err(()),
                Some(b'\'') => return Ok(()),
                Some(plain) => result.push(plain),
            }
        }
    }

    /// Pulls the next input byte, bumping `line_no` whenever it is a newline.
    fn next_char(&mut self) -> Option<u8> {
        let byte = *self.in_iter.next()?;
        if byte == b'\n' {
            self.line_no += 1;
        }
        Some(byte)
    }
}

impl<'a> Iterator for Shlex<'a> {
    type Item = Vec<u8>;

    /// Skips leading whitespace and `#` comments, then parses and returns the next word.
    fn next(&mut self) -> Option<Self::Item> {
        let mut ch = self.next_char()?;
        loop {
            match ch {
                b' ' | b'\t' | b'\n' => {}
                b'#' => {
                    // A comment extends to (and includes) the end of the line.
                    while let Some(skipped) = self.next_char() {
                        if skipped == b'\n' {
                            break;
                        }
                    }
                }
                _ => break,
            }
            ch = self.next_char()?;
        }
        self.parse_word(ch)
    }
}
|
||||
|
||||
/// Convenience function that consumes the whole byte string at once. Returns None if the input was
|
||||
/// erroneous.
|
||||
pub fn split(in_bytes: &[u8]) -> Option<Vec<Vec<u8>>> {
|
||||
let mut shl = Shlex::new(in_bytes);
|
||||
let res = shl.by_ref().collect();
|
||||
if shl.had_error { None } else { Some(res) }
|
||||
}
|
||||
|
||||
/// A more configurable interface to quote strings. If you only want the default settings you can
|
||||
/// use the convenience functions [`try_quote`] and [`try_join`].
|
||||
///
|
||||
/// The string equivalent is [`shlex::Quoter`].
|
||||
#[derive(Default, Debug, Clone)]
|
||||
pub struct Quoter {
|
||||
allow_nul: bool,
|
||||
// TODO: more options
|
||||
}
|
||||
|
||||
impl Quoter {
|
||||
/// Create a new [`Quoter`] with default settings.
|
||||
#[inline]
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Set whether to allow [nul bytes](quoting_warning#nul-bytes). By default they are not
|
||||
/// allowed and will result in an error of [`QuoteError::Nul`].
|
||||
#[inline]
|
||||
pub fn allow_nul(mut self, allow: bool) -> Self {
|
||||
self.allow_nul = allow;
|
||||
self
|
||||
}
|
||||
|
||||
/// Convenience function that consumes an iterable of words and turns it into a single byte string,
|
||||
/// quoting words when necessary. Consecutive words will be separated by a single space.
|
||||
pub fn join<'a, I: IntoIterator<Item = &'a [u8]>>(&self, words: I) -> Result<Vec<u8>, QuoteError> {
|
||||
Ok(words.into_iter()
|
||||
.map(|word| self.quote(word))
|
||||
.collect::<Result<Vec<Cow<[u8]>>, QuoteError>>()?
|
||||
.join(&b' '))
|
||||
}
|
||||
|
||||
/// Given a single word, return a byte string suitable to encode it as a shell argument.
|
||||
///
|
||||
/// If given valid UTF-8, this will never produce invalid UTF-8. This is because it only
|
||||
/// ever inserts valid ASCII characters before or after existing ASCII characters (or
|
||||
/// returns two single quotes if the input was an empty string). It will never modify a
|
||||
/// multibyte UTF-8 character.
|
||||
pub fn quote<'a>(&self, mut in_bytes: &'a [u8]) -> Result<Cow<'a, [u8]>, QuoteError> {
|
||||
if in_bytes.is_empty() {
|
||||
// Empty string. Special case that isn't meaningful as only part of a word.
|
||||
return Ok(b"''"[..].into());
|
||||
}
|
||||
if !self.allow_nul && in_bytes.iter().any(|&b| b == b'\0') {
|
||||
return Err(QuoteError::Nul);
|
||||
}
|
||||
let mut out: Vec<u8> = Vec::new();
|
||||
while !in_bytes.is_empty() {
|
||||
// Pick a quoting strategy for some prefix of the input. Normally this will cover the
|
||||
// entire input, but in some case we might need to divide the input into multiple chunks
|
||||
// that are quoted differently.
|
||||
let (cur_len, strategy) = quoting_strategy(in_bytes);
|
||||
if cur_len == in_bytes.len() && strategy == QuotingStrategy::Unquoted && out.is_empty() {
|
||||
// Entire string can be represented unquoted. Reuse the allocation.
|
||||
return Ok(in_bytes.into());
|
||||
}
|
||||
let (cur_chunk, rest) = in_bytes.split_at(cur_len);
|
||||
assert!(rest.len() < in_bytes.len()); // no infinite loop
|
||||
in_bytes = rest;
|
||||
append_quoted_chunk(&mut out, cur_chunk, strategy);
|
||||
}
|
||||
Ok(out.into())
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/// The way a chunk of input bytes is rendered in quoted output.
|
||||
#[derive(PartialEq)]
|
||||
enum QuotingStrategy {
|
||||
/// Emit the bytes as-is: no quotes and no backslash escapes. (When backslash escapes would be
|
||||
/// necessary, another strategy is chosen instead.)
|
||||
Unquoted,
|
||||
/// Wrap the bytes in single quotes.
|
||||
SingleQuoted,
|
||||
/// Wrap the bytes in double quotes, backslash-escaping where required.
|
||||
DoubleQuoted,
|
||||
// TODO: add $'xxx' and "$(printf 'xxx')" styles
|
||||
}
|
||||
|
||||
/// Is this ASCII byte okay to emit unquoted?
///
/// Returns `true` only for the explicit allow-list below (`+ - . / : @ ] _`, digits and ASCII
/// letters). Every other ASCII byte returns `false`. Callers are expected to pass only bytes
/// below 0x80; any other byte conservatively yields `false`.
const fn unquoted_ok(c: u8) -> bool {
    match c as char {
        // Allowed characters:
        '+' | '-' | '.' | '/' | ':' | '@' | ']' | '_' |
        '0'..='9' | 'A'..='Z' | 'a'..='z'
        => true,

        // Non-allowed characters:
        // From POSIX https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html
        // "The application shall quote the following characters if they are to represent themselves:"
        '|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | '\'' | ' ' | '\t' | '\n' |
        // "and the following may need to be quoted under certain circumstances[..]:"
        '*' | '?' | '[' | '#' | '~' | '=' | '%' |
        // Brace expansion. These ought to be in the POSIX list but aren't yet;
        // see: https://www.austingroupbugs.net/view.php?id=1193
        '{' | '}' |
        // Also quote comma, just to be safe in the extremely odd case that the user of this crate
        // is intentionally placing a quoted string inside a brace expansion, e.g.:
        //     format!("echo foo{{a,b,{}}}", shlex::quote(some_str))
        ',' |
        // '\r' is allowed in a word by all real shells I tested, but is treated as a word
        // separator by Python `shlex`, and might be translated to '\n' in interactive mode.
        '\r' |
        // '!' and '^' are treated specially in interactive mode; see quoting_warning.
        '!' | '^' |
        // Nul bytes and control characters.
        '\x00' ..= '\x1f' | '\x7f'
        => false,
        '\u{80}' ..= '\u{10ffff}' => {
            // Not expected to be reached: `unquoted_ok` is only called for 0..128, and
            // non-ASCII bytes are handled separately in `quoting_strategy`.
            // `unreachable!()` can't be called from `const fn` on old Rust, so answer
            // conservatively ("needs quoting") rather than recursing without end.
            false
        },
    }
    // Note: The logic cited above for quoting comma might suggest that `..` should also be quoted
    // (since it is a special case of brace expansion). But it's not necessary. There are three
    // cases:
    //
    // 1. The user wants comma-based brace expansion, but the untrusted string being `quote`d
    //    contains `..`, so they get something like `{foo,bar,3..5}`.
    //    => That's safe; both Bash and Zsh expand this to `foo bar 3..5` rather than
    //    `foo bar 3 4 5`. The presence of commas disables sequence expression expansion.
    //
    // 2. The user wants comma-based brace expansion where the contents of the braces are a
    //    variable number of `quote`d strings and nothing else. There happens to be exactly
    //    one string and it contains `..`, so they get something like `{3..5}`.
    //    => Then this will expand as a sequence expression, which is unintended. But I don't mind,
    //    because any such code is already buggy. Suppose the untrusted string *didn't* contain
    //    `,` or `..`, resulting in shell input like `{foo}`. Then the shell would interpret it
    //    as the literal string `{foo}` rather than brace-expanding it into `foo`.
    //
    // 3. The user wants a sequence expression and wants to supply an untrusted string as one of
    //    the endpoints or the increment.
    //    => Well, that's just silly, since the endpoints can only be numbers or single letters.
}
|
||||
|
||||
/// Optimized version of `unquoted_ok`.
|
||||
fn unquoted_ok_fast(c: u8) -> bool {
|
||||
const UNQUOTED_OK_MASK: u128 = {
|
||||
// Make a mask of all bytes in 0..<0x80 that pass.
|
||||
let mut c = 0u8;
|
||||
let mut mask = 0u128;
|
||||
while c < 0x80 {
|
||||
if unquoted_ok(c) {
|
||||
mask |= 1u128 << c;
|
||||
}
|
||||
c += 1;
|
||||
}
|
||||
mask
|
||||
};
|
||||
((UNQUOTED_OK_MASK >> c) & 1) != 0
|
||||
}
|
||||
|
||||
/// Is this ASCII byte okay to emit in single quotes?
///
/// Three bytes are rejected:
/// * `'`  - a single quote cannot appear inside single quotes.
/// * `^`  - to work around a Bash bug, it is only allowed right after an opening single quote;
///   see quoting_warning.
/// * `\`  - literal inside single quotes according to POSIX, but Fish treats it as an escape
///   character. Fish doesn't aim to be POSIX-compatible, but Fish compatibility *can* be
///   achieved with double quotes, so backslashes are banned here.
fn single_quoted_ok(c: u8) -> bool {
    !matches!(c, b'\'' | b'^' | b'\\')
}
|
||||
|
||||
/// Is this ASCII byte okay to emit in double quotes?
///
/// Four bytes are rejected:
/// * `` ` `` and `$` - works around a Python `shlex` bug where parsing "\`" and "\$" doesn't
///   strip the backslash, even though POSIX requires it.
/// * `!` and `^` - treated specially in interactive mode; see quoting_warning.
fn double_quoted_ok(c: u8) -> bool {
    !matches!(c, b'`' | b'$' | b'!' | b'^')
}
|
||||
|
||||
/// Given an input, return a quoting strategy that can cover some prefix of the string, along with
|
||||
/// the size of that prefix.
|
||||
///
|
||||
/// Precondition: input size is nonzero. (Empty strings are handled by the caller.)
|
||||
/// Postcondition: returned size is nonzero.
|
||||
#[cfg_attr(manual_codegen_check, inline(never))]
|
||||
fn quoting_strategy(in_bytes: &[u8]) -> (usize, QuotingStrategy) {
|
||||
const UNQUOTED_OK: u8 = 1;
|
||||
const SINGLE_QUOTED_OK: u8 = 2;
|
||||
const DOUBLE_QUOTED_OK: u8 = 4;
|
||||
|
||||
let mut prev_ok = SINGLE_QUOTED_OK | DOUBLE_QUOTED_OK | UNQUOTED_OK;
|
||||
let mut i = 0;
|
||||
|
||||
if in_bytes[0] == b'^' {
|
||||
// To work around a Bash bug, ^ is only allowed right after an opening single quote; see
|
||||
// quoting_warning.
|
||||
prev_ok = SINGLE_QUOTED_OK;
|
||||
i = 1;
|
||||
}
|
||||
|
||||
while i < in_bytes.len() {
|
||||
let c = in_bytes[i];
|
||||
let mut cur_ok = prev_ok;
|
||||
|
||||
if c >= 0x80 {
|
||||
// Normally, non-ASCII characters shouldn't require quoting, but see quoting_warning.md
|
||||
// about \xa0. For now, just treat all non-ASCII characters as requiring quotes. This
|
||||
// also ensures things are safe in the off-chance that you're in a legacy 8-bit locale that
|
||||
// has additional characters satisfying `isblank`.
|
||||
cur_ok &= !UNQUOTED_OK;
|
||||
} else {
|
||||
if !unquoted_ok_fast(c) {
|
||||
cur_ok &= !UNQUOTED_OK;
|
||||
}
|
||||
if !single_quoted_ok(c){
|
||||
cur_ok &= !SINGLE_QUOTED_OK;
|
||||
}
|
||||
if !double_quoted_ok(c) {
|
||||
cur_ok &= !DOUBLE_QUOTED_OK;
|
||||
}
|
||||
}
|
||||
|
||||
if cur_ok == 0 {
|
||||
// There are no quoting strategies that would work for both the previous characters and
|
||||
// this one. So we have to end the chunk before this character. The caller will call
|
||||
// `quoting_strategy` again to handle the rest of the string.
|
||||
break;
|
||||
}
|
||||
|
||||
prev_ok = cur_ok;
|
||||
i += 1;
|
||||
}
|
||||
|
||||
// Pick the best allowed strategy.
|
||||
let strategy = if prev_ok & UNQUOTED_OK != 0 {
|
||||
QuotingStrategy::Unquoted
|
||||
} else if prev_ok & SINGLE_QUOTED_OK != 0 {
|
||||
QuotingStrategy::SingleQuoted
|
||||
} else if prev_ok & DOUBLE_QUOTED_OK != 0 {
|
||||
QuotingStrategy::DoubleQuoted
|
||||
} else {
|
||||
unreachable!()
|
||||
};
|
||||
debug_assert!(i > 0);
|
||||
(i, strategy)
|
||||
}
|
||||
|
||||
fn append_quoted_chunk(out: &mut Vec<u8>, cur_chunk: &[u8], strategy: QuotingStrategy) {
|
||||
match strategy {
|
||||
QuotingStrategy::Unquoted => {
|
||||
out.extend_from_slice(cur_chunk);
|
||||
},
|
||||
QuotingStrategy::SingleQuoted => {
|
||||
out.reserve(cur_chunk.len() + 2);
|
||||
out.push(b'\'');
|
||||
out.extend_from_slice(cur_chunk);
|
||||
out.push(b'\'');
|
||||
},
|
||||
QuotingStrategy::DoubleQuoted => {
|
||||
out.reserve(cur_chunk.len() + 2);
|
||||
out.push(b'"');
|
||||
for &c in cur_chunk.into_iter() {
|
||||
if let b'$' | b'`' | b'"' | b'\\' = c {
|
||||
// Add a preceding backslash.
|
||||
// Note: We shouldn't actually get here for $ and ` because they don't pass
|
||||
// `double_quoted_ok`.
|
||||
out.push(b'\\');
|
||||
}
|
||||
// Add the character itself.
|
||||
out.push(c);
|
||||
}
|
||||
out.push(b'"');
|
||||
},
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience function that consumes an iterable of words and turns it into a single byte string,
|
||||
/// quoting words when necessary. Consecutive words will be separated by a single space.
|
||||
///
|
||||
/// Uses default settings except that nul bytes are passed through, which [may be
|
||||
/// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated.
|
||||
///
|
||||
/// Equivalent to [`Quoter::new().allow_nul(true).join(words).unwrap()`](Quoter).
|
||||
///
|
||||
/// (That configuration never returns `Err`, so this function does not panic.)
|
||||
///
|
||||
/// The string equivalent is [shlex::join].
|
||||
#[deprecated(since = "1.3.0", note = "replace with `try_join(words)?` to avoid nul byte danger")]
|
||||
pub fn join<'a, I: IntoIterator<Item = &'a [u8]>>(words: I) -> Vec<u8> {
|
||||
Quoter::new().allow_nul(true).join(words).unwrap()
|
||||
}
|
||||
|
||||
/// Convenience function that consumes an iterable of words and turns it into a single byte string,
|
||||
/// quoting words when necessary. Consecutive words will be separated by a single space.
|
||||
///
|
||||
/// Uses default settings. The only error that can be returned is [`QuoteError::Nul`].
|
||||
///
|
||||
/// Equivalent to [`Quoter::new().join(words)`](Quoter).
|
||||
///
|
||||
/// The string equivalent is [shlex::try_join].
|
||||
pub fn try_join<'a, I: IntoIterator<Item = &'a [u8]>>(words: I) -> Result<Vec<u8>, QuoteError> {
|
||||
Quoter::new().join(words)
|
||||
}
|
||||
|
||||
/// Given a single word, return a string suitable to encode it as a shell argument.
|
||||
///
|
||||
/// Uses default settings except that nul bytes are passed through, which [may be
|
||||
/// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated.
|
||||
///
|
||||
/// Equivalent to [`Quoter::new().allow_nul(true).quote(in_bytes).unwrap()`](Quoter).
|
||||
///
|
||||
/// (That configuration never returns `Err`, so this function does not panic.)
|
||||
///
|
||||
/// The string equivalent is [shlex::quote].
|
||||
#[deprecated(since = "1.3.0", note = "replace with `try_quote(str)?` to avoid nul byte danger")]
|
||||
pub fn quote(in_bytes: &[u8]) -> Cow<[u8]> {
|
||||
Quoter::new().allow_nul(true).quote(in_bytes).unwrap()
|
||||
}
|
||||
|
||||
/// Given a single word, return a string suitable to encode it as a shell argument.
|
||||
///
|
||||
/// Uses default settings. The only error that can be returned is [`QuoteError::Nul`].
|
||||
///
|
||||
/// Equivalent to [`Quoter::new().quote(in_bytes)`](Quoter).
|
||||
///
|
||||
/// (That configuration never returns `Err`, so this function does not panic.)
|
||||
///
|
||||
/// The string equivalent is [shlex::try_quote].
|
||||
pub fn try_quote(in_bytes: &[u8]) -> Result<Cow<[u8]>, QuoteError> {
|
||||
Quoter::new().quote(in_bytes)
|
||||
}
|
||||
|
||||
// A lone 0xa1 byte; `test_invalid_utf8` checks that this really is invalid UTF-8.
|
||||
#[cfg(test)]
|
||||
const INVALID_UTF8: &[u8] = b"\xa1";
|
||||
// The same byte wrapped in single quotes: the expected result of quoting `INVALID_UTF8`.
|
||||
#[cfg(test)]
|
||||
const INVALID_UTF8_SINGLEQUOTED: &[u8] = b"'\xa1'";
|
||||
|
||||
/// Sanity check for the fixture itself: `INVALID_UTF8` must fail UTF-8 validation.
#[test]
#[allow(invalid_from_utf8)]
fn test_invalid_utf8() {
    let decoded = core::str::from_utf8(INVALID_UTF8);
    assert!(decoded.is_err());
}
|
||||
|
||||
// Table of `(input, expected)` pairs consumed by `test_split`: `Some(words)` lists the words the
|
||||
// input must split into, `None` marks input that `split` must reject.
|
||||
#[cfg(test)]
|
||||
static SPLIT_TEST_ITEMS: &'static [(&'static [u8], Option<&'static [&'static [u8]]>)] = &[
|
||||
(b"foo$baz", Some(&[b"foo$baz"])),
|
||||
(b"foo baz", Some(&[b"foo", b"baz"])),
|
||||
(b"foo\"bar\"baz", Some(&[b"foobarbaz"])),
|
||||
(b"foo \"bar\"baz", Some(&[b"foo", b"barbaz"])),
|
||||
(b" foo \nbar", Some(&[b"foo", b"bar"])),
|
||||
// Backslash-newline is a line continuation, unquoted and inside double quotes.
|
||||
(b"foo\\\nbar", Some(&[b"foobar"])),
|
||||
(b"\"foo\\\nbar\"", Some(&[b"foobar"])),
|
||||
(b"'baz\\$b'", Some(&[b"baz\\$b"])),
|
||||
(b"'baz\\\''", None),
|
||||
// Input ending after a backslash or inside a quotation is an error.
|
||||
(b"\\", None),
|
||||
(b"\"\\", None),
|
||||
(b"'\\", None),
|
||||
(b"\"", None),
|
||||
(b"'", None),
|
||||
// `#` starts a comment only at the beginning of a word.
|
||||
(b"foo #bar\nbaz", Some(&[b"foo", b"baz"])),
|
||||
(b"foo #bar", Some(&[b"foo"])),
|
||||
(b"foo#bar", Some(&[b"foo#bar"])),
|
||||
(b"foo\"#bar", None),
|
||||
(b"'\\n'", Some(&[b"\\n"])),
|
||||
(b"'\\\\n'", Some(&[b"\\\\n"])),
|
||||
(INVALID_UTF8, Some(&[INVALID_UTF8])),
|
||||
];
|
||||
|
||||
/// Runs `split` over every entry of `SPLIT_TEST_ITEMS` and compares against the expected words.
#[test]
fn test_split() {
    for &(input, output) in SPLIT_TEST_ITEMS {
        let actual = split(input);
        let expected: Option<Vec<Vec<u8>>> =
            output.map(|words| words.iter().map(|&word| word.to_owned()).collect());
        assert_eq!(actual, expected);
    }
}
|
||||
|
||||
/// Once the word `bar` has been read from `"\nfoo\nbar"`, two newlines have been consumed, so
/// `line_no` must be 3.
#[test]
fn test_lineno() {
    let mut sh = Shlex::new(b"\nfoo\nbar");
    loop {
        match sh.next() {
            Some(word) => {
                if word == b"bar" {
                    assert_eq!(sh.line_no, 3);
                }
            }
            None => break,
        }
    }
}
|
||||
|
||||
/// Checks the deprecated `quote` on invalid UTF-8 and on a few cases replicated from lib.rs
/// (no need to replicate all of them). Each distinct input appears once in the table.
#[test]
#[allow(deprecated)]
fn test_quote() {
    let cases: [(&[u8], &[u8]); 5] = [
        // Validate behavior with invalid UTF-8:
        (INVALID_UTF8, INVALID_UTF8_SINGLEQUOTED),
        // Replicated from lib.rs:
        (b"", b"''"),
        (b"foobar", b"foobar"),
        (b"foo bar", b"'foo bar'"),
        (b"'\"", b"\"'\\\"\""),
    ];
    for &(input, expected) in cases.iter() {
        assert_eq!(quote(input), expected);
    }
}
|
||||
|
||||
/// Checks the deprecated `join` on invalid UTF-8 and on a few cases replicated from lib.rs
/// (no need to replicate all of them).
#[test]
#[allow(deprecated)]
fn test_join() {
    // Validate behavior with invalid UTF-8:
    let invalid = join(vec![INVALID_UTF8]);
    assert_eq!(invalid, INVALID_UTF8_SINGLEQUOTED);

    // No words at all produce an empty byte string.
    let nothing = join(vec![]);
    assert_eq!(nothing, &b""[..]);

    // A single empty word is rendered as two single quotes.
    let empty_word = join(vec![&b""[..]]);
    assert_eq!(empty_word, b"''");
}
|
||||
401
third_party/rust/shlex/src/lib.rs
vendored
401
third_party/rust/shlex/src/lib.rs
vendored
@@ -3,20 +3,37 @@
|
||||
// the MIT license <https://opensource.org/licenses/MIT>, at your option. This file may not be
|
||||
// copied, modified, or distributed except according to those terms.
|
||||
|
||||
//! Same idea as (but implementation not directly based on) the Python shlex module. However, this
|
||||
//! implementation does not support any of the Python module's customization because it makes
|
||||
//! parsing slower and is fairly useless. You only get the default settings of shlex.split, which
|
||||
//! mimic the POSIX shell:
|
||||
//! <https://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html>
|
||||
//! Parse strings like, and escape strings for, POSIX shells.
|
||||
//!
|
||||
//! This implementation also deviates from the Python version in not treating `\r` specially, which
|
||||
//! I believe is more compliant.
|
||||
//!
|
||||
//! The algorithms in this crate are oblivious to UTF-8 high bytes, so they iterate over the bytes
|
||||
//! directly as a micro-optimization.
|
||||
//! Same idea as (but implementation not directly based on) the Python shlex module.
|
||||
//!
|
||||
//! Disabling the `std` feature (which is enabled by default) will allow the crate to work in
|
||||
//! `no_std` environments, where the `alloc` crate, and a global allocator, are available.
|
||||
//!
|
||||
//! ## <span style="color:red">Warning</span>
|
||||
//!
|
||||
//! The [`try_quote`]/[`try_join`] family of APIs does not quote control characters (because they
|
||||
//! cannot be quoted portably).
|
||||
//!
|
||||
//! This is fully safe in noninteractive contexts, like shell scripts and `sh -c` arguments (or
|
||||
//! even scripts `source`d from interactive shells).
|
||||
//!
|
||||
//! But if you are quoting for human consumption, you should keep in mind that ugly inputs produce
|
||||
//! ugly outputs (which may not be copy-pastable).
|
||||
//!
|
||||
//! And if by chance you are piping the output of [`try_quote`]/[`try_join`] directly to the stdin
|
||||
//! of an interactive shell, you should stop, because control characters can lead to arbitrary
|
||||
//! command injection.
|
||||
//!
|
||||
//! For more information, and for information about more minor issues, please see [quoting_warning].
|
||||
//!
|
||||
//! ## Compatibility
|
||||
//!
|
||||
//! This crate's quoting functionality tries to be compatible with **any POSIX-compatible shell**;
|
||||
//! it's tested against `bash`, `zsh`, `dash`, Busybox `ash`, and `mksh`, plus `fish` (which is not
|
||||
//! POSIX-compatible but close enough).
|
||||
//!
|
||||
//! It also aims to be compatible with Python `shlex` and C `wordexp`.
|
||||
|
||||
#![cfg_attr(not(feature = "std"), no_std)]
|
||||
|
||||
@@ -29,124 +46,45 @@ use alloc::vec;
|
||||
#[cfg(test)]
|
||||
use alloc::borrow::ToOwned;
|
||||
|
||||
pub mod bytes;
|
||||
#[cfg(all(doc, not(doctest)))]
|
||||
#[path = "quoting_warning.md"]
|
||||
pub mod quoting_warning;
|
||||
|
||||
/// An iterator that takes an input string and splits it into the words using the same syntax as
|
||||
/// the POSIX shell.
|
||||
pub struct Shlex<'a> {
|
||||
in_iter: core::str::Bytes<'a>,
|
||||
/// The number of newlines read so far, plus one.
|
||||
pub line_no: usize,
|
||||
/// An input string is erroneous if it ends while inside a quotation or right after an
|
||||
/// unescaped backslash. Since Iterator does not have a mechanism to return an error, if that
|
||||
/// happens, Shlex just throws out the last token, ends the iteration, and sets 'had_error' to
|
||||
/// true; best to check it after you're done iterating.
|
||||
pub had_error: bool,
|
||||
}
|
||||
///
|
||||
/// See [`bytes::Shlex`].
|
||||
pub struct Shlex<'a>(bytes::Shlex<'a>);
|
||||
|
||||
impl<'a> Shlex<'a> {
|
||||
pub fn new(in_str: &'a str) -> Self {
|
||||
Shlex {
|
||||
in_iter: in_str.bytes(),
|
||||
line_no: 1,
|
||||
had_error: false,
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_word(&mut self, mut ch: u8) -> Option<String> {
|
||||
let mut result: Vec<u8> = Vec::new();
|
||||
loop {
|
||||
match ch as char {
|
||||
'"' => if let Err(()) = self.parse_double(&mut result) {
|
||||
self.had_error = true;
|
||||
return None;
|
||||
},
|
||||
'\'' => if let Err(()) = self.parse_single(&mut result) {
|
||||
self.had_error = true;
|
||||
return None;
|
||||
},
|
||||
'\\' => if let Some(ch2) = self.next_char() {
|
||||
if ch2 != '\n' as u8 { result.push(ch2); }
|
||||
} else {
|
||||
self.had_error = true;
|
||||
return None;
|
||||
},
|
||||
' ' | '\t' | '\n' => { break; },
|
||||
_ => { result.push(ch as u8); },
|
||||
}
|
||||
if let Some(ch2) = self.next_char() { ch = ch2; } else { break; }
|
||||
}
|
||||
unsafe { Some(String::from_utf8_unchecked(result)) }
|
||||
}
|
||||
|
||||
fn parse_double(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
|
||||
loop {
|
||||
if let Some(ch2) = self.next_char() {
|
||||
match ch2 as char {
|
||||
'\\' => {
|
||||
if let Some(ch3) = self.next_char() {
|
||||
match ch3 as char {
|
||||
// \$ => $
|
||||
'$' | '`' | '"' | '\\' => { result.push(ch3); },
|
||||
// \<newline> => nothing
|
||||
'\n' => {},
|
||||
// \x => \x
|
||||
_ => { result.push('\\' as u8); result.push(ch3); }
|
||||
}
|
||||
} else {
|
||||
return Err(());
|
||||
}
|
||||
},
|
||||
'"' => { return Ok(()); },
|
||||
_ => { result.push(ch2); },
|
||||
}
|
||||
} else {
|
||||
return Err(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn parse_single(&mut self, result: &mut Vec<u8>) -> Result<(), ()> {
|
||||
loop {
|
||||
if let Some(ch2) = self.next_char() {
|
||||
match ch2 as char {
|
||||
'\'' => { return Ok(()); },
|
||||
_ => { result.push(ch2); },
|
||||
}
|
||||
} else {
|
||||
return Err(());
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn next_char(&mut self) -> Option<u8> {
|
||||
let res = self.in_iter.next();
|
||||
if res == Some('\n' as u8) { self.line_no += 1; }
|
||||
res
|
||||
Self(bytes::Shlex::new(in_str.as_bytes()))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Shlex<'a> {
|
||||
type Item = String;
|
||||
fn next(&mut self) -> Option<String> {
|
||||
if let Some(mut ch) = self.next_char() {
|
||||
// skip initial whitespace
|
||||
loop {
|
||||
match ch as char {
|
||||
' ' | '\t' | '\n' => {},
|
||||
'#' => {
|
||||
while let Some(ch2) = self.next_char() {
|
||||
if ch2 as char == '\n' { break; }
|
||||
}
|
||||
},
|
||||
_ => { break; }
|
||||
}
|
||||
if let Some(ch2) = self.next_char() { ch = ch2; } else { return None; }
|
||||
}
|
||||
self.parse_word(ch)
|
||||
} else { // no initial character
|
||||
None
|
||||
}
|
||||
self.0.next().map(|byte_word| {
|
||||
// Safety: given valid UTF-8, bytes::Shlex will always return valid UTF-8.
|
||||
unsafe { String::from_utf8_unchecked(byte_word) }
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> core::ops::Deref for Shlex<'a> {
|
||||
type Target = bytes::Shlex<'a>;
|
||||
|
||||
fn deref(&self) -> &Self::Target {
|
||||
&self.0
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> core::ops::DerefMut for Shlex<'a> {
|
||||
fn deref_mut(&mut self) -> &mut Self::Target {
|
||||
&mut self.0
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience function that consumes the whole string at once. Returns None if the input was
|
||||
@@ -157,38 +95,151 @@ pub fn split(in_str: &str) -> Option<Vec<String>> {
|
||||
if shl.had_error { None } else { Some(res) }
|
||||
}
|
||||
|
||||
/// Given a single word, return a string suitable to encode it as a shell argument.
|
||||
pub fn quote(in_str: &str) -> Cow<str> {
|
||||
if in_str.len() == 0 {
|
||||
"\"\"".into()
|
||||
} else if in_str.bytes().any(|c| match c as char {
|
||||
'|' | '&' | ';' | '<' | '>' | '(' | ')' | '$' | '`' | '\\' | '"' | '\'' | ' ' | '\t' |
|
||||
'\r' | '\n' | '*' | '?' | '[' | '#' | '~' | '=' | '%' => true,
|
||||
_ => false
|
||||
}) {
|
||||
let mut out: Vec<u8> = Vec::new();
|
||||
out.push('"' as u8);
|
||||
for c in in_str.bytes() {
|
||||
match c as char {
|
||||
'$' | '`' | '"' | '\\' => out.push('\\' as u8),
|
||||
_ => ()
|
||||
}
|
||||
out.push(c);
|
||||
/// Errors from [`Quoter::quote`], [`Quoter::join`], etc. (and their [`bytes`] counterparts).
|
||||
///
|
||||
/// By default, the only error that can be returned is [`QuoteError::Nul`]. If you call
|
||||
/// `allow_nul(true)`, then no errors can be returned at all. Any error variants added in the
|
||||
/// future will not be enabled by default; they will be enabled through corresponding non-default
|
||||
/// [`Quoter`] options.
|
||||
///
|
||||
/// ...In theory. In the unlikely event that additional classes of inputs are discovered that,
|
||||
/// like nul bytes, are fundamentally unsafe to quote even for non-interactive shells, the risk
|
||||
/// will be mitigated by adding corresponding [`QuoteError`] variants that *are* enabled by
|
||||
/// default.
|
||||
#[non_exhaustive]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub enum QuoteError {
|
||||
/// The input contained a nul byte. In most cases, shells fundamentally [cannot handle strings
|
||||
/// containing nul bytes](quoting_warning#nul-bytes), no matter how they are quoted. But if
|
||||
/// you're sure you can handle nul bytes, you can call `allow_nul(true)` on the `Quoter` to let
|
||||
/// them pass through.
|
||||
Nul,
|
||||
}
|
||||
|
||||
impl core::fmt::Display for QuoteError {
|
||||
fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
|
||||
match self {
|
||||
QuoteError::Nul => f.write_str("cannot shell-quote string containing nul byte"),
|
||||
}
|
||||
out.push('"' as u8);
|
||||
unsafe { String::from_utf8_unchecked(out) }.into()
|
||||
} else {
|
||||
in_str.into()
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(feature = "std")]
|
||||
impl std::error::Error for QuoteError {}
|
||||
|
||||
/// A more configurable interface to quote strings. If you only want the default settings you can
|
||||
/// use the convenience functions [`try_quote`] and [`try_join`].
|
||||
///
|
||||
/// The bytes equivalent is [`bytes::Quoter`].
|
||||
#[derive(Default, Debug, Clone)]
|
||||
pub struct Quoter {
|
||||
inner: bytes::Quoter,
|
||||
}
|
||||
|
||||
impl Quoter {
|
||||
/// Create a new [`Quoter`] with default settings.
|
||||
#[inline]
|
||||
pub fn new() -> Self {
|
||||
Self::default()
|
||||
}
|
||||
|
||||
/// Set whether to allow [nul bytes](quoting_warning#nul-bytes). By default they are not
|
||||
/// allowed and will result in an error of [`QuoteError::Nul`].
|
||||
#[inline]
|
||||
pub fn allow_nul(mut self, allow: bool) -> Self {
|
||||
self.inner = self.inner.allow_nul(allow);
|
||||
self
|
||||
}
|
||||
|
||||
/// Convenience function that consumes an iterable of words and turns it into a single string,
|
||||
/// quoting words when necessary. Consecutive words will be separated by a single space.
|
||||
pub fn join<'a, I: IntoIterator<Item = &'a str>>(&self, words: I) -> Result<String, QuoteError> {
|
||||
// Safety: given valid UTF-8, bytes::join() will always return valid UTF-8.
|
||||
self.inner.join(words.into_iter().map(|s| s.as_bytes()))
|
||||
.map(|bytes| unsafe { String::from_utf8_unchecked(bytes) })
|
||||
}
|
||||
|
||||
/// Given a single word, return a string suitable to encode it as a shell argument.
|
||||
pub fn quote<'a>(&self, in_str: &'a str) -> Result<Cow<'a, str>, QuoteError> {
|
||||
Ok(match self.inner.quote(in_str.as_bytes())? {
|
||||
Cow::Borrowed(out) => {
|
||||
// Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8.
|
||||
unsafe { core::str::from_utf8_unchecked(out) }.into()
|
||||
}
|
||||
Cow::Owned(out) => {
|
||||
// Safety: given valid UTF-8, bytes::quote() will always return valid UTF-8.
|
||||
unsafe { String::from_utf8_unchecked(out) }.into()
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl From<bytes::Quoter> for Quoter {
|
||||
fn from(inner: bytes::Quoter) -> Quoter {
|
||||
Quoter { inner }
|
||||
}
|
||||
}
|
||||
|
||||
impl From<Quoter> for bytes::Quoter {
|
||||
fn from(quoter: Quoter) -> bytes::Quoter {
|
||||
quoter.inner
|
||||
}
|
||||
}
|
||||
|
||||
/// Convenience function that consumes an iterable of words and turns it into a single string,
|
||||
/// quoting words when necessary. Consecutive words will be separated by a single space.
|
||||
///
|
||||
/// Uses default settings except that nul bytes are passed through, which [may be
|
||||
/// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated.
|
||||
///
|
||||
/// Equivalent to [`Quoter::new().allow_nul(true).join(words).unwrap()`](Quoter).
|
||||
///
|
||||
/// (That configuration never returns `Err`, so this function does not panic.)
|
||||
///
|
||||
/// The bytes equivalent is [bytes::join].
|
||||
#[deprecated(since = "1.3.0", note = "replace with `try_join(words)?` to avoid nul byte danger")]
|
||||
pub fn join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> String {
|
||||
words.into_iter()
|
||||
.map(quote)
|
||||
.collect::<Vec<_>>()
|
||||
.join(" ")
|
||||
Quoter::new().allow_nul(true).join(words).unwrap()
|
||||
}
|
||||
|
||||
/// Convenience function that consumes an iterable of words and turns it into a single string,
|
||||
/// quoting words when necessary. Consecutive words will be separated by a single space.
|
||||
///
|
||||
/// Uses default settings. The only error that can be returned is [`QuoteError::Nul`].
|
||||
///
|
||||
/// Equivalent to [`Quoter::new().join(words)`](Quoter).
|
||||
///
|
||||
/// The bytes equivalent is [bytes::try_join].
|
||||
pub fn try_join<'a, I: IntoIterator<Item = &'a str>>(words: I) -> Result<String, QuoteError> {
|
||||
Quoter::new().join(words)
|
||||
}
|
||||
|
||||
/// Given a single word, return a string suitable to encode it as a shell argument.
|
||||
///
|
||||
/// Uses default settings except that nul bytes are passed through, which [may be
|
||||
/// dangerous](quoting_warning#nul-bytes), leading to this function being deprecated.
|
||||
///
|
||||
/// Equivalent to [`Quoter::new().allow_nul(true).quote(in_str).unwrap()`](Quoter).
|
||||
///
|
||||
/// (That configuration never returns `Err`, so this function does not panic.)
|
||||
///
|
||||
/// The bytes equivalent is [bytes::quote].
|
||||
#[deprecated(since = "1.3.0", note = "replace with `try_quote(str)?` to avoid nul byte danger")]
|
||||
pub fn quote(in_str: &str) -> Cow<str> {
|
||||
Quoter::new().allow_nul(true).quote(in_str).unwrap()
|
||||
}
|
||||
|
||||
/// Given a single word, return a string suitable to encode it as a shell argument.
|
||||
///
|
||||
/// Uses default settings. The only error that can be returned is [`QuoteError::Nul`].
|
||||
///
|
||||
/// Equivalent to [`Quoter::new().quote(in_str)`](Quoter).
|
||||
///
|
||||
/// (Unlike the deprecated [`quote`], this returns `Err` for nul bytes rather than passing them on.)
|
||||
///
|
||||
/// The bytes equivalent is [bytes::try_quote].
|
||||
pub fn try_quote(in_str: &str) -> Result<Cow<str>, QuoteError> {
|
||||
Quoter::new().quote(in_str)
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
@@ -233,17 +284,75 @@ fn test_lineno() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(feature = "std"), allow(unreachable_code, unused_mut))]
|
||||
fn test_quote() {
|
||||
assert_eq!(quote("foobar"), "foobar");
|
||||
assert_eq!(quote("foo bar"), "\"foo bar\"");
|
||||
assert_eq!(quote("\""), "\"\\\"\"");
|
||||
assert_eq!(quote(""), "\"\"");
|
||||
// This is a list of (unquoted, quoted) pairs.
|
||||
// But it's using a single long (raw) string literal with an ad-hoc format, just because it's
|
||||
// hard to read if we have to put the test strings through Rust escaping on top of the escaping
|
||||
// being tested. (Even raw string literals are noisy for short strings).
|
||||
// Ad-hoc: "NL" is replaced with a literal newline; no other escape sequences.
|
||||
let tests = r#"
|
||||
<> => <''>
|
||||
<foobar> => <foobar>
|
||||
<foo bar> => <'foo bar'>
|
||||
<"foo bar'"> => <"\"foo bar'\"">
|
||||
<'foo bar'> => <"'foo bar'">
|
||||
<"> => <'"'>
|
||||
<"'> => <"\"'">
|
||||
<hello!world> => <'hello!world'>
|
||||
<'hello!world> => <"'hello"'!world'>
|
||||
<'hello!> => <"'hello"'!'>
|
||||
<hello ^ world> => <'hello ''^ world'>
|
||||
<hello^> => <hello'^'>
|
||||
<!world'> => <'!world'"'">
|
||||
<{a, b}> => <'{a, b}'>
|
||||
<NL> => <'NL'>
|
||||
<^> => <'^'>
|
||||
<foo^bar> => <foo'^bar'>
|
||||
<NLx^> => <'NLx''^'>
|
||||
<NL^x> => <'NL''^x'>
|
||||
<NL ^x> => <'NL ''^x'>
|
||||
<{a,b}> => <'{a,b}'>
|
||||
<a,b> => <'a,b'>
|
||||
<a..b => <a..b>
|
||||
<'$> => <"'"'$'>
|
||||
<"^> => <'"''^'>
|
||||
"#;
|
||||
let mut ok = true;
|
||||
for test in tests.trim().split('\n') {
|
||||
let parts: Vec<String> = test
|
||||
.replace("NL", "\n")
|
||||
.split("=>")
|
||||
.map(|part| part.trim().trim_start_matches('<').trim_end_matches('>').to_owned())
|
||||
.collect();
|
||||
assert!(parts.len() == 2);
|
||||
let unquoted = &*parts[0];
|
||||
let quoted_expected = &*parts[1];
|
||||
let quoted_actual = try_quote(&parts[0]).unwrap();
|
||||
if quoted_expected != quoted_actual {
|
||||
#[cfg(not(feature = "std"))]
|
||||
panic!("FAIL: for input <{}>, expected <{}>, got <{}>",
|
||||
unquoted, quoted_expected, quoted_actual);
|
||||
#[cfg(feature = "std")]
|
||||
println!("FAIL: for input <{}>, expected <{}>, got <{}>",
|
||||
unquoted, quoted_expected, quoted_actual);
|
||||
ok = false;
|
||||
}
|
||||
}
|
||||
assert!(ok);
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[allow(deprecated)]
|
||||
fn test_join() {
|
||||
assert_eq!(join(vec![]), "");
|
||||
assert_eq!(join(vec![""]), "\"\"");
|
||||
assert_eq!(join(vec![""]), "''");
|
||||
assert_eq!(join(vec!["a", "b"]), "a b");
|
||||
assert_eq!(join(vec!["foo bar", "baz"]), "\"foo bar\" baz");
|
||||
assert_eq!(join(vec!["foo bar", "baz"]), "'foo bar' baz");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_fallible() {
|
||||
assert_eq!(try_join(vec!["\0"]), Err(QuoteError::Nul));
|
||||
assert_eq!(try_quote("\0"), Err(QuoteError::Nul));
|
||||
}
|
||||
|
||||
365
third_party/rust/shlex/src/quoting_warning.md
vendored
Normal file
365
third_party/rust/shlex/src/quoting_warning.md
vendored
Normal file
@@ -0,0 +1,365 @@
|
||||
// vim: textwidth=99
|
||||
/*
|
||||
Meta note: This file is loaded as a .rs file by rustdoc only.
|
||||
*/
|
||||
/*!
|
||||
|
||||
A more detailed version of the [warning at the top level](super#warning) about the `quote`/`join`
|
||||
family of APIs.
|
||||
|
||||
In general, passing the output of these APIs to a shell should recover the original string(s).
|
||||
This page lists cases where it fails to do so.
|
||||
|
||||
In noninteractive contexts, there are only minor issues. 'Noninteractive' includes shell scripts
|
||||
and `sh -c` arguments, or even scripts `source`d from interactive shells. The issues are:
|
||||
|
||||
- [Nul bytes](#nul-bytes)
|
||||
|
||||
- [Overlong commands](#overlong-commands)
|
||||
|
||||
If you are writing directly to the stdin of an interactive (`-i`) shell (i.e., if you are
|
||||
pretending to be a terminal), or if you are writing to a cooked-mode pty (even if the other end is
|
||||
noninteractive), then there is a **severe** security issue:
|
||||
|
||||
- [Control characters](#control-characters-interactive-contexts-only)
|
||||
|
||||
Finally, there are some [solved issues](#solved-issues).
|
||||
|
||||
# List of issues
|
||||
|
||||
## Nul bytes
|
||||
|
||||
For non-interactive shells, the most problematic input is nul bytes (bytes with value 0). The
|
||||
non-deprecated functions all default to returning [`QuoteError::Nul`] when encountering them, but
|
||||
the deprecated [`quote`] and [`join`] functions leave them as-is.
|
||||
|
||||
In Unix, nul bytes can't appear in command arguments, environment variables, or filenames. It's
|
||||
not a question of proper quoting; they just can't be used at all. This is a consequence of Unix's
|
||||
system calls all being designed around nul-terminated C strings.
|
||||
|
||||
Shells inherit that limitation. Most of them do not accept nul bytes in strings even internally.
|
||||
Even when they do, it's pretty much useless or even dangerous, since you can't pass them to
|
||||
external commands.
|
||||
|
||||
In some cases, you might fail to pass the nul byte to the shell in the first place. For example,
|
||||
the following code uses [`join`] to tunnel a command over an SSH connection:
|
||||
|
||||
```rust
|
||||
std::process::Command::new("ssh")
|
||||
.arg("myhost")
|
||||
.arg("--")
|
||||
.arg(join(my_cmd_args))
|
||||
```
|
||||
|
||||
If any argument in `my_cmd_args` contains a nul byte, then `join(my_cmd_args)` will contain a nul
|
||||
byte. But `join(my_cmd_args)` is itself being passed as an argument to a command (the ssh
|
||||
command), and command arguments can't contain nul bytes! So this will simply result in the
|
||||
`Command` failing to launch.
|
||||
|
||||
Still, there are other ways to smuggle nul bytes into a shell. How the shell reacts depends on the
|
||||
shell and the method of smuggling. For example, here is Bash 5.2.21 exhibiting three different
|
||||
behaviors:
|
||||
|
||||
- With ANSI-C quoting, the string is truncated at the first nul byte:
|
||||
```bash
|
||||
$ echo $'foo\0bar' | hexdump -C
|
||||
00000000 66 6f 6f 0a |foo.|
|
||||
```
|
||||
|
||||
- With command substitution, nul bytes are removed with a warning:
|
||||
```bash
|
||||
$ echo $(printf 'foo\0bar') | hexdump -C
|
||||
bash: warning: command substitution: ignored null byte in input
|
||||
00000000 66 6f 6f 62 61 72 0a |foobar.|
|
||||
```
|
||||
|
||||
- When a nul byte appears directly in a shell script, it's removed with no warning:
|
||||
```bash
|
||||
$ printf 'echo "foo\0bar"' | bash | hexdump -C
|
||||
00000000 66 6f 6f 62 61 72 0a |foobar.|
|
||||
```
|
||||
|
||||
Zsh, in contrast, actually allows nul bytes internally, in shell variables and even arguments to
|
||||
builtin commands. But if a variable is exported to the environment, or if an argument is used for
|
||||
an external command, then the child process will see it silently truncated at the first nul. This
|
||||
might actually be more dangerous, depending on the use case.
|
||||
|
||||
## Overlong commands
|
||||
|
||||
If you pass a long string into a shell, several things might happen:
|
||||
|
||||
- It might succeed, yet the shell might have trouble actually doing anything with it. For example:
|
||||
|
||||
```bash
|
||||
x=$(printf '%010000000d' 0); /bin/echo $x
|
||||
bash: /bin/echo: Argument list too long
|
||||
```
|
||||
|
||||
- If you're using certain shells (e.g. Busybox Ash) *and* using a pty for communication, then the
|
||||
shell will impose a line length limit, ignoring all input past the limit.
|
||||
|
||||
- If you're using a pty in cooked mode, then by default, if you write so many bytes as input that
|
||||
it fills the kernel's internal buffer, the kernel will simply drop those bytes, instead of
|
||||
blocking waiting for the shell to empty out the buffer. In other words, random bits of input can
|
||||
be lost, which is obviously insecure.
|
||||
|
||||
Future versions of this crate may add an option to [`Quoter`] to check the length for you.
|
||||
|
||||
## Control characters (*interactive contexts only*)
|
||||
|
||||
Control characters are the bytes from `\x00` to `\x1f`, plus `\x7f`. `\x00` (the nul byte) is
|
||||
discussed [above](#nul-bytes), but what about the rest? Well, many of them correspond to terminal
|
||||
keyboard shortcuts. For example, when you press Ctrl-A at a shell prompt, your terminal sends the
|
||||
byte `\x01`. The shell sees that byte and (if not configured differently) takes the standard
|
||||
action for Ctrl-A, which is to move the cursor to the beginning of the line.
|
||||
|
||||
This means that it's quite dangerous to pipe bytes to an interactive shell. For example, here is a
|
||||
program that tries to tell Bash to echo an arbitrary string, 'safely':
|
||||
```rust
|
||||
use std::process::{Command, Stdio};
|
||||
use std::io::Write;
|
||||
|
||||
let evil_string = "\x01do_something_evil; ";
|
||||
let quoted = shlex::try_quote(evil_string).unwrap();
|
||||
println!("quoted string is {:?}", quoted);
|
||||
|
||||
let mut bash = Command::new("bash")
|
||||
.arg("-i") // force interactive mode
|
||||
.stdin(Stdio::piped())
|
||||
.spawn()
|
||||
.unwrap();
|
||||
let stdin = bash.stdin.as_mut().unwrap();
|
||||
write!(stdin, "echo {}\n", quoted).unwrap();
|
||||
```
|
||||
|
||||
Here's the output of the program (with irrelevant bits removed):
|
||||
|
||||
```text
|
||||
quoted string is "'\u{1}do_something_evil; '"
|
||||
/tmp comex$ do_something_evil; 'echo '
|
||||
bash: do_something_evil: command not found
|
||||
bash: echo : command not found
|
||||
```
|
||||
|
||||
Even though we quoted it, Bash still ran an arbitrary command!
|
||||
|
||||
This is not because the quoting was insufficient, per se. In single quotes, all input is supposed
|
||||
to be treated as raw data until the closing single quote. And in fact, this would work fine
|
||||
without the `"-i"` argument.
|
||||
|
||||
But line input is a separate stage from shell syntax parsing. After all, if you type a single
|
||||
quote on the keyboard, you wouldn't expect it to disable all your keyboard shortcuts. So a control
|
||||
character always has its designated effect, no matter if it's quoted or backslash-escaped.
|
||||
|
||||
Also, some control characters are interpreted by the kernel tty layer instead, like CTRL-C to send
|
||||
SIGINT. These can be an issue even with noninteractive shells, but only if using a pty for
|
||||
communication, as opposed to a pipe.
|
||||
|
||||
To be safe, you just have to avoid sending them.
|
||||
|
||||
### Why not just use hex escapes?
|
||||
|
||||
In any normal programming language, this would be no big deal.
|
||||
|
||||
Any normal language has a way to escape arbitrary characters in strings by writing out their
|
||||
numeric values. For example, Rust lets you write them in hexadecimal, like `"\x4f"` (or
|
||||
`"\u{1d546}"` for Unicode). In this way, arbitrary strings can be represented using only 'nice'
|
||||
simple characters. Any remotely suspicious character can be replaced with a numeric escape
|
||||
sequence, where the escape sequence itself consists only of alphanumeric characters and some
|
||||
punctuation. The result may not be the most readable[^choices], but it's quite safe from being
|
||||
misinterpreted or corrupted in transit.
|
||||
|
||||
Shell is not normal. It has no numeric escape sequences.
|
||||
|
||||
There are a few different ways to quote characters (unquoted, unquoted-with-backslash, single
|
||||
quotes, double quotes), but all of them involve writing the character itself. If the input
|
||||
contains a control character, the output must contain that same character.
|
||||
|
||||
### Mitigation: terminal filters
|
||||
|
||||
In practice, automating interactive shells like in the above example is pretty uncommon these days.
|
||||
In most cases, the only way for a programmatically generated string to make its way to the input of
|
||||
an interactive shell is if a human copies and pastes it into their terminal.
|
||||
|
||||
And many terminals detect when you paste a string containing control characters. iTerm2 strips
|
||||
them out; gnome-terminal replaces them with alternate characters[^gr]; Kitty outright prompts for
|
||||
confirmation. This mitigates the risk.
|
||||
|
||||
But it's not perfect. Some other terminals don't implement this check or implement it incorrectly.
|
||||
Also, these checks tend to not filter the tab character, which could trigger tab completion. In
|
||||
most cases that's a non-issue, because most shells support paste bracketing, which disables tab and
|
||||
some other control characters[^bracketing] within pasted text. But in some cases paste bracketing
|
||||
gets disabled.
|
||||
|
||||
### Future possibility: ANSI-C quoting
|
||||
|
||||
I said that shell syntax has no numeric escapes, but that only applies to *portable* shell syntax.
|
||||
Bash and Zsh support an obscure alternate quoting style with the syntax `$'foo'`. It's called
|
||||
["ANSI-C quoting"][ansic], and inside it you can use all the escape sequences supported by C,
|
||||
including hex escapes:
|
||||
|
||||
```bash
|
||||
$ echo $'\x41\n\x42'
|
||||
A
|
||||
B
|
||||
```
|
||||
|
||||
But other shells don't support it — including Dash, a popular choice for `/bin/sh`, and Busybox's
|
||||
Ash, frequently seen on stripped-down embedded systems. This crate's quoting functionality [tries
|
||||
to be compatible](crate#compatibility) with those shells, plus all other POSIX-compatible shells.
|
||||
That makes ANSI-C quoting a no-go.
|
||||
|
||||
Still, future versions of this crate may provide an option to enable ANSI-C quoting, at the cost of
|
||||
reduced portability.
|
||||
|
||||
### Future possibility: printf
|
||||
|
||||
Another option would be to invoke the `printf` command, which is required by POSIX to support octal
|
||||
escapes. For example, you could 'escape' the Rust string `"\x01"` into the shell syntax `"$(printf
|
||||
'\001')"`. The shell will execute the command `printf` with the first argument being literally a
|
||||
backslash followed by three digits; `printf` will output the actual byte with value 1; and the
|
||||
shell will substitute that back into the original command.
|
||||
|
||||
The problem is that 'escaping' a string into a command substitution just feels too surprising. If
|
||||
nothing else, it only works with an actual shell; [other languages' shell parsing
|
||||
routines](crate#compatibility) wouldn't understand it. Neither would this crate's own parser,
|
||||
though that could be fixed.
|
||||
|
||||
Future versions of this crate may provide an option to use `printf` for quoting.
|
||||
|
||||
### Special note: newlines
|
||||
|
||||
Did you know that `\r` and `\n` are control characters? They aren't as dangerous as other control
|
||||
characters (if quoted properly). But there's still an issue with them in interactive contexts.
|
||||
|
||||
Namely, in some cases, interactive shells and/or the tty layer will 'helpfully' translate between
|
||||
different line ending conventions. The possibilities include replacing `\r` with `\n`, replacing
|
||||
`\n` with `\r\n`, and others. This can't result in command injection, but it's still a lossy
|
||||
transformation which can result in a failure to round-trip (i.e. the shell sees a different string
|
||||
from what was originally passed to `quote`).
|
||||
|
||||
Numeric escapes would solve this as well.
|
||||
|
||||
# Solved issues
|
||||
|
||||
## Solved: Past vulnerability (GHSA-r7qv-8r2h-pg27 / RUSTSEC-2024-0006)
|
||||
|
||||
Versions of this crate before 1.3.0 did not quote `{`, `}`, and `\xa0`.
|
||||
|
||||
See:
|
||||
- <https://github.com/advisories/GHSA-r7qv-8r2h-pg27>
|
||||
- <https://rustsec.org/advisories/RUSTSEC-2024-0006.html>
|
||||
|
||||
## Solved: `!` and `^`
|
||||
|
||||
There are two non-control characters which have a special meaning in interactive contexts only: `!` and
|
||||
`^`. Luckily, these can be escaped adequately.
|
||||
|
||||
The `!` character triggers [history expansion][he]; the `^` character can trigger a variant of
|
||||
history expansion known as [Quick Substitution][qs]. Both of these characters get expanded even
|
||||
inside of double-quoted strings\!
|
||||
|
||||
If we're in a double-quoted string, then we can't just escape these characters with a backslash.
|
||||
Only a specific set of characters can be backslash-escaped inside double quotes; the set of
|
||||
supported characters depends on the shell, but it often doesn't include `!` and `^`.[^escbs]
|
||||
Trying to backslash-escape an unsupported character produces a literal backslash:
|
||||
```bash
|
||||
$ echo "\!"
|
||||
\!
|
||||
```
|
||||
|
||||
However, these characters don't get expanded in single-quoted strings, so this crate just
|
||||
single-quotes them.
|
||||
|
||||
But there's a Bash bug where `^` actually does get partially expanded in single-quoted strings:
|
||||
```bash
|
||||
$ echo '
|
||||
> ^a^b
|
||||
> '
|
||||
|
||||
!!:s^a^b
|
||||
```
|
||||
|
||||
To work around that, this crate forces `^` to appear right after an opening single quote. For
|
||||
example, the string `"^` is quoted into `'"''^'` instead of `'"^'`. This restriction is overkill,
|
||||
since `^` is only meaningful right after a newline, but it's a sufficient restriction (after all, a
|
||||
`^` character can't be preceded by a newline if it's forced to be preceded by a single quote), and
|
||||
for now it simplifies things.
|
||||
|
||||
## Solved: `\xa0`
|
||||
|
||||
The byte `\xa0` may be treated as a shell word separator, specifically on Bash on macOS when using
|
||||
the default UTF-8 locale, only when the input is invalid UTF-8. This crate handles the issue by
|
||||
always using quotes for arguments containing this byte.
|
||||
|
||||
In fact, this crate always uses quotes for arguments containing any non-ASCII bytes. This may be
|
||||
changed in the future, since it's a bit unfriendly to non-English users. But for now it
|
||||
minimizes risk, especially considering the large number of different legacy single-byte locales
|
||||
someone might hypothetically be running their shell in.
|
||||
|
||||
### Demonstration
|
||||
|
||||
```bash
|
||||
$ echo -e 'ls a\xa0b' | bash
|
||||
ls: a: No such file or directory
|
||||
ls: b: No such file or directory
|
||||
```
|
||||
The normal behavior would be to output a single line, e.g.:
|
||||
```bash
|
||||
$ echo -e 'ls a\xa0b' | bash
|
||||
ls: cannot access 'a'$'\240''b': No such file or directory
|
||||
```
|
||||
(The specific quoting in the error doesn't matter.)
|
||||
|
||||
### Cause
|
||||
|
||||
Just for fun, here's why this behavior occurs:
|
||||
|
||||
Bash decides which bytes serve as word separators based on the libc function [`isblank`][isblank].
|
||||
On macOS on UTF-8 locales, this passes for `\xa0`, corresponding to U+00A0 NO-BREAK SPACE.
|
||||
|
||||
This is doubly unique compared to the other systems I tested (Linux/glibc, Linux/musl, and
|
||||
Windows/MSVC). First, the other systems don't allow bytes in the range [0x80, 0xFF] to pass
|
||||
<code>is<i>foo</i></code> functions in UTF-8 locales, even if the corresponding Unicode codepoint
|
||||
does pass, as determined by the wide-character equivalent function, <code>isw<i>foo</i></code>.
|
||||
Second, the other systems don't treat U+00A0 as blank (even using `iswblank`).
|
||||
|
||||
Meanwhile, Bash checks for multi-byte sequences and forbids them from being treated as special
|
||||
characters, so the proper UTF-8 encoding of U+00A0, `b"\xc2\xa0"`, is not treated as a word
|
||||
separator. Treatment as a word separator only happens for `b"\xa0"` alone, which is illegal UTF-8.
|
||||
|
||||
[ansic]: https://www.gnu.org/software/bash/manual/html_node/ANSI_002dC-Quoting.html
|
||||
[he]: https://www.gnu.org/software/bash/manual/html_node/History-Interaction.html
|
||||
[qs]: https://www.gnu.org/software/bash/manual/html_node/Event-Designators.html
|
||||
[isblank]: https://man7.org/linux/man-pages/man3/isblank.3p.html
|
||||
[nul]: #nul-bytes
|
||||
|
||||
[^choices]: This can lead to tough choices over which
|
||||
characters to escape and which to leave as-is, especially when Unicode gets involved and you
|
||||
have to balance the risk of confusion with the benefit of properly supporting non-English
|
||||
languages.
|
||||
<br>
|
||||
<br>
|
||||
We don't have the luxury of those choices.
|
||||
|
||||
[^gr]: For example, backspace (in Unicode lingo, U+0008 BACKSPACE) turns into U+2408 SYMBOL FOR BACKSPACE.
|
||||
|
||||
[^bracketing]: It typically disables almost all handling of control characters by the shell proper,
|
||||
but one necessary exception is the end-of-paste sequence itself (which starts with the control
|
||||
character `\x1b`). In addition, paste bracketing does not suppress handling of control
|
||||
characters by the kernel tty layer, such as `\x03` sending SIGINT (which typically clears the
|
||||
currently typed command, making it dangerous in a similar way to `\x01`).
|
||||
|
||||
[^escbs]: For example, Dash doesn't remove the backslash from `"\!"` because it simply doesn't know
|
||||
anything about `!` as a special character: it doesn't support history expansion. On the other
|
||||
end of the spectrum, Zsh supports history expansion and does remove the backslash — though only
|
||||
in interactive mode. Bash's behavior is weirder. It supports history expansion, and if you
|
||||
write `"\!"`, the backslash does prevent history expansion from occurring — but it doesn't get
|
||||
removed!
|
||||
|
||||
*/
|
||||
|
||||
// `use` declarations to make auto links work:
|
||||
use ::{quote, join, Shlex, Quoter, QuoteError};
|
||||
|
||||
// TODO: add more about copy-paste and human readability.
|
||||
Reference in New Issue
Block a user