Skip to content

Commit

Permalink
add option to encode space to '%20' as per url standard
Browse files Browse the repository at this point in the history
By default, the space character is always encoded as '+'. This is
wrong: the URL Standard [0] specifies '%20' as the default encoding.
PR servo#928 fixes this behavior, but is obviously a breaking change. To
introduce this feature early, add a new function that sets the correct
behavior. This way, we can use it without causing a breaking change.

[0]: https://url.spec.whatwg.org/#string-percent-encode-after-encoding

Fixes: servo#927
Fixes: servo#888

Signed-off-by: Gabriel Goller <[email protected]>
  • Loading branch information
kaffarell committed Feb 17, 2025
1 parent 4b9f1e6 commit a824f5c
Showing 1 changed file with 57 additions and 3 deletions.
60 changes: 57 additions & 3 deletions form_urlencoded/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -115,17 +115,34 @@ impl Iterator for ParseIntoOwned<'_> {
}

/// The [`application/x-www-form-urlencoded` byte serializer](
/// https://url.spec.whatwg.org/#concept-urlencoded-byte-serializer).
/// https://url.spec.whatwg.org/#string-percent-encode-after-encoding).
/// Converts spaces (b' ') to plus signs (b'+').
///
/// Return an iterator of `&str` slices.
pub fn byte_serialize(input: &[u8]) -> ByteSerialize<'_> {
ByteSerialize { bytes: input }
ByteSerialize {
bytes: input,
space_as_plus: true,
}
}

/// Percent-encoding variant of [`byte_serialize`]: walks `input` with the same
/// serializer, but a space (b' ') is emitted as "%20" rather than "+"
/// (see [string percent-encode after encoding](
/// https://url.spec.whatwg.org/#string-percent-encode-after-encoding)).
///
/// Return an iterator of `&str` slices.
pub fn byte_serialize_percent_encoded(input: &[u8]) -> ByteSerialize<'_> {
    // Leaving `space_as_plus` off makes the iterator percent-encode spaces.
    let space_as_plus = false;
    ByteSerialize {
        bytes: input,
        space_as_plus,
    }
}

/// Return value of `byte_serialize()` and `byte_serialize_percent_encoded()`.
#[derive(Debug)]
pub struct ByteSerialize<'a> {
    /// Input bytes that have not been serialized yet.
    bytes: &'a [u8],
    /// When `true`, a space is written as "+"; when `false`, as "%20".
    space_as_plus: bool,
}

fn byte_serialized_unchanged(byte: u8) -> bool {
Expand All @@ -139,7 +156,7 @@ impl<'a> Iterator for ByteSerialize<'a> {
if let Some((&first, tail)) = self.bytes.split_first() {
if !byte_serialized_unchanged(first) {
self.bytes = tail;
return Some(if first == b' ' {
return Some(if first == b' ' && self.space_as_plus {
"+"
} else {
percent_encode_byte(first)
Expand Down Expand Up @@ -428,3 +445,40 @@ pub(crate) fn decode_utf8_lossy(input: Cow<'_, [u8]>) -> Cow<'_, str> {
}

/// Optional override for turning a `&str` into bytes: `Some` holds a borrowed
/// callback that returns the encoded bytes as a `Cow<[u8]>`.
/// NOTE(review): `None` presumably means "use the default (UTF-8) encoding" —
/// confirm at the call sites.
pub type EncodingOverride<'a> = Option<&'a dyn Fn(&str) -> Cow<'_, [u8]>>;

#[cfg(test)]
mod tests {
    use alloc::string::String;

    use crate::{byte_serialize, byte_serialize_percent_encoded};

    /// `byte_serialize` writes a space as "+" and percent-encodes the other
    /// reserved or non-ASCII bytes.
    #[test]
    fn byte_serializer() {
        let cases = [
            ("c ool/org", "c+ool%2Forg"),
            (
                "a🔒nother&bu=ck?et ",
                "a%F0%9F%94%92nother%26bu%3Dck%3Fet+",
            ),
        ];

        for &(input, expected) in &cases {
            let encoded: String = byte_serialize(input.as_bytes()).collect();
            assert_eq!(encoded, expected);
        }
    }

    /// `byte_serialize_percent_encoded` handles the same inputs, but a space
    /// comes out as "%20".
    #[test]
    fn byte_serializer_percent_encoded() {
        let cases = [
            ("c ool/org", "c%20ool%2Forg"),
            (
                "a🔒nother&bu=ck?et ",
                "a%F0%9F%94%92nother%26bu%3Dck%3Fet%20",
            ),
        ];

        for &(input, expected) in &cases {
            let encoded: String = byte_serialize_percent_encoded(input.as_bytes()).collect();
            assert_eq!(encoded, expected);
        }
    }
}

0 comments on commit a824f5c

Please sign in to comment.