set pathname to /monkey/..//>
- set pathname to
- set pathname to
- set pathname to /p>
- set pathname to
diff --git a/url/tests/unit.rs b/url/tests/unit.rs
index 6a9430bd..57465395 100644
--- a/url/tests/unit.rs
+++ b/url/tests/unit.rs
@@ -1387,3 +1387,43 @@ fn serde_error_message() {
r#"relative URL without a base: "§invalid#+#*Ä" at line 1 column 25"#
);
}
+
+#[test]
+fn test_fuzzing_uri_failures() {
+ use url::quirks;
+ let mut url = Url::parse("data:/.dummy.path").unwrap();
+ assert!(!url.cannot_be_a_base());
+
+ url.set_path(".dummy.path");
+ assert_eq!(url.as_str(), "data:/.dummy.path");
+ assert_eq!(url.path(), "/.dummy.path");
+ url.check_invariants().unwrap();
+
+ url.path_segments_mut()
+ .expect("should have path segments")
+ .push(".another.dummy.path");
+ assert_eq!(url.as_str(), "data:/.dummy.path/.another.dummy.path");
+ assert_eq!(url.path(), "/.dummy.path/.another.dummy.path");
+ url.check_invariants().unwrap();
+
+ url = Url::parse("web+demo:/").unwrap();
+ assert!(!url.cannot_be_a_base());
+
+ url.set_path("//.dummy.path");
+ assert_eq!(url.path(), "//.dummy.path");
+
+ let segments: Vec<_> = url
+ .path_segments()
+ .expect("should have path segments")
+ .collect();
+ assert_eq!(segments, vec!["", ".dummy.path"]);
+ assert_eq!(url.as_str(), "web+demo:/.//.dummy.path");
+
+ quirks::set_hostname(&mut url, ".dummy.host").unwrap();
+ assert_eq!(url.as_str(), "web+demo://.dummy.host//.dummy.path");
+ url.check_invariants().unwrap();
+
+ quirks::set_hostname(&mut url, "").unwrap();
+ assert_eq!(url.as_str(), "web+demo:////.dummy.path");
+ url.check_invariants().unwrap();
+}
From 86bd58fe2b6968488a856c99cea924b345791046 Mon Sep 17 00:00:00 2001
From: Sean Kim <40130483+theskim@users.noreply.github.com>
Date: Tue, 17 Dec 2024 17:18:54 -0500
Subject: [PATCH 2/3] Fix mutating path of URL without authority (idempotency,
empty path segments)
---
url/src/lib.rs | 13 +++++++--
url/src/path_segments.rs | 42 ++++++++++++++++++++++++--
url/tests/expected_failures.txt | 2 --
url/tests/unit.rs | 52 +++++++++++++++++++++++++++++++++
4 files changed, 103 insertions(+), 6 deletions(-)
diff --git a/url/src/lib.rs b/url/src/lib.rs
index 8432e48a..35006aeb 100644
--- a/url/src/lib.rs
+++ b/url/src/lib.rs
@@ -2128,7 +2128,7 @@ impl Url {
} else {
self.host_end
};
- let suffix = self.slice(old_suffix_pos..).to_owned();
+ let mut suffix = self.slice(old_suffix_pos..).to_owned();
self.serialization.truncate(self.host_start as usize);
if !self.has_authority() {
debug_assert!(self.slice(self.scheme_end..self.host_start) == ":");
@@ -2148,7 +2148,16 @@ impl Url {
write!(&mut self.serialization, ":{}", port).unwrap();
}
}
- let new_suffix_pos = to_u32(self.serialization.len()).unwrap();
+ let mut new_suffix_pos = to_u32(self.serialization.len()).unwrap();
+
+ // Remove starting "/." for empty path segment followed by the host
+ if suffix.starts_with("/.//") {
+ let adjustment: usize = "/.".len();
+ suffix.drain(..adjustment);
+ // pathname should be "//p" not "p" given that the first segment was empty
+ new_suffix_pos -= adjustment as u32;
+ }
+
self.serialization.push_str(&suffix);
let adjust = |index: &mut u32| {
diff --git a/url/src/path_segments.rs b/url/src/path_segments.rs
index e6363c5c..5da8af0d 100644
--- a/url/src/path_segments.rs
+++ b/url/src/path_segments.rs
@@ -239,7 +239,7 @@ impl PathSegmentsMut<'_> {
I::Item: AsRef,
{
let scheme_type = SchemeType::from(self.url.scheme());
- let path_start = self.url.path_start as usize;
+ let mut path_start = self.url.path_start as usize;
self.url.mutate(|parser| {
parser.context = parser::Context::PathSegmentSetter;
for segment in segments {
@@ -253,7 +253,44 @@ impl PathSegmentsMut<'_> {
{
parser.serialization.push('/');
}
- let mut has_host = true; // FIXME account for this?
+
+ let mut path_empty = false;
+
+ // Check ':' and then see if the next character is '/'
+ let mut has_host = if let Some(index) = parser.serialization.find(":") {
+ if parser.serialization.len() > index + 1
+ && parser.serialization.as_bytes().get(index + 1) == Some(&b'/')
+ {
+ let rest = &parser.serialization[(index + ":/".len())..];
+ let host_part = rest.split('/').next().unwrap_or("");
+ path_empty = rest.is_empty();
+ !host_part.is_empty() && !host_part.contains('@')
+ } else {
+ false
+ }
+ } else {
+ false
+ };
+
+ // For cases where normalization is applied across both the serialization and the path.
+ // Append "/." immediately after the scheme (up to ":")
+ // This is done if three conditions are met.
+ // https://url.spec.whatwg.org/#url-serializing
+ // 1. The host is null
+ // 2. The url's path length is greater than 1
+ // 3. the first segment of the URL's path is an empty string
+ if !has_host && segment.len() > 1 && path_empty {
+ if let Some(index) = parser.serialization.find(":") {
+ if parser.serialization.len() == index + 2
+ && parser.serialization.as_bytes().get(index + 1) == Some(&b'/')
+ {
+ // Append an extra '/' to ensure that "/./path" becomes "/.//path"
+ parser.serialization.insert_str(index + ":".len(), "/./");
+ path_start += "/.".len();
+ }
+ }
+ }
+
parser.parse_path(
scheme_type,
&mut has_host,
@@ -262,6 +299,7 @@ impl PathSegmentsMut<'_> {
);
}
});
+ self.url.path_start = path_start as u32;
self
}
}
diff --git a/url/tests/expected_failures.txt b/url/tests/expected_failures.txt
index d1ed726c..9bf60b34 100644
--- a/url/tests/expected_failures.txt
+++ b/url/tests/expected_failures.txt
@@ -36,8 +36,6 @@
set hostname to
set hostname to
- set hostname to
- set hostname to <>
set pathname to <>
set href to
set pathname to <\\\\>
diff --git a/url/tests/unit.rs b/url/tests/unit.rs
index 57465395..8312efe6 100644
--- a/url/tests/unit.rs
+++ b/url/tests/unit.rs
@@ -1427,3 +1427,55 @@ fn test_fuzzing_uri_failures() {
assert_eq!(url.as_str(), "web+demo:////.dummy.path");
url.check_invariants().unwrap();
}
+
+#[test]
+fn test_can_be_a_base_with_set_path() {
+ use url::quirks;
+ let mut url = Url::parse("web+demo:/").unwrap();
+ assert!(!url.cannot_be_a_base());
+
+ url.set_path("//not-a-host");
+ assert_eq!(url.path(), "//not-a-host");
+
+ let segments: Vec<_> = url
+ .path_segments()
+ .expect("should have path segments")
+ .collect();
+
+ assert_eq!(segments, vec!["", "not-a-host"]);
+
+ url.set_query(Some("query"));
+ url.set_fragment(Some("frag"));
+
+ assert_eq!(url.as_str(), "web+demo:/.//not-a-host?query#frag");
+ quirks::set_hostname(&mut url, "test").unwrap();
+ assert_eq!(url.as_str(), "web+demo://test//not-a-host?query#frag");
+ url.check_invariants().unwrap();
+ quirks::set_hostname(&mut url, "").unwrap();
+ assert_eq!(url.as_str(), "web+demo:////not-a-host?query#frag");
+ url.check_invariants().unwrap();
+}
+
+#[test]
+fn test_can_be_a_base_with_path_segments_mut() {
+ let mut url = Url::parse("web+demo:/").unwrap();
+ assert!(!url.cannot_be_a_base());
+
+ url.path_segments_mut()
+ .expect("should have path segments")
+ .push("")
+ .push("not-a-host");
+
+ url.set_query(Some("query"));
+ url.set_fragment(Some("frag"));
+
+ assert_eq!(url.as_str(), "web+demo:/.//not-a-host?query#frag");
+ assert_eq!(url.path(), "//not-a-host");
+ url.check_invariants().unwrap();
+
+ let segments: Vec<_> = url
+ .path_segments()
+ .expect("should have path segments")
+ .collect();
+ assert_eq!(segments, vec!["", "not-a-host"]);
+}
From 2c307f16b78eb5c02356945ca7093e95822ef9a2 Mon Sep 17 00:00:00 2001
From: Sean Kim <40130483+theskim@users.noreply.github.com>
Date: Wed, 18 Dec 2024 12:30:33 -0500
Subject: [PATCH 3/3] Improve testing for host, path, query, hash when mutating
path
---
url/tests/unit.rs | 32 ++++++++++++++++++++++++++++++++
1 file changed, 32 insertions(+)
diff --git a/url/tests/unit.rs b/url/tests/unit.rs
index 8312efe6..85b7f1f0 100644
--- a/url/tests/unit.rs
+++ b/url/tests/unit.rs
@@ -1479,3 +1479,35 @@ fn test_can_be_a_base_with_path_segments_mut() {
.collect();
assert_eq!(segments, vec!["", "not-a-host"]);
}
+
+#[test]
+fn test_valid_indices_after_set_path() {
+ // Testing everything
+ let mut url = Url::parse("moz:/").unwrap();
+ assert!(!url.cannot_be_a_base());
+
+ url.set_path("/.//p");
+ url.set_host(Some("host")).unwrap();
+ url.set_query(Some("query"));
+ url.set_fragment(Some("frag"));
+ assert_eq!(url.as_str(), "moz://host//p?query#frag");
+ assert_eq!(url.host(), Some(Host::Domain("host")));
+ assert_eq!(url.path(), "//p");
+ assert_eq!(url.query(), Some("query"));
+ assert_eq!(url.fragment(), Some("frag"));
+ url.check_invariants().unwrap();
+
+ url = Url::parse("moz:/.//").unwrap();
+ assert!(!url.cannot_be_a_base());
+
+ url.set_path("p");
+ url.set_host(Some("host")).unwrap();
+ url.set_query(Some("query"));
+ url.set_fragment(Some("frag"));
+ assert_eq!(url.as_str(), "moz://host/p?query#frag");
+ assert_eq!(url.host(), Some(Host::Domain("host")));
+ assert_eq!(url.path(), "/p");
+ assert_eq!(url.query(), Some("query"));
+ assert_eq!(url.fragment(), Some("frag"));
+ url.check_invariants().unwrap();
+}