Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Remove unsafe from PathNormalizer #56805

Merged
merged 13 commits into from
Aug 5, 2024
261 changes: 91 additions & 170 deletions src/Servers/Kestrel/Core/src/Internal/Http/PathNormalizer.cs
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
// The .NET Foundation licenses this file to you under the MIT license.

using System.Diagnostics;
using System.Runtime.InteropServices;
using System.Text;
using Microsoft.AspNetCore.Internal;
using Microsoft.AspNetCore.Server.Kestrel.Core.Internal.Infrastructure;
Expand Down Expand Up @@ -50,193 +51,113 @@ public static string DecodePath(Span<byte> path, bool pathEncoded, string rawTar
}

// In-place implementation of the algorithm from https://tools.ietf.org/html/rfc3986#section-5.2.4
public static unsafe int RemoveDotSegments(Span<byte> input)
public static int RemoveDotSegments(Span<byte> src)
{
fixed (byte* start = input)
{
var end = start + input.Length;
return RemoveDotSegments(start, end);
}
}

public static unsafe int RemoveDotSegments(byte* start, byte* end)
{
if (!ContainsDotSegments(start, end))
{
return (int)(end - start);
}
Debug.Assert(src[0] == '/', "Path segment must always start with a '/'");
ReadOnlySpan<byte> dotSlash = "./"u8;
ReadOnlySpan<byte> slashDot = "/."u8;

var src = start;
var dst = start;
var writtenLength = 0;
var readPointer = 0;

while (src < end)
while (src.Length > readPointer)
{
var ch1 = *src;
Debug.Assert(ch1 == '/', "Path segment must always start with a '/'");

byte ch2, ch3, ch4;

switch (end - src)
var currentSrc = src[readPointer..];
var nextDotSegmentIndex = currentSrc.IndexOf(slashDot);
if (nextDotSegmentIndex < 0 && readPointer == 0)
{
case 1:
break;
case 2:
ch2 = *(src + 1);

if (ch2 == ByteDot)
{
// B. if the input buffer begins with a prefix of "/./" or "/.",
// where "." is a complete path segment, then replace that
// prefix with "/" in the input buffer; otherwise,
src += 1;
*src = ByteSlash;
continue;
}

break;
case 3:
ch2 = *(src + 1);
ch3 = *(src + 2);

if (ch2 == ByteDot && ch3 == ByteDot)
{
// C. if the input buffer begins with a prefix of "/../" or "/..",
// where ".." is a complete path segment, then replace that
// prefix with "/" in the input buffer and remove the last
// segment and its preceding "/" (if any) from the output
// buffer; otherwise,
src += 2;
*src = ByteSlash;

if (dst > start)
{
do
{
dst--;
} while (dst > start && *dst != ByteSlash);
}

continue;
}
else if (ch2 == ByteDot && ch3 == ByteSlash)
{
// B. if the input buffer begins with a prefix of "/./" or "/.",
// where "." is a complete path segment, then replace that
// prefix with "/" in the input buffer; otherwise,
src += 2;
continue;
}

break;
default:
ch2 = *(src + 1);
ch3 = *(src + 2);
ch4 = *(src + 3);

if (ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash)
{
// C. if the input buffer begins with a prefix of "/../" or "/..",
// where ".." is a complete path segment, then replace that
// prefix with "/" in the input buffer and remove the last
// segment and its preceding "/" (if any) from the output
// buffer; otherwise,
src += 3;

if (dst > start)
{
do
{
dst--;
} while (dst > start && *dst != ByteSlash);
}

continue;
}
else if (ch2 == ByteDot && ch3 == ByteSlash)
{
// B. if the input buffer begins with a prefix of "/./" or "/.",
// where "." is a complete path segment, then replace that
// prefix with "/" in the input buffer; otherwise,
src += 2;
continue;
}

break;
return src.Length;
}

// E. move the first path segment in the input buffer to the end of
// the output buffer, including the initial "/" character (if
// any) and any subsequent characters up to, but not including,
// the next "/" character or the end of the input buffer.
do
if (nextDotSegmentIndex < 0)
{
*dst++ = ch1;
ch1 = *++src;
} while (src < end && ch1 != ByteSlash);
}

if (dst == start)
{
*dst++ = ByteSlash;
}

return (int)(dst - start);
}

public static unsafe bool ContainsDotSegments(byte* start, byte* end)
{
var src = start;
while (src < end)
{
var ch1 = *src;
Debug.Assert(ch1 == '/', "Path segment must always start with a '/'");

byte ch2, ch3, ch4;

switch (end - src)
// Copy the remianing src to dst, and return.
currentSrc.CopyTo(src[writtenLength..]);
writtenLength += src.Length - readPointer;
return writtenLength;
}
else if (nextDotSegmentIndex > 0)
{
case 1:
break;
case 2:
ch2 = *(src + 1);
// Copy until the next segment excluding the trailer.
currentSrc[..nextDotSegmentIndex].CopyTo(src[writtenLength..]);
writtenLength += nextDotSegmentIndex;
readPointer += nextDotSegmentIndex;
}

if (ch2 == ByteDot)
{
return true;
}
var remainingLength = src.Length - readPointer;

break;
case 3:
ch2 = *(src + 1);
ch3 = *(src + 2);
// Case of /../ or /./ or non-dot segments.
if (remainingLength > 3)
{
var nextIndex = readPointer + 2;

if (src[nextIndex] == ByteSlash)
{
// Case: /./
readPointer = nextIndex;
}
else if (MemoryMarshal.CreateSpan(ref src[nextIndex], 2).StartsWith(dotSlash))
{
// Case: /../
// Remove the last segment and replace the path with /
var lastIndex = MemoryMarshal.CreateSpan(ref src[0], writtenLength).LastIndexOf(ByteSlash);

// Move write pointer to the end of the previous segment without / or to start position
writtenLength = int.Max(0, lastIndex);

// Move the read pointer to the next segments beginning including /
readPointer += 3;
}
else
{
// Not a dot segment e.g. /.a, copy the matched /. and bump the read pointer
slashDot.CopyTo(src[writtenLength..]);
writtenLength += 2;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we technically copy 3 values since we know the 3rd character isn't a . or /?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think yes, though it also requires then to increment the read pointer.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, but that's fine?
In the case below where we have else if (remainingLength == 3) then we can avoid an extra iteration of the loop.

Copy link
Contributor Author

@ladeak ladeak Aug 2, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That is a very good point for the if (remainingLength == 3) chars, let me update it.

I will also check the >=3 case and measure it.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Updated as suggested and updated the performance measurements. Please feel free to have a look, it looks reasonable to me.

readPointer = nextIndex;
}
}

if ((ch2 == ByteDot && ch3 == ByteDot) ||
(ch2 == ByteDot && ch3 == ByteSlash))
// Ending with /.. or /./ or non-dot segments.
else if (remainingLength == 3)
{
var nextIndex = readPointer + 2;
if (src[nextIndex] == ByteSlash)
{
// Case: /./ Replace the /./ segment with a closing /
src[writtenLength++] = ByteSlash;
return writtenLength;
}
else if (src[nextIndex] == ByteDot)
{
// Case: /.. Remove the last segment and replace the path with /
var lastSlashIndex = MemoryMarshal.CreateSpan(ref src[0], writtenLength).LastIndexOf(ByteSlash);

// If this was the beginning of the string, then return /
if (lastSlashIndex < 0)
{
return true;
Debug.Assert(src[0] == '/');
return 1;
}

break;
default:
ch2 = *(src + 1);
ch3 = *(src + 2);
ch4 = *(src + 3);

if ((ch2 == ByteDot && ch3 == ByteDot && ch4 == ByteSlash) ||
(ch2 == ByteDot && ch3 == ByteSlash))
else
{
return true;
writtenLength = lastSlashIndex + 1;
}

break;
return writtenLength;
}
else
{
// Not a dot segment e.g. /.a, copy the /. and bump the read pointer.
slashDot.CopyTo(src[writtenLength..]);
writtenLength += 2;
readPointer = nextIndex;
}
}

do
// Ending with /.
else if (remainingLength == 2)
{
ch1 = *++src;
} while (src < end && ch1 != ByteSlash);
src[writtenLength++] = ByteSlash;
return writtenLength;
}
}

return false;
return writtenLength;
}
}
25 changes: 24 additions & 1 deletion src/Servers/Kestrel/Core/test/PathNormalizerTests.cs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Licensed to the .NET Foundation under one or more agreements.
// Licensed to the .NET Foundation under one or more agreements.
// The .NET Foundation licenses this file to you under the MIT license.

using System;
Expand Down Expand Up @@ -54,6 +54,29 @@ public class PathNormalizerTests
[InlineData("/", "/")]
[InlineData("/no/segments", "/no/segments")]
[InlineData("/no/segments/", "/no/segments/")]
[InlineData("/././", "/")]
[InlineData("/./.", "/")]
[InlineData("/../..", "/")]
[InlineData("/../../", "/")]
[InlineData("/../.", "/")]
[InlineData("/./..", "/")]
[InlineData("/.././", "/")]
[InlineData("/./../", "/")]
[InlineData("/..", "/")]
[InlineData("/.", "/")]
[InlineData("/a/abc/../abc/../b", "/a/b")]
[InlineData("/a/abc/.a", "/a/abc/.a")]
[InlineData("/a/abc/..a", "/a/abc/..a")]
[InlineData("/a/.b/c", "/a/.b/c")]
[InlineData("/a/.b/../c", "/a/c")]
[InlineData("/a/../.b/./c", "/.b/c")]
[InlineData("/a/.b/./c", "/a/.b/c")]
[InlineData("/a/./.b/./c", "/a/.b/c")]
[InlineData("/a/..b/c", "/a/..b/c")]
[InlineData("/a/..b/../c", "/a/c")]
[InlineData("/a/../..b/./c", "/..b/c")]
[InlineData("/a/..b/./c", "/a/..b/c")]
[InlineData("/a/./..b/./c", "/a/..b/c")]
public void RemovesDotSegments(string input, string expected)
{
var data = Encoding.ASCII.GetBytes(input);
Expand Down
Loading
Loading