Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

⚡️ Speed up function unwrap by 539% #367

Open
wants to merge 1 commit into
base: main
Choose a base branch
from

Conversation

misrasaurabh1
Copy link

📄 539% (5.39x) speedup for unwrap in pyinstrument/util.py

⏱️ Runtime : 1.22 milliseconds → 191 microseconds (best of 437 runs)

📝 Explanation and details

To optimize the unwrap function, we can avoid using regular expressions and minimize the number of operations by collapsing runs of spaces, tabs, and newlines in a single pass. The updated version of the unwrap function is, in essence, `" ".join(string.split())`.

Explanation.

  1. The split function (without arguments) splits the string at any whitespace and filters out any empty strings automatically.
  2. The join function then concatenates these parts with a single space.

This approach avoids the overhead associated with regular expressions and performs the string transformations in a more efficient manner.

Correctness verification report:

Test Status
⚙️ Existing Unit Tests 🔘 None Found
🌀 Generated Regression Tests 46 Passed
⏪ Replay Tests 🔘 None Found
🔎 Concolic Coverage Tests 1 Passed
📊 Tests Coverage 100.0%
🌀 Generated Regression Tests Details
import re

# imports
import pytest  # used for our unit tests
from pyinstrument.util import unwrap

# unit tests

def test_basic_single_newline():
    """Run unwrap on two words separated by one newline."""
    text = "Hello\nWorld"
    codeflash_output = unwrap(text)

def test_basic_multiple_newlines():
    """Run unwrap on two words separated by consecutive newlines."""
    text = "Hello\n\nWorld"
    codeflash_output = unwrap(text)

def test_basic_newline_with_spaces():
    """Run unwrap on a newline that has a space on either side."""
    text = "Hello \n World"
    codeflash_output = unwrap(text)

def test_leading_whitespace():
    """Run unwrap on text that starts with spaces."""
    text = "  Hello World"
    codeflash_output = unwrap(text)

def test_trailing_whitespace():
    """Run unwrap on text that ends with spaces."""
    text = "Hello World  "
    codeflash_output = unwrap(text)

def test_leading_and_trailing_whitespace():
    """Run unwrap on text padded with spaces at both ends."""
    text = "  Hello World  "
    codeflash_output = unwrap(text)

def test_multiple_spaces_between_words():
    """Run unwrap on two words separated by a run of spaces."""
    text = "Hello   World"
    codeflash_output = unwrap(text)

def test_tabs_and_spaces():
    """Run unwrap on two words separated by two tab characters."""
    text = "Hello\t\tWorld"
    codeflash_output = unwrap(text)

def test_mixed_whitespace_characters():
    """Run unwrap on two words separated by space, tab, space."""
    text = "Hello \t World"
    codeflash_output = unwrap(text)

def test_empty_string():
    """Run unwrap on the empty string."""
    text = ""
    codeflash_output = unwrap(text)

def test_single_space():
    """Run unwrap on a string consisting of one space."""
    text = " "
    codeflash_output = unwrap(text)

def test_single_non_whitespace_character():
    """Run unwrap on a one-character, non-whitespace string."""
    text = "a"
    codeflash_output = unwrap(text)

def test_string_with_only_spaces():
    """Run unwrap on a string made up solely of spaces."""
    text = "     "
    codeflash_output = unwrap(text)

def test_string_with_only_newline_characters():
    """Run unwrap on a string made up solely of newlines."""
    text = "\n\n\n"
    codeflash_output = unwrap(text)

def test_string_with_mixed_whitespace_characters():
    """Run unwrap on a string containing only tabs, spaces and a newline."""
    text = "\t \n \t"
    codeflash_output = unwrap(text)

def test_large_string_no_newlines():
    """Run unwrap on 10000 repetitions of "a" with no whitespace at all."""
    payload = "a" * 10000
    codeflash_output = unwrap(payload)

def test_large_string_many_newlines():
    """Run unwrap on 5000 repetitions of "a" followed by a newline."""
    payload = "a\n" * 5000
    codeflash_output = unwrap(payload)

def test_large_string_mixed_content():
    """Run unwrap on 1000 repetitions of a newline/space/tab-laden fragment."""
    fragment = "a\n b \t c\n"
    codeflash_output = unwrap(fragment * 1000)

def test_string_with_special_characters():
    """Run unwrap on text whose second line ends in punctuation symbols."""
    text = "Hello\nWorld!@#$%^&*()"
    codeflash_output = unwrap(text)

def test_string_with_non_ascii_characters():
    """Run unwrap on text whose second line is non-ASCII."""
    text = "Hello\n世界"
    codeflash_output = unwrap(text)

def test_string_with_no_whitespace():
    """Run unwrap on a single word containing no whitespace."""
    text = "HelloWorld"
    codeflash_output = unwrap(text)

def test_carriage_return_and_newline():
    """Run unwrap on two words separated by a CRLF pair."""
    text = "Hello\r\nWorld"
    codeflash_output = unwrap(text)

def test_carriage_return_only():
    """Run unwrap on two words separated by a lone carriage return."""
    text = "Hello\rWorld"
    codeflash_output = unwrap(text)

def test_string_with_numbers_and_letters():
    """Run unwrap on three digit groups separated by newlines."""
    text = "123\n456\n789"
    codeflash_output = unwrap(text)

def test_string_with_punctuation_and_letters():
    """Run unwrap on punctuated text broken by a newline."""
    text = "Hello,\nworld!"
    codeflash_output = unwrap(text)

def test_string_with_embedded_newlines_and_spaces():
    """Run unwrap on text with space-padded newlines, including a trailing one."""
    text = "Hello \n world \n"
    codeflash_output = unwrap(text)

def test_string_with_embedded_newlines_and_tabs():
    """Run unwrap on two words separated by a newline followed by a tab."""
    text = "Hello\n\tworld"
    codeflash_output = unwrap(text)

# Run the tests
if __name__ == "__main__":
    # Reached only when this file is executed as a script, not on import.
    pytest.main()
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

import re

# imports
import pytest  # used for our unit tests
from pyinstrument.util import unwrap

# unit tests

def test_basic_single_line_no_newlines():
    """Run unwrap on a single line with no newlines or extra spaces."""
    text = "Hello, World!"
    codeflash_output = unwrap(text)

def test_basic_multiple_lines_with_newlines():
    """Run unwrap on two lines joined by a newline."""
    text = "Hello,\nWorld!"
    codeflash_output = unwrap(text)

def test_basic_multiple_lines_with_mixed_whitespace():
    """Run unwrap on three lines with spaces around the line breaks."""
    text = "Hello, \n  World! \nHow are you?"
    codeflash_output = unwrap(text)

def test_leading_whitespace():
    """Run unwrap on text preceded by spaces."""
    text = "   Hello, World!"
    codeflash_output = unwrap(text)

def test_trailing_whitespace():
    """Run unwrap on text followed by spaces."""
    text = "Hello, World!   "
    codeflash_output = unwrap(text)

def test_leading_and_trailing_whitespace():
    """Run unwrap on text with spaces both before and after it."""
    text = "   Hello, World!   "
    codeflash_output = unwrap(text)

def test_excessive_internal_whitespace():
    """Run unwrap on text with multiple spaces between its words."""
    text = "Hello,    World!"
    codeflash_output = unwrap(text)

def test_tabs_and_spaces_between_words():
    """Run unwrap on text with a tab and spaces between its words."""
    text = "Hello,\t   World!"
    codeflash_output = unwrap(text)

def test_edge_empty_string():
    """Run unwrap on the empty string."""
    text = ""
    codeflash_output = unwrap(text)

def test_edge_only_newlines():
    """Run unwrap on a string that contains only newlines."""
    text = "\n\n\n"
    codeflash_output = unwrap(text)

def test_edge_only_whitespace():
    """Run unwrap on a string that contains only spaces."""
    text = "    "
    codeflash_output = unwrap(text)

def test_mixed_content_punctuation_special_characters():
    """Run unwrap on punctuated text containing a newline and a tab."""
    text = "Hello, \nWorld! How's \tgoing?"
    codeflash_output = unwrap(text)

def test_mixed_content_numbers():
    """Run unwrap on digit groups separated by a newline and spaces."""
    text = "123 \n 456  789"
    codeflash_output = unwrap(text)

def test_large_scale_very_long_string():
    """Run unwrap on two 10000-character runs joined by one newline."""
    first_run = "a" * 10000
    second_run = "b" * 10000
    codeflash_output = unwrap("\n".join((first_run, second_run)))

def test_large_scale_many_newlines_and_spaces():
    """Check unwrap on 1000 "a" lines, 1000 spaces, then 1000 "b" lines.

    The expected value is the 2000 tokens joined by single spaces with no
    leading or trailing whitespace; this is the same string the original
    ``expected_output`` expression built.
    """
    input_str = "a\n" * 1000 + " " * 1000 + "b\n" * 1000
    expected_output = " ".join(["a"] * 1000 + ["b"] * 1000)
    codeflash_output = unwrap(input_str)
    # The expected value used to be computed and then discarded; compare it so
    # the test actually pins the result instead of only checking for no crash.
    assert codeflash_output == expected_output

def test_special_characters_unicode():
    """Run unwrap on non-ASCII text split by a space and a newline."""
    text = "你好, \n世界!"
    codeflash_output = unwrap(text)

def test_special_characters_emojis():
    """Run unwrap on text whose second line starts with an emoji."""
    text = "Hello, \n😊 World!"
    codeflash_output = unwrap(text)

def test_mixed_line_endings():
    """Run unwrap on text that mixes CRLF and LF line endings."""
    text = "Hello,\r\nWorld!\nHow are you?"
    codeflash_output = unwrap(text)

def test_complex_whitespace_patterns():
    """Run unwrap on text interleaving spaces, tabs and newlines."""
    text = "Hello, \n \t World! \n\t How are \t you?"
    codeflash_output = unwrap(text)

# Run the tests
if __name__ == "__main__":
    # Reached only when this file is executed as a script, not on import.
    pytest.main()
# codeflash_output is used to check that the output of the original code is the same as that of the optimized code.

from pyinstrument.util import unwrap

def test_unwrap():
    """Concolic case: unwrap of the empty string is the empty string."""
    result = unwrap('')
    assert result == ''

Codeflash

Certainly! To optimize the `unwrap` function, we can avoid using regular expressions and minimize the number of operations by collapsing runs of spaces, tabs, and newlines in a single pass. The updated version of the `unwrap` function is, in essence, `" ".join(string.split())`.



Explanation.
1. The `split` function (without arguments) splits the string at any whitespace and filters out any empty strings automatically.
2. The `join` function then concatenates these parts with a single space.

This approach avoids the overhead associated with regular expressions and performs the string transformations in a more efficient manner.
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

1 participant