Skip to content

Update str related tests from 3.13.5 #5953

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 3 commits into from
Jul 12, 2025
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Next Next commit
Update str related tests from 3.13.5
  • Loading branch information
ShaharNaveh committed Jul 12, 2025
commit 46c7bf77c4377677b1dce1d370f2cff33ee1f9d2
120 changes: 82 additions & 38 deletions Lib/test/string_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,26 +8,20 @@
from collections import UserList
import random


# Minimal sequence wrapper used as a test fixture: it stores the given
# sequence in self.seq (default 'wxyz') and forwards len() and indexing to it.
class Sequence:
def __init__(self, seq='wxyz'): self.seq = seq
def __len__(self): return len(self.seq)
def __getitem__(self, i): return self.seq[i]

# Sequence subclass whose items are not all strings (7, 'hello', 123).
# __str__ renders the three items separated by single spaces.
class BadSeq1(Sequence):
def __init__(self): self.seq = [7, 'hello', 123]
def __str__(self): return '{0} {1} {2}'.format(*self.seq)

# Sequence subclass that holds three items but whose __len__ reports 8,
# i.e. the advertised length does not match the real number of items.
class BadSeq2(Sequence):
def __init__(self): self.seq = ['a', 'b', 'c']
def __len__(self): return 8

class BaseTest:
# These tests are for buffers of values (bytes) and not
# specific to character interpretation, used for bytes objects
# and various string implementations

# The type to be tested
# Change in subclasses to change the behaviour of fixtesttype()
# Change in subclasses to change the behaviour of fixtype()
type2test = None

# Whether the "contained items" of the container are integers in
Expand All @@ -36,7 +30,7 @@ class BaseTest:
contains_bytes = False

# All tests pass their arguments to the testing methods
# as str objects. fixtesttype() can be used to propagate
# as str objects. fixtype() can be used to propagate
# these arguments to the appropriate type
def fixtype(self, obj):
if isinstance(obj, str):
Expand Down Expand Up @@ -160,6 +154,12 @@ def test_count(self):
self.assertEqual(rem, 0, '%s != 0 for %s' % (rem, i))
self.assertEqual(r1, r2, '%s != %s for %s' % (r1, r2, i))

def test_count_keyword(self):
# The replacement limit of replace() may be passed positionally or as the
# `count` keyword; both spellings must give the same result.
# 'aa' contains two occurrences of 'a', so the limits 0..3 cover
# "none", "some", "exactly all" and "more than available".
self.assertEqual('aa'.replace('a', 'b', 0), 'aa'.replace('a', 'b', count=0))
self.assertEqual('aa'.replace('a', 'b', 1), 'aa'.replace('a', 'b', count=1))
self.assertEqual('aa'.replace('a', 'b', 2), 'aa'.replace('a', 'b', count=2))
self.assertEqual('aa'.replace('a', 'b', 3), 'aa'.replace('a', 'b', count=3))

def test_find(self):
self.checkequal(0, 'abcdefghiabc', 'find', 'abc')
self.checkequal(9, 'abcdefghiabc', 'find', 'abc', 1)
Expand Down Expand Up @@ -327,11 +327,12 @@ def reference_find(p, s):
for i in range(len(s)):
if s.startswith(p, i):
return i
if p == '' and s == '':
return 0
return -1

rr = random.randrange
choices = random.choices
for _ in range(1000):
def check_pattern(rr):
choices = random.choices
p0 = ''.join(choices('abcde', k=rr(10))) * rr(10, 20)
p = p0[:len(p0) - rr(10)] # pop off some characters
left = ''.join(choices('abcdef', k=rr(2000)))
Expand All @@ -341,6 +342,49 @@ def reference_find(p, s):
self.checkequal(reference_find(p, text),
text, 'find', p)

rr = random.randrange
for _ in range(1000):
check_pattern(rr)

# Test that empty strings always work:
check_pattern(lambda *args: 0)

def test_find_many_lengths(self):
# Exercise find() over many combinations of haystack and needle sizes.
# Haystacks are "abcab" repeated n times followed by "da", with
# n in 1, 2, 5, 10, 20, 50, ... up to 500000.
haystack_repeats = [a * 10**e for e in range(6) for a in (1,2,5)]
haystacks = [(n, self.fixtype("abcab"*n + "da")) for n in haystack_repeats]

# Needles have the same shape, with m in 1, 3, 10, 30, ... up to 300000.
needle_repeats = [a * 10**e for e in range(6) for a in (1, 3)]
needles = [(m, self.fixtype("abcab"*m + "da")) for m in needle_repeats]

for n, haystack1 in haystacks:
# Same haystack without its final character: the trailing "da" that
# every needle ends with is no longer present, so nothing can match.
haystack2 = haystack1[:-1]
for m, needle in needles:
# The single "d" forces a match to end at the end of the haystack,
# which puts its start at 5 * (n - m); a needle with more
# repetitions than the haystack cannot match at all.
answer1 = 5 * (n - m) if m <= n else -1
self.assertEqual(haystack1.find(needle), answer1, msg=(n,m))
self.assertEqual(haystack2.find(needle), -1, msg=(n,m))

def test_adaptive_find(self):
# This would be very slow for the naive algorithm,
# but str.find() should be O(n + m).
# NOTE(review): checkequal is defined outside this view; the calls below
# presumably mean checkequal(expected, obj, method_name, *args).
for N in 1000, 10_000, 100_000, 1_000_000:
A, B = 'a' * N, 'b' * N
# The haystack has only N consecutive 'b's while the needle needs 2*N,
# so the needle does not occur in the haystack on its own.
haystack = A + A + B + A + A
needle = A + B + B + A
self.checkequal(-1, haystack, 'find', needle)
self.checkequal(0, haystack, 'count', needle)
# Once the needle is appended, it is found exactly once, starting
# right where the original haystack ends.
self.checkequal(len(haystack), haystack + needle, 'find', needle)
self.checkequal(1, haystack + needle, 'count', needle)

def test_find_with_memory(self):
# Test the "Skip with memory" path in the two-way algorithm.
# NOTE(review): checkequal is defined outside this view; the calls below
# presumably mean checkequal(expected, obj, method_name, *args).
for N in 1000, 3000, 10_000, 30_000:
# The needle is N repetitions of 'ab'. The haystack is two runs of
# N-1 repetitions, each followed by an extra 'b', so it never
# contains N consecutive 'ab' pairs.
needle = 'ab' * N
haystack = ('ab'*(N-1) + 'b') * 2
self.checkequal(-1, haystack, 'find', needle)
self.checkequal(0, haystack, 'count', needle)
# Once the needle is appended, it is found exactly once, starting
# right where the original haystack ends.
self.checkequal(len(haystack), haystack + needle, 'find', needle)
self.checkequal(1, haystack + needle, 'count', needle)

def test_find_shift_table_overflow(self):
"""When the table of 8-bit shifts overflows."""
N = 2**8 + 100
Expand Down Expand Up @@ -394,8 +438,7 @@ def test_expandtabs(self):

self.checkraises(TypeError, 'hello', 'expandtabs', 42, 42)
# This test is only valid when sizeof(int) == sizeof(void*) == 4.
# XXX RUSTPYTHON TODO: expandtabs overflow checks
if sys.maxsize < (1 << 32) and struct.calcsize('P') == 4 and False:
if sys.maxsize < (1 << 32) and struct.calcsize('P') == 4:
self.checkraises(OverflowError,
'\ta\n\tb', 'expandtabs', sys.maxsize)

Expand Down Expand Up @@ -724,7 +767,18 @@ def test_replace(self):
self.checkraises(TypeError, 'hello', 'replace', 42, 'h')
self.checkraises(TypeError, 'hello', 'replace', 'h', 42)

@unittest.skip("TODO: RUSTPYTHON, may only apply to 32-bit platforms")
def test_replace_uses_two_way_maxcount(self):
# Test that maxcount works in _two_way_count in fastsearch.h
# NOTE(review): checkequal is defined outside this view; the calls below
# presumably mean checkequal(expected, obj, method_name, *args).
A, B = "A"*1000, "B"*1000
AABAA = A + A + B + A + A
ABBA = A + B + B + A
# ABBA occurs exactly once in AABAA + ABBA (as the appended suffix).
# A limit of 0 must leave the string unchanged.
self.checkequal(AABAA + ABBA,
AABAA + ABBA, 'replace', ABBA, "ccc", 0)
# Limits of 1 and 2 must both replace that single occurrence.
self.checkequal(AABAA + "ccc",
AABAA + ABBA, 'replace', ABBA, "ccc", 1)
self.checkequal(AABAA + "ccc",
AABAA + ABBA, 'replace', ABBA, "ccc", 2)

@unittest.skipIf(sys.maxsize > (1 << 32) or struct.calcsize('P') != 4,
'only applies to 32-bit platforms')
def test_replace_overflow(self):
Expand All @@ -734,8 +788,6 @@ def test_replace_overflow(self):
self.checkraises(OverflowError, A2_16, "replace", "A", A2_16)
self.checkraises(OverflowError, A2_16, "replace", "AA", A2_16+A2_16)


# Python 3.9
def test_removeprefix(self):
self.checkequal('am', 'spam', 'removeprefix', 'sp')
self.checkequal('spamspam', 'spamspamspam', 'removeprefix', 'spam')
Expand All @@ -754,7 +806,6 @@ def test_removeprefix(self):
self.checkraises(TypeError, 'hello', 'removeprefix', 'h', 42)
self.checkraises(TypeError, 'hello', 'removeprefix', ("he", "l"))

# Python 3.9
def test_removesuffix(self):
self.checkequal('sp', 'spam', 'removesuffix', 'am')
self.checkequal('spamspam', 'spamspamspam', 'removesuffix', 'spam')
Expand Down Expand Up @@ -1053,7 +1104,7 @@ def test_splitlines(self):
self.checkraises(TypeError, 'abc', 'splitlines', 42, 42)


class CommonTest(BaseTest):
class StringLikeTest(BaseTest):
# This testcase contains tests that can be used in all
# stringlike classes. Currently these are str and UserString.

Expand Down Expand Up @@ -1084,11 +1135,6 @@ def test_capitalize_nonascii(self):
self.checkequal('\u019b\u1d00\u1d86\u0221\u1fb7',
'\u019b\u1d00\u1d86\u0221\u1fb7', 'capitalize')


class MixinStrUnicodeUserStringTest:
# additional tests that only work for
# stringlike objects, i.e. str, UserString

def test_startswith(self):
self.checkequal(True, 'hello', 'startswith', 'he')
self.checkequal(True, 'hello', 'startswith', 'hello')
Expand Down Expand Up @@ -1200,9 +1246,6 @@ def test___contains__(self):
self.checkequal(False, 'asd', '__contains__', 'asdf')
self.checkequal(False, '', '__contains__', 'asdf')


# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_subscript(self):
self.checkequal('a', 'abc', '__getitem__', 0)
self.checkequal('c', 'abc', '__getitem__', -1)
Expand Down Expand Up @@ -1273,8 +1316,11 @@ def test_join(self):
self.checkequal(((('a' * i) + '-') * i)[:-1], '-', 'join',
('a' * i,) * i)

#self.checkequal(str(BadSeq1()), ' ', 'join', BadSeq1())
self.checkequal('a b c', ' ', 'join', BadSeq2())
# Sequence holding three items whose __len__ nevertheless reports 8.
# The join check that follows expects only the three real items ('a b c').
class LiesAboutLengthSeq(Sequence):
def __init__(self): self.seq = ['a', 'b', 'c']
def __len__(self): return 8

self.checkequal('a b c', ' ', 'join', LiesAboutLengthSeq())

self.checkraises(TypeError, ' ', 'join')
self.checkraises(TypeError, ' ', 'join', None)
Expand Down Expand Up @@ -1453,25 +1499,23 @@ def test_none_arguments(self):
self.checkequal(True, s, 'startswith', 'h', None, -2)
self.checkequal(False, s, 'startswith', 'x', None, None)

# TODO: RUSTPYTHON
@unittest.expectedFailure
def test_find_etc_raise_correct_error_messages(self):
# issue 11828
s = 'hello'
x = 'x'
self.assertRaisesRegex(TypeError, r'^find\(', s.find,
self.assertRaisesRegex(TypeError, r'^find\b', s.find,
x, None, None, None)
self.assertRaisesRegex(TypeError, r'^rfind\(', s.rfind,
self.assertRaisesRegex(TypeError, r'^rfind\b', s.rfind,
x, None, None, None)
self.assertRaisesRegex(TypeError, r'^index\(', s.index,
self.assertRaisesRegex(TypeError, r'^index\b', s.index,
x, None, None, None)
self.assertRaisesRegex(TypeError, r'^rindex\(', s.rindex,
self.assertRaisesRegex(TypeError, r'^rindex\b', s.rindex,
x, None, None, None)
self.assertRaisesRegex(TypeError, r'^count\(', s.count,
self.assertRaisesRegex(TypeError, r'^count\b', s.count,
x, None, None, None)
self.assertRaisesRegex(TypeError, r'^startswith\(', s.startswith,
self.assertRaisesRegex(TypeError, r'^startswith\b', s.startswith,
x, None, None, None)
self.assertRaisesRegex(TypeError, r'^endswith\(', s.endswith,
self.assertRaisesRegex(TypeError, r'^endswith\b', s.endswith,
x, None, None, None)

# issue #15534
Expand Down
Loading