From ff4ca5cf07f2de5b60f4c4618e04508b7cd918ae Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 17:43:20 +0200 Subject: [PATCH 1/9] Only allow 0-9 digits in MIME parameter section numbers --- Lib/email/_header_value_parser.py | 9 +++++++-- Lib/test/test_email/test__header_value_parser.py | 10 ++++++++++ 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 91243378dc0441..347d75b480a8b0 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2398,17 +2398,22 @@ def get_section(value): The caller should already have dealt with leading CFWS. """ + def is_allowed_digit(c): + # We don't use str.isdigit because only 0-9 are accepted, not + # super-script and other types of digits. + return c in {'0','1','2','3','4','5','6','7','8','9'} + section = Section() if not value or value[0] != '*': raise errors.HeaderParseError("Expected section but found {}".format( value)) section.append(ValueTerminal('*', 'section-marker')) value = value[1:] - if not value or not value[0].isdigit(): + if not value or not is_allowed_digit(value[0]): raise errors.HeaderParseError("Expected section number but " "found {}".format(value)) digits = '' - while value and value[0].isdigit(): + while value and is_allowed_digit(value[0]): digits += value[0] value = value[1:] if digits[0] == '0' and digits != '0': diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 179e236ecdfd7f..716a4381fffbde 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2982,6 +2982,16 @@ def mime_parameters_as_value(self, 'r*=\'a\'"', [('r', '"')], [errors.InvalidHeaderDefect]*2), + + # bpo-42946: Unicode super-script digits (and others) are not allowed + # as section numbers. 
+ 'non_allowed_digits': ( 'foo*0=bar; foo*²=baz', ' foo="bar"', 'foo*0=bar; foo*²=baz', [('foo', 'bar')], [errors.InvalidHeaderDefect]), + } @parameterize From 656b6fb5e1ce3972409c0a580524875a555b7cdb Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 17:45:20 +0200 Subject: [PATCH 2/9] Use gh bug number --- Lib/test/test_email/test__header_value_parser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 716a4381fffbde..2f259e9d1c0dc0 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2983,7 +2983,7 @@ def mime_parameters_as_value(self, [('r', '"')], [errors.InvalidHeaderDefect]*2), - # bpo-42946: Unicode super-script digits (and others) are not allowed + # gh-87112: Unicode super-script digits (and others) are not allowed # as section numbers. 'non_allowed_digits': ( 'foo*0=bar; foo*²=baz', From 75636a5d3d3bd6b40d60057b1c5b1b3b9710dbe2 Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 18:00:05 +0200 Subject: [PATCH 3/9] Add news entry --- .../next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst diff --git a/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst b/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst new file mode 100644 index 00000000000000..25fe38a2e36812 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst @@ -0,0 +1,2 @@ +Do not fail when non-0-9 digit (e.g. super-script digit) is used as section +number in MIME parameter.
From b15c4045927baf9ba763b29df9acbcbc0d80a7aa Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 19:17:49 +0200 Subject: [PATCH 4/9] Comments from PR review, remove inner function --- Lib/email/_header_value_parser.py | 11 ++++------- Lib/test/test_email/test__header_value_parser.py | 1 - 2 files changed, 4 insertions(+), 8 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index 347d75b480a8b0..cff3dd4aa9aec7 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2398,22 +2398,19 @@ def get_section(value): The caller should already have dealt with leading CFWS. """ - def is_allowed_digit(c): - # We don't use str.isdigit because only 0-9 are accepted, not - # super-script and other types of digits. - return c in {'0','1','2','3','4','5','6','7','8','9'} - section = Section() if not value or value[0] != '*': raise errors.HeaderParseError("Expected section but found {}".format( value)) section.append(ValueTerminal('*', 'section-marker')) value = value[1:] - if not value or not is_allowed_digit(value[0]): + # We don't use str.isdigit because only 0-9 are accepted, not super-script + # and other types of digits. 
+ if not value or not '0' <= value[0] <= '9': raise errors.HeaderParseError("Expected section number but " "found {}".format(value)) digits = '' - while value and is_allowed_digit(value[0]): + while value and '0' <= value[0] <= '9': digits += value[0] value = value[1:] if digits[0] == '0' and digits != '0': diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index 2f259e9d1c0dc0..e4a383f259e847 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2991,7 +2991,6 @@ def mime_parameters_as_value(self, 'foo*0=bar; foo*²=baz', [('foo', 'bar')], [errors.InvalidHeaderDefect]), - } @parameterize From 7cba42f982a73b4ec8892c81dfd17fc9652487d8 Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 19:20:37 +0200 Subject: [PATCH 5/9] Rephrase News entry --- .../Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst b/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst index 25fe38a2e36812..cb72b8eea1a3d3 100644 --- a/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst +++ b/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst @@ -1,2 +1,2 @@ -Do not fail when non-0-9 digit (e.g. super-script digit) is used as section -number in MIME parameter. +Ensure that only ASCII digits are accepted as section number in MIME header +parameter. 
From d2d59eee08a60353f622c8de670ef250ea01ef71 Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 19:35:30 +0200 Subject: [PATCH 6/9] Comments from PR review, improve phrasing --- Lib/email/_header_value_parser.py | 7 +++---- Lib/test/test_email/test__header_value_parser.py | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index cff3dd4aa9aec7..ed5224d6349f4f 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2404,13 +2404,12 @@ def get_section(value): value)) section.append(ValueTerminal('*', 'section-marker')) value = value[1:] - # We don't use str.isdigit because only 0-9 are accepted, not super-script - # and other types of digits. - if not value or not '0' <= value[0] <= '9': + # We don't use str.isdigit because only ASCII digits are allowed. + if not value or not ('0' <= value[0] <= '9'): raise errors.HeaderParseError("Expected section number but " "found {}".format(value)) digits = '' - while value and '0' <= value[0] <= '9': + while value and ('0' <= value[0] <= '9'): digits += value[0] value = value[1:] if digits[0] == '0' and digits != '0': diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index e4a383f259e847..b150196f60527a 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2983,8 +2983,7 @@ def mime_parameters_as_value(self, [('r', '"')], [errors.InvalidHeaderDefect]*2), - # gh-87112: Unicode super-script digits (and others) are not allowed - # as section numbers. + # gh-87112: Only ASCII digits can be section numbers. 
'non_allowed_digits': ( 'foo*0=bar; foo*²=baz', ' foo="bar"', From 54bbbb752c2ae0b7dc8dba04a66ca5b3df850cd9 Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 20 Jul 2025 19:50:10 +0200 Subject: [PATCH 7/9] Use separate function --- Lib/email/_header_value_parser.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index ed5224d6349f4f..b8c639e4a70ced 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2398,18 +2398,21 @@ def get_section(value): The caller should already have dealt with leading CFWS. """ + def is_ascii_digit(d): + # We don't use str.isdigit because only ASCII digits are allowed. + return '0' <= d <= '9' + section = Section() if not value or value[0] != '*': raise errors.HeaderParseError("Expected section but found {}".format( value)) section.append(ValueTerminal('*', 'section-marker')) value = value[1:] - # We don't use str.isdigit because only ASCII digits are allowed. 
- if not value or not ('0' <= value[0] <= '9'): + if not value or not is_ascii_digit(value[0]): raise errors.HeaderParseError("Expected section number but " "found {}".format(value)) digits = '' - while value and ('0' <= value[0] <= '9'): + while value and is_ascii_digit(value[0]): digits += value[0] value = value[1:] if digits[0] == '0' and digits != '0': From f12e424b92a0722258b85c6553ad9e94e218155c Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 27 Jul 2025 14:16:49 +0200 Subject: [PATCH 8/9] Accept digits that are convertible to int for backwards compatibility --- Lib/email/_header_value_parser.py | 20 ++++++++++++++----- .../test_email/test__header_value_parser.py | 11 ++++++++-- 2 files changed, 24 insertions(+), 7 deletions(-) diff --git a/Lib/email/_header_value_parser.py b/Lib/email/_header_value_parser.py index b8c639e4a70ced..8684bdb8dfd5c6 100644 --- a/Lib/email/_header_value_parser.py +++ b/Lib/email/_header_value_parser.py @@ -2398,9 +2398,16 @@ def get_section(value): The caller should already have dealt with leading CFWS. """ - def is_ascii_digit(d): - # We don't use str.isdigit because only ASCII digits are allowed. - return '0' <= d <= '9' + def is_accepted_digit(d): + # While only ASCII digits are allowed by the RFC, we accept any digit + # that can be converted to an int for backwards compatibility purposes. + # We don't use str.isdigit() as some Unicode digits are not convertible + # (e.g. superscript digits). 
+ try: + int(d) + return True + except ValueError: + return False section = Section() if not value or value[0] != '*': @@ -2408,11 +2415,14 @@ def is_ascii_digit(d): value)) section.append(ValueTerminal('*', 'section-marker')) value = value[1:] - if not value or not is_ascii_digit(value[0]): + if not value or not is_accepted_digit(value[0]): raise errors.HeaderParseError("Expected section number but " "found {}".format(value)) digits = '' - while value and is_ascii_digit(value[0]): + while value and is_accepted_digit(value[0]): + if not '0' <= value[0] <= '9': + section.defects.append(errors.InvalidHeaderDefect( + "section number has a non-ASCII digit {}".format(value[0]))) digits += value[0] value = value[1:] if digits[0] == '0' and digits != '0': diff --git a/Lib/test/test_email/test__header_value_parser.py b/Lib/test/test_email/test__header_value_parser.py index b150196f60527a..2eaf73bad89afa 100644 --- a/Lib/test/test_email/test__header_value_parser.py +++ b/Lib/test/test_email/test__header_value_parser.py @@ -2983,13 +2983,20 @@ def mime_parameters_as_value(self, [('r', '"')], [errors.InvalidHeaderDefect]*2), - # gh-87112: Only ASCII digits can be section numbers. - 'non_allowed_digits': ( + # gh-87112: Only digits convertible to integers can be section numbers. 
+ 'non_accepted_digit': ( 'foo*0=bar; foo*²=baz', ' foo="bar"', 'foo*0=bar; foo*²=baz', [('foo', 'bar')], [errors.InvalidHeaderDefect]), + + 'non_ascii_digit_backwards_compatibility': ( + 'foo*0=bar; foo*߁=baz', # NKO digit '1' + ' foo="barbaz"', + 'foo*0=bar; foo*߁=baz', + [('foo', 'barbaz')], + [errors.InvalidHeaderDefect]), } @parameterize From b3643399e60bcec00d6756f04e032f95fe3c288e Mon Sep 17 00:00:00 2001 From: Matthieu Caneill Date: Sun, 27 Jul 2025 14:18:24 +0200 Subject: [PATCH 9/9] Update news message --- .../Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst b/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst index cb72b8eea1a3d3..ba4e72de85e317 100644 --- a/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst +++ b/Misc/NEWS.d/next/Library/2025-07-20-17-57-39.gh-issue-87112.sKU2V8.rst @@ -1,2 +1,2 @@ -Ensure that only ASCII digits are accepted as section number in MIME header -parameter. +Ensure that only digits convertible to integers are accepted as section number +in MIME header parameter.