Skip to content

Commit 53adcc9

Browse files
author
Antonio Yang
committed
str.isprintable
- check unicode type by unicode_categories
1 parent 1b7088c commit 53adcc9

File tree

3 files changed

+36
-0
lines changed

3 files changed

+36
-0
lines changed

tests/snippets/strings.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -216,3 +216,10 @@ def try_mutate_str():
216216
for s, b, e in zip(ss, bs, ['u8', 'U8', 'utf-8', 'UTF-8', 'utf_8']):
217217
assert s.encode(e) == b
218218
# assert s.encode(encoding=e) == b
219+
220+
# str.isprintable
221+
assert "".isprintable()
222+
assert " ".isprintable()
223+
assert "abcdefg".isprintable()
224+
assert not "abcdefg\n".isprintable()
225+
assert "ʹ".isprintable()

vm/Cargo.toml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,7 @@ hexf = "0.1.0"
3131
indexmap = "1.0.2"
3232
crc = "^1.0.0"
3333
bincode = "1.1.4"
34+
unicode_categories = "0.1.1"
3435

3536

3637
# TODO: release and publish to crates.io

vm/src/obj/objstr.rs

Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
extern crate unicode_categories;
12
extern crate unicode_xid;
23

34
use std::fmt;
@@ -27,6 +28,9 @@ use super::objsequence::PySliceableSequence;
2728
use super::objslice::PySlice;
2829
use super::objtype::{self, PyClassRef};
2930

31+
use unicode_categories::UnicodeCategories;
32+
33+
3034
/// str(object='') -> str
3135
/// str(bytes_or_buffer[, encoding[, errors]]) -> str
3236
///
@@ -519,6 +523,30 @@ impl PyString {
519523
}
520524
}
521525

526+
/// Return true if all characters in the string are printable or the string is empty,
527+
/// false otherwise. Nonprintable characters are those characters defined in the
528+
/// Unicode character database as `Other` or `Separator`,
529+
/// excepting the ASCII space (0x20) which is considered printable.
530+
///
531+
/// All characters except those defined in the Unicode character database
532+
/// in the following categories are considered printable:
533+
/// * Cc (Other, Control)
534+
/// * Cf (Other, Format)
535+
/// * Cs (Other, Surrogate)
536+
/// * Co (Other, Private Use)
537+
/// * Cn (Other, Not Assigned)
538+
/// * Zl (Separator, Line), i.e. '\u2028' LINE SEPARATOR
539+
/// * Zp (Separator, Paragraph), i.e. '\u2029' PARAGRAPH SEPARATOR
540+
/// * Zs (Separator, Space) other than ASCII space ('\x20').
541+
#[pymethod]
542+
fn isprintable(&self, _vm: &VirtualMachine) -> bool {
543+
// `all` is vacuously true on an empty iterator, so "" needs no separate emptiness check.
544+
self.value.chars().all(|c| {
545+
// `is_other` rejects Cf/Co as well as Cc. NOTE(review): Cn (unassigned) may still pass; confirm.
546+
c == '\u{0020}' || !(c.is_other() || c.is_separator())
547+
})
548+
}
549+
522550
// cpython's isspace ignores whitespace, including \t and \n, etc, unless the whole string is empty
523551
// which is why isspace is using is_ascii_whitespace. Same for isupper & islower
524552
#[pymethod]

0 commit comments

Comments
 (0)