Skip to content
Merged

Py new #6402

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
59 changes: 54 additions & 5 deletions crates/vm/src/builtins/bytes.rs
Original file line number Diff line number Diff line change
Expand Up @@ -92,15 +92,64 @@ pub(crate) fn init(context: &Context) {
}

impl Constructor for PyBytes {
type Args = ByteInnerNewOptions;
type Args = Vec<u8>;

fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult {
let options: Self::Args = args.bind(vm)?;
options.get_bytes(cls, vm).to_pyresult(vm)
let options: ByteInnerNewOptions = args.bind(vm)?;

// Optimizations for exact bytes type
if cls.is(vm.ctx.types.bytes_type) {
// Return empty bytes singleton
if options.source.is_missing()
&& options.encoding.is_missing()
&& options.errors.is_missing()
{
return Ok(vm.ctx.empty_bytes.clone().into());
}

// Return exact bytes as-is
if let OptionalArg::Present(ref obj) = options.source
&& options.encoding.is_missing()
&& options.errors.is_missing()
&& let Ok(b) = obj.clone().downcast_exact::<PyBytes>(vm)
{
return Ok(b.into_pyref().into());
}
}

// Handle __bytes__ method - may return PyBytes directly
if let OptionalArg::Present(ref obj) = options.source
&& options.encoding.is_missing()
&& options.errors.is_missing()
&& let Some(bytes_method) = vm.get_method(obj.clone(), identifier!(vm, __bytes__))
{
let bytes = bytes_method?.call((), vm)?;
// If exact bytes type and __bytes__ returns bytes, use it directly
if cls.is(vm.ctx.types.bytes_type)
&& let Ok(b) = bytes.clone().downcast::<PyBytes>()
{
return Ok(b.into());
}
// Otherwise convert to Vec<u8>
let inner = PyBytesInner::try_from_borrowed_object(vm, &bytes)?;
let payload = Self::py_new(&cls, inner.elements, vm)?;
return payload.into_ref_with_type(vm, cls).map(Into::into);
}
Comment on lines +120 to +137
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

⚠️ Potential issue | 🟡 Minor

🧩 Analysis chain

🏁 Script executed:

# Find the PyBytesInner::try_from_borrowed_object implementation
rg -A 20 "fn try_from_borrowed_object" crates/vm/src/builtins/bytes.rs

Repository: RustPython/RustPython

Length of output: 238


🏁 Script executed:

# Search for __bytes__ related tests or comments
rg -B 3 -A 3 "__bytes__" crates/vm/src/builtins/bytes.rs

Repository: RustPython/RustPython

Length of output: 856


🏁 Script executed:

# Check if there are any tests for bytes constructor with __bytes__
fd -e rs -x rg -l "__bytes__" {} \; | head -20

Repository: RustPython/RustPython

Length of output: 222


🏁 Script executed:

# Look at the downcast usage pattern in this file
rg "downcast" crates/vm/src/builtins/bytes.rs

Repository: RustPython/RustPython

Length of output: 794


🏁 Script executed:

# Find PyBytesInner definition and try_from_borrowed_object impl
rg -B 5 -A 20 "impl.*PyBytesInner" crates/vm/src/builtins/bytes.rs | head -80

Repository: RustPython/RustPython

Length of output: 527


🏁 Script executed:

# Search for PyBytesInner enum/struct definition
rg "^(pub )?struct PyBytesInner|^(pub )?enum PyBytesInner" crates/vm/src/builtins/bytes.rs

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

# Find where PyBytesInner::try_from_borrowed_object is actually implemented
ast-grep --pattern 'impl PyBytesInner {
  $$$
  fn try_from_borrowed_object($$$) {
    $$$
  }
  $$$
}'

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

# Look for the actual implementation - search more broadly
rg "fn try_from_borrowed_object" crates/vm/src/builtins/bytes.rs -B 2 -A 30 | head -100

Repository: RustPython/RustPython

Length of output: 288


🏁 Script executed:

# Look at the beginning/middle of bytes.rs for PyBytesInner impl
head -300 crates/vm/src/builtins/bytes.rs | tail -200

Repository: RustPython/RustPython

Length of output: 5642


🏁 Script executed:

# Search for PyBytesInner impl with try_from_borrowed_object more carefully
rg -B 3 "PyBytesInner::try_from_borrowed_object" crates/vm/src/builtins/bytes.rs

Repository: RustPython/RustPython

Length of output: 812


🏁 Script executed:

# Look at the entire impl block for PyBytesInner
sed -n '/^impl PyBytesInner/,/^impl /p' crates/vm/src/builtins/bytes.rs | head -100

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

# Search across entire vm/src for PyBytesInner::try_from_borrowed_object implementation
rg -B 2 -A 20 "impl.*PyBytesInner.*\{" crates/vm/src/builtins/bytes.rs | grep -A 25 "try_from_borrowed_object"

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

# PyBytesInner might be defined as a type alias - search for its definition
rg "type PyBytesInner|struct PyBytesInner|enum PyBytesInner" crates/vm/src/

Repository: RustPython/RustPython

Length of output: 119


🏁 Script executed:

# Search in function/buffer.rs since learnings mention it
rg -B 2 -A 25 "try_from_borrowed_object" crates/vm/src/function/buffer.rs | head -80

Repository: RustPython/RustPython

Length of output: 2557


🏁 Script executed:

# Look at PyBytesInner implementation in bytes_inner.rs
rg -B 2 -A 25 "fn try_from_borrowed_object" crates/vm/src/bytes_inner.rs

Repository: RustPython/RustPython

Length of output: 881


🏁 Script executed:

# Get context around PyBytesInner impl
rg -B 5 -A 30 "impl PyBytesInner" crates/vm/src/bytes_inner.rs | head -100

Repository: RustPython/RustPython

Length of output: 1354


🏁 Script executed:

# Find bytes_from_object function
rg -B 3 -A 40 "fn bytes_from_object" crates/vm/src/bytes_inner.rs

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

# Search entire codebase for bytes_from_object
rg -B 3 -A 40 "fn bytes_from_object"

Repository: RustPython/RustPython

Length of output: 1345


🏁 Script executed:

# Check if it's imported in bytes_inner.rs
rg "bytes_from_object" crates/vm/src/bytes_inner.rs

Repository: RustPython/RustPython

Length of output: 201


🌐 Web query:

CPython bytes constructor __bytes__ return type error message TypeError

💡 Result:

When you call bytes(x), CPython will call x.bytes() (if present) and that method must return a bytes object. If it returns some other type, CPython raises a TypeError such as:

  • "TypeError: bytes returned non-bytes (type X)" (e.g. X == int or str). [1][2]

Fix: make bytes return a bytes object (e.g. return b"..." or return some_str.encode('utf-8')). [3]

Sources:

  • object.bytes doc / pydoc requirement that bytes should return bytes. [1]
  • Python tracker / example error message showing "bytes returned non-bytes (type ...)". [2]
  • Explanation and example usage showing the TypeError and correct return type. [3]

🏁 Script executed:

# Search for tests of __bytes__ returning non-bytes
rg -B 5 -A 10 "__bytes__.*non" crates/

Repository: RustPython/RustPython

Length of output: 47


🏁 Script executed:

# Look for __bytes__ error tests
fd -e py -x rg -l "__bytes__" {} \; | xargs rg -l "TypeError" | head -5

Repository: RustPython/RustPython

Length of output: 731


🏁 Script executed:

# Check the test file for __bytes__ tests
cat extra_tests/snippets/builtin_bytes.py

Repository: RustPython/RustPython

Length of output: 20952


🏁 Script executed:

# Look at lines 120-137 more carefully with context
sed -n '115,145p' crates/vm/src/builtins/bytes.rs

Repository: RustPython/RustPython

Length of output: 1364


🏁 Script executed:

# Check if there's specific __bytes__ error validation
rg -B 5 -A 5 "returned non-bytes" crates/

Repository: RustPython/RustPython

Length of output: 47


The downcast usage on line 129 is correct; consider adding a specific error for __bytes__ returning non-bytes.

Line 129 correctly uses downcast::<PyBytes>() (which accepts subclasses) rather than downcast_exact. This matches CPython semantics: bytes() returns the __bytes__ result directly if it's any bytes instance, including subclasses.

However, when __bytes__ returns a non-bytes type, the current code delegates to PyBytesInner::try_from_borrowed_object, which raises a generic error: "can assign only bytes, buffers, or iterables of ints in range(0, 256)". CPython raises a more specific error: "TypeError: bytes returned non-bytes (type X)". Consider adding explicit validation to match CPython's error message.


// Fallback to get_bytearray_inner
let elements = options.get_bytearray_inner(vm)?.elements;

// Return empty bytes singleton for exact bytes types
if elements.is_empty() && cls.is(vm.ctx.types.bytes_type) {
return Ok(vm.ctx.empty_bytes.clone().into());
}

let payload = Self::py_new(&cls, elements, vm)?;
payload.into_ref_with_type(vm, cls).map(Into::into)
}

fn py_new(_cls: &Py<PyType>, _args: Self::Args, _vm: &VirtualMachine) -> PyResult<Self> {
unreachable!("use slot_new")
fn py_new(_cls: &Py<PyType>, elements: Self::Args, _vm: &VirtualMachine) -> PyResult<Self> {
Ok(Self::from(elements))
}
}

Expand Down
44 changes: 26 additions & 18 deletions crates/vm/src/builtins/set.rs
Original file line number Diff line number Diff line change
Expand Up @@ -918,35 +918,43 @@ impl Representable for PySet {
}

impl Constructor for PyFrozenSet {
type Args = OptionalArg<PyObjectRef>;
type Args = Vec<PyObjectRef>;

fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult {
let iterable: Self::Args = args.bind(vm)?;
let elements = if let OptionalArg::Present(iterable) = iterable {
let iterable = if cls.is(vm.ctx.types.frozenset_type) {
match iterable.downcast_exact::<Self>(vm) {
Ok(fs) => return Ok(fs.into_pyref().into()),
Err(iterable) => iterable,
}
} else {
iterable
};
let iterable: OptionalArg<PyObjectRef> = args.bind(vm)?;

// Optimizations for exact frozenset type
if cls.is(vm.ctx.types.frozenset_type) {
// Return exact frozenset as-is
if let OptionalArg::Present(ref input) = iterable
&& let Ok(fs) = input.clone().downcast_exact::<PyFrozenSet>(vm)
{
return Ok(fs.into_pyref().into());
}

// Return empty frozenset singleton
if iterable.is_missing() {
return Ok(vm.ctx.empty_frozenset.clone().into());
}
}

let elements: Vec<PyObjectRef> = if let OptionalArg::Present(iterable) = iterable {
iterable.try_to_value(vm)?
} else {
vec![]
};

// Return empty fs if iterable passed is empty and only for exact fs types.
// Return empty frozenset singleton for exact frozenset types (when iterable was empty)
if elements.is_empty() && cls.is(vm.ctx.types.frozenset_type) {
Ok(vm.ctx.empty_frozenset.clone().into())
} else {
Self::from_iter(vm, elements)
.and_then(|o| o.into_ref_with_type(vm, cls).map(Into::into))
return Ok(vm.ctx.empty_frozenset.clone().into());
}

let payload = Self::py_new(&cls, elements, vm)?;
payload.into_ref_with_type(vm, cls).map(Into::into)
}

fn py_new(_cls: &Py<PyType>, _args: Self::Args, _vm: &VirtualMachine) -> PyResult<Self> {
unreachable!("use slot_new")
fn py_new(_cls: &Py<PyType>, elements: Self::Args, vm: &VirtualMachine) -> PyResult<Self> {
Self::from_iter(vm, elements)
}
}

Expand Down
39 changes: 21 additions & 18 deletions crates/vm/src/builtins/str.rs
Original file line number Diff line number Diff line change
Expand Up @@ -351,36 +351,39 @@ impl Constructor for PyStr {
type Args = StrArgs;

fn slot_new(cls: PyTypeRef, func_args: FuncArgs, vm: &VirtualMachine) -> PyResult {
// Optimization: return exact str as-is (only when no encoding/errors provided)
if cls.is(vm.ctx.types.str_type)
&& func_args.args.len() == 1
&& func_args.kwargs.is_empty()
&& func_args.args[0].class().is(vm.ctx.types.str_type)
{
return Ok(func_args.args[0].clone());
}

let args: Self::Args = func_args.bind(vm)?;
let string: PyRef<PyStr> = match args.object {
let payload = Self::py_new(&cls, args, vm)?;
payload.into_ref_with_type(vm, cls).map(Into::into)
}

fn py_new(_cls: &Py<PyType>, args: Self::Args, vm: &VirtualMachine) -> PyResult<Self> {
match args.object {
OptionalArg::Present(input) => {
if let OptionalArg::Present(enc) = args.encoding {
vm.state.codec_registry.decode_text(
let s = vm.state.codec_registry.decode_text(
input,
enc.as_str(),
args.errors.into_option(),
vm,
)?
)?;
Ok(Self::from(s.as_wtf8().to_owned()))
} else {
input.str(vm)?
let s = input.str(vm)?;
Ok(Self::from(s.as_wtf8().to_owned()))
}
}
OptionalArg::Missing => {
Self::from(String::new()).into_ref_with_type(vm, cls.clone())?
}
};
if string.class().is(&cls) {
Ok(string.into())
} else {
Self::from(string.as_wtf8())
.into_ref_with_type(vm, cls)
.map(Into::into)
OptionalArg::Missing => Ok(Self::from(String::new())),
}
}

fn py_new(_cls: &Py<PyType>, _args: Self::Args, _vm: &VirtualMachine) -> PyResult<Self> {
unreachable!("use slot_new")
}
}

impl PyStr {
Expand Down
48 changes: 28 additions & 20 deletions crates/vm/src/builtins/tuple.rs
Original file line number Diff line number Diff line change
Expand Up @@ -110,37 +110,45 @@ impl_from_into_pytuple!(A, B, C, D, E, F, G);
pub type PyTupleRef = PyRef<PyTuple>;

impl Constructor for PyTuple {
type Args = OptionalArg<PyObjectRef>;
type Args = Vec<PyObjectRef>;

fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult {
let iterable: Self::Args = args.bind(vm)?;
let iterable: OptionalArg<PyObjectRef> = args.bind(vm)?;

// Optimizations for exact tuple type
if cls.is(vm.ctx.types.tuple_type) {
// Return exact tuple as-is
if let OptionalArg::Present(ref input) = iterable
&& let Ok(tuple) = input.clone().downcast_exact::<PyTuple>(vm)
{
return Ok(tuple.into_pyref().into());
}

// Return empty tuple singleton
if iterable.is_missing() {
return Ok(vm.ctx.empty_tuple.clone().into());
}
}

let elements = if let OptionalArg::Present(iterable) = iterable {
let iterable = if cls.is(vm.ctx.types.tuple_type) {
match iterable.downcast_exact::<Self>(vm) {
Ok(tuple) => return Ok(tuple.into_pyref().into()),
Err(iterable) => iterable,
}
} else {
iterable
};
iterable.try_to_value(vm)?
} else {
vec![]
};
// Return empty tuple only for exact tuple types if the iterable is empty.

// Return empty tuple singleton for exact tuple types (when iterable was empty)
if elements.is_empty() && cls.is(vm.ctx.types.tuple_type) {
Ok(vm.ctx.empty_tuple.clone().into())
} else {
Self {
elements: elements.into_boxed_slice(),
}
.into_ref_with_type(vm, cls)
.map(Into::into)
return Ok(vm.ctx.empty_tuple.clone().into());
}

let payload = Self::py_new(&cls, elements, vm)?;
payload.into_ref_with_type(vm, cls).map(Into::into)
}

fn py_new(_cls: &Py<PyType>, _args: Self::Args, _vm: &VirtualMachine) -> PyResult<Self> {
unreachable!("use slot_new")
fn py_new(_cls: &Py<PyType>, elements: Self::Args, _vm: &VirtualMachine) -> PyResult<Self> {
Ok(Self {
elements: elements.into_boxed_slice(),
})
}
}

Expand Down
42 changes: 2 additions & 40 deletions crates/vm/src/bytes_inner.rs
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
// spell-checker:ignore unchunked
use crate::{
AsObject, PyObject, PyObjectRef, PyPayload, PyResult, TryFromBorrowedObject, VirtualMachine,
AsObject, PyObject, PyObjectRef, PyResult, TryFromBorrowedObject, VirtualMachine,
anystr::{self, AnyStr, AnyStrContainer, AnyStrWrapper},
builtins::{
PyBaseExceptionRef, PyByteArray, PyBytes, PyBytesRef, PyInt, PyIntRef, PyStr, PyStrRef,
PyTypeRef, pystr,
pystr,
},
byte::bytes_from_object,
cformat::cformat_bytes,
common::hash,
function::{ArgIterable, Either, OptionalArg, OptionalOption, PyComparisonValue},
identifier,
literal::escape::Escape,
protocol::PyBuffer,
sequence::{SequenceExt, SequenceMutExt},
Expand Down Expand Up @@ -91,43 +90,6 @@ impl ByteInnerNewOptions {
})
}

pub fn get_bytes(self, cls: PyTypeRef, vm: &VirtualMachine) -> PyResult<PyBytesRef> {
let inner = match (&self.source, &self.encoding, &self.errors) {
(OptionalArg::Present(obj), OptionalArg::Missing, OptionalArg::Missing) => {
let obj = obj.clone();
// construct an exact bytes from an exact bytes do not clone
let obj = if cls.is(vm.ctx.types.bytes_type) {
match obj.downcast_exact::<PyBytes>(vm) {
Ok(b) => return Ok(b.into_pyref()),
Err(obj) => obj,
}
} else {
obj
};

if let Some(bytes_method) = vm.get_method(obj, identifier!(vm, __bytes__)) {
// construct an exact bytes from __bytes__ slot.
// if __bytes__ return a bytes, use the bytes object except we are the subclass of the bytes
let bytes = bytes_method?.call((), vm)?;
let bytes = if cls.is(vm.ctx.types.bytes_type) {
match bytes.downcast::<PyBytes>() {
Ok(b) => return Ok(b),
Err(bytes) => bytes,
}
} else {
bytes
};
Some(PyBytesInner::try_from_borrowed_object(vm, &bytes))
} else {
None
}
}
_ => None,
}
.unwrap_or_else(|| self.get_bytearray_inner(vm))?;
PyBytes::from(inner).into_ref_with_type(vm, cls)
}

pub fn get_bytearray_inner(self, vm: &VirtualMachine) -> PyResult<PyBytesInner> {
match (self.source, self.encoding, self.errors) {
(OptionalArg::Present(obj), OptionalArg::Missing, OptionalArg::Missing) => {
Expand Down
17 changes: 6 additions & 11 deletions crates/vm/src/protocol/object.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,16 @@
use crate::{
AsObject, Py, PyObject, PyObjectRef, PyRef, PyResult, TryFromObject, VirtualMachine,
builtins::{
PyAsyncGen, PyDict, PyDictRef, PyGenericAlias, PyInt, PyList, PyStr, PyTuple, PyTupleRef,
PyType, PyTypeRef, PyUtf8Str, pystr::AsPyStr,
PyAsyncGen, PyBytes, PyDict, PyDictRef, PyGenericAlias, PyInt, PyList, PyStr, PyTuple,
PyTupleRef, PyType, PyTypeRef, PyUtf8Str, pystr::AsPyStr,
},
bytes_inner::ByteInnerNewOptions,
common::{hash::PyHash, str::to_ascii},
convert::{ToPyObject, ToPyResult},
dict_inner::DictKey,
function::{Either, OptionalArg, PyArithmeticValue, PySetterValue},
function::{Either, FuncArgs, PyArithmeticValue, PySetterValue},
object::PyPayload,
protocol::{PyIter, PyMapping, PySequence},
types::PyComparisonOp,
types::{Constructor, PyComparisonOp},
};

// RustPython doesn't need these items
Expand All @@ -37,12 +36,8 @@ impl PyObjectRef {
match self.downcast_exact::<PyInt>(vm) {
Ok(int) => Err(vm.new_downcast_type_error(bytes_type, &int)),
Err(obj) => {
let options = ByteInnerNewOptions {
source: OptionalArg::Present(obj),
encoding: OptionalArg::Missing,
errors: OptionalArg::Missing,
};
options.get_bytes(bytes_type.to_owned(), vm).map(Into::into)
let args = FuncArgs::from(vec![obj]);
<PyBytes as Constructor>::slot_new(bytes_type.to_owned(), args, vm)
}
}
}
Expand Down
Loading