From f9e2fb4d32fc09b053ca0f573716a7cfaaedbc33 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Thu, 11 Dec 2025 20:09:14 +0900 Subject: [PATCH 1/2] set/tuple --- crates/vm/src/builtins/set.rs | 44 +++++++++++++++++------------- crates/vm/src/builtins/tuple.rs | 48 +++++++++++++++++++-------------- 2 files changed, 54 insertions(+), 38 deletions(-) diff --git a/crates/vm/src/builtins/set.rs b/crates/vm/src/builtins/set.rs index 429fd94c2a..7fde8d3278 100644 --- a/crates/vm/src/builtins/set.rs +++ b/crates/vm/src/builtins/set.rs @@ -918,35 +918,43 @@ impl Representable for PySet { } impl Constructor for PyFrozenSet { - type Args = OptionalArg; + type Args = Vec; fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult { - let iterable: Self::Args = args.bind(vm)?; - let elements = if let OptionalArg::Present(iterable) = iterable { - let iterable = if cls.is(vm.ctx.types.frozenset_type) { - match iterable.downcast_exact::(vm) { - Ok(fs) => return Ok(fs.into_pyref().into()), - Err(iterable) => iterable, - } - } else { - iterable - }; + let iterable: OptionalArg = args.bind(vm)?; + + // Optimizations for exact frozenset type + if cls.is(vm.ctx.types.frozenset_type) { + // Return exact frozenset as-is + if let OptionalArg::Present(ref input) = iterable + && let Ok(fs) = input.clone().downcast_exact::(vm) + { + return Ok(fs.into_pyref().into()); + } + + // Return empty frozenset singleton + if iterable.is_missing() { + return Ok(vm.ctx.empty_frozenset.clone().into()); + } + } + + let elements: Vec = if let OptionalArg::Present(iterable) = iterable { iterable.try_to_value(vm)? } else { vec![] }; - // Return empty fs if iterable passed is empty and only for exact fs types. + // Return empty frozenset singleton for exact frozenset types (when iterable was empty) if elements.is_empty() && cls.is(vm.ctx.types.frozenset_type) { - Ok(vm.ctx.empty_frozenset.clone().into()) - } else { - Self::from_iter(vm, elements) - .and_then(|o| o.into_ref_with_type(vm, cls).map(Into::into)) + return Ok(vm.ctx.empty_frozenset.clone().into()); } + + let payload = Self::py_new(&cls, elements, vm)?; + payload.into_ref_with_type(vm, cls).map(Into::into) } - fn py_new(_cls: &Py, _args: Self::Args, _vm: &VirtualMachine) -> PyResult { - unreachable!("use slot_new") + fn py_new(_cls: &Py, elements: Self::Args, vm: &VirtualMachine) -> PyResult { + Self::from_iter(vm, elements) } } diff --git a/crates/vm/src/builtins/tuple.rs b/crates/vm/src/builtins/tuple.rs index 61a9cf414c..fada8840bb 100644 --- a/crates/vm/src/builtins/tuple.rs +++ b/crates/vm/src/builtins/tuple.rs @@ -110,37 +110,45 @@ impl_from_into_pytuple!(A, B, C, D, E, F, G); pub type PyTupleRef = PyRef; impl Constructor for PyTuple { - type Args = OptionalArg; + type Args = Vec; fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult { - let iterable: Self::Args = args.bind(vm)?; + let iterable: OptionalArg = args.bind(vm)?; + + // Optimizations for exact tuple type + if cls.is(vm.ctx.types.tuple_type) { + // Return exact tuple as-is + if let OptionalArg::Present(ref input) = iterable + && let Ok(tuple) = input.clone().downcast_exact::(vm) + { + return Ok(tuple.into_pyref().into()); + } + + // Return empty tuple singleton + if iterable.is_missing() { + return Ok(vm.ctx.empty_tuple.clone().into()); + } + } + let elements = if let OptionalArg::Present(iterable) = iterable { - let iterable = if cls.is(vm.ctx.types.tuple_type) { - match iterable.downcast_exact::(vm) { - Ok(tuple) => return Ok(tuple.into_pyref().into()), - Err(iterable) => iterable, - } - } else { - iterable - }; iterable.try_to_value(vm)? } else { vec![] }; - // Return empty tuple only for exact tuple types if the iterable is empty. + + // Return empty tuple singleton for exact tuple types (when iterable was empty) if elements.is_empty() && cls.is(vm.ctx.types.tuple_type) { - Ok(vm.ctx.empty_tuple.clone().into()) - } else { - Self { - elements: elements.into_boxed_slice(), - } - .into_ref_with_type(vm, cls) - .map(Into::into) + return Ok(vm.ctx.empty_tuple.clone().into()); } + + let payload = Self::py_new(&cls, elements, vm)?; + payload.into_ref_with_type(vm, cls).map(Into::into) } - fn py_new(_cls: &Py, _args: Self::Args, _vm: &VirtualMachine) -> PyResult { - unreachable!("use slot_new") + fn py_new(_cls: &Py, elements: Self::Args, _vm: &VirtualMachine) -> PyResult { + Ok(Self { + elements: elements.into_boxed_slice(), + }) } } From c5abc44e2145845fc055f2ac56d1d1be05d0ee49 Mon Sep 17 00:00:00 2001 From: Jeong YunWon Date: Thu, 11 Dec 2025 20:30:37 +0900 Subject: [PATCH 2/2] bytes/str --- crates/vm/src/builtins/bytes.rs | 59 +++++++++++++++++++++++++++++--- crates/vm/src/builtins/str.rs | 39 +++++++++++---------- crates/vm/src/bytes_inner.rs | 42 ++--------------------- crates/vm/src/protocol/object.rs | 17 ++++----- 4 files changed, 83 insertions(+), 74 deletions(-) diff --git a/crates/vm/src/builtins/bytes.rs b/crates/vm/src/builtins/bytes.rs index 3da1b68ef0..70a3340127 100644 --- a/crates/vm/src/builtins/bytes.rs +++ b/crates/vm/src/builtins/bytes.rs @@ -92,15 +92,64 @@ pub(crate) fn init(context: &Context) { } impl Constructor for PyBytes { - type Args = ByteInnerNewOptions; + type Args = Vec; fn slot_new(cls: PyTypeRef, args: FuncArgs, vm: &VirtualMachine) -> PyResult { - let options: Self::Args = args.bind(vm)?; - options.get_bytes(cls, vm).to_pyresult(vm) + let options: ByteInnerNewOptions = args.bind(vm)?; + + // Optimizations for exact bytes type + if cls.is(vm.ctx.types.bytes_type) { + // Return empty bytes singleton + if options.source.is_missing() + && options.encoding.is_missing() + && options.errors.is_missing() + { + return Ok(vm.ctx.empty_bytes.clone().into()); + } + + // Return exact bytes as-is + if let OptionalArg::Present(ref obj) = options.source + && options.encoding.is_missing() + && options.errors.is_missing() + && let Ok(b) = obj.clone().downcast_exact::(vm) + { + return Ok(b.into_pyref().into()); + } + } + + // Handle __bytes__ method - may return PyBytes directly + if let OptionalArg::Present(ref obj) = options.source + && options.encoding.is_missing() + && options.errors.is_missing() + && let Some(bytes_method) = vm.get_method(obj.clone(), identifier!(vm, __bytes__)) + { + let bytes = bytes_method?.call((), vm)?; + // If exact bytes type and __bytes__ returns bytes, use it directly + if cls.is(vm.ctx.types.bytes_type) + && let Ok(b) = bytes.clone().downcast::() + { + return Ok(b.into()); + } + // Otherwise convert to Vec + let inner = PyBytesInner::try_from_borrowed_object(vm, &bytes)?; + let payload = Self::py_new(&cls, inner.elements, vm)?; + return payload.into_ref_with_type(vm, cls).map(Into::into); + } + + // Fallback to get_bytearray_inner + let elements = options.get_bytearray_inner(vm)?.elements; + + // Return empty bytes singleton for exact bytes types + if elements.is_empty() && cls.is(vm.ctx.types.bytes_type) { + return Ok(vm.ctx.empty_bytes.clone().into()); + } + + let payload = Self::py_new(&cls, elements, vm)?; + payload.into_ref_with_type(vm, cls).map(Into::into) } - fn py_new(_cls: &Py, _args: Self::Args, _vm: &VirtualMachine) -> PyResult { - unreachable!("use slot_new") + fn py_new(_cls: &Py, elements: Self::Args, _vm: &VirtualMachine) -> PyResult { + Ok(Self::from(elements)) } } diff --git a/crates/vm/src/builtins/str.rs b/crates/vm/src/builtins/str.rs index 86c3ab9c81..9b05e19572 100644 --- a/crates/vm/src/builtins/str.rs +++ b/crates/vm/src/builtins/str.rs @@ -351,36 +351,39 @@ impl Constructor for PyStr { type Args = StrArgs; fn slot_new(cls: PyTypeRef, func_args: FuncArgs, vm: &VirtualMachine) -> PyResult { + // Optimization: return exact str as-is (only when no encoding/errors provided) + if cls.is(vm.ctx.types.str_type) + && func_args.args.len() == 1 + && func_args.kwargs.is_empty() + && func_args.args[0].class().is(vm.ctx.types.str_type) + { + return Ok(func_args.args[0].clone()); + } + let args: Self::Args = func_args.bind(vm)?; - let string: PyRef = match args.object { + let payload = Self::py_new(&cls, args, vm)?; + payload.into_ref_with_type(vm, cls).map(Into::into) + } + + fn py_new(_cls: &Py, args: Self::Args, vm: &VirtualMachine) -> PyResult { + match args.object { OptionalArg::Present(input) => { if let OptionalArg::Present(enc) = args.encoding { - vm.state.codec_registry.decode_text( + let s = vm.state.codec_registry.decode_text( input, enc.as_str(), args.errors.into_option(), vm, - )? + )?; + Ok(Self::from(s.as_wtf8().to_owned())) } else { - input.str(vm)? + let s = input.str(vm)?; + Ok(Self::from(s.as_wtf8().to_owned())) } } - OptionalArg::Missing => { - Self::from(String::new()).into_ref_with_type(vm, cls.clone())? - } - }; - if string.class().is(&cls) { - Ok(string.into()) - } else { - Self::from(string.as_wtf8()) - .into_ref_with_type(vm, cls) - .map(Into::into) + OptionalArg::Missing => Ok(Self::from(String::new())), } } - - fn py_new(_cls: &Py, _args: Self::Args, _vm: &VirtualMachine) -> PyResult { - unreachable!("use slot_new") - } } impl PyStr { diff --git a/crates/vm/src/bytes_inner.rs b/crates/vm/src/bytes_inner.rs index 58d272e1cf..8593f16fcd 100644 --- a/crates/vm/src/bytes_inner.rs +++ b/crates/vm/src/bytes_inner.rs @@ -1,16 +1,15 @@ // spell-checker:ignore unchunked use crate::{ - AsObject, PyObject, PyObjectRef, PyPayload, PyResult, TryFromBorrowedObject, VirtualMachine, + AsObject, PyObject, PyObjectRef, PyResult, TryFromBorrowedObject, VirtualMachine, anystr::{self, AnyStr, AnyStrContainer, AnyStrWrapper}, builtins::{ PyBaseExceptionRef, PyByteArray, PyBytes, PyBytesRef, PyInt, PyIntRef, PyStr, PyStrRef, - PyTypeRef, pystr, + pystr, }, byte::bytes_from_object, cformat::cformat_bytes, common::hash, function::{ArgIterable, Either, OptionalArg, OptionalOption, PyComparisonValue}, - identifier, literal::escape::Escape, protocol::PyBuffer, sequence::{SequenceExt, SequenceMutExt}, @@ -91,43 +90,6 @@ impl ByteInnerNewOptions { }) } - pub fn get_bytes(self, cls: PyTypeRef, vm: &VirtualMachine) -> PyResult { - let inner = match (&self.source, &self.encoding, &self.errors) { - (OptionalArg::Present(obj), OptionalArg::Missing, OptionalArg::Missing) => { - let obj = obj.clone(); - // construct an exact bytes from an exact bytes do not clone - let obj = if cls.is(vm.ctx.types.bytes_type) { - match obj.downcast_exact::(vm) { - Ok(b) => return Ok(b.into_pyref()), - Err(obj) => obj, - } - } else { - obj - }; - - if let Some(bytes_method) = vm.get_method(obj, identifier!(vm, __bytes__)) { - // construct an exact bytes from __bytes__ slot. - // if __bytes__ return a bytes, use the bytes object except we are the subclass of the bytes - let bytes = bytes_method?.call((), vm)?; - let bytes = if cls.is(vm.ctx.types.bytes_type) { - match bytes.downcast::() { - Ok(b) => return Ok(b), - Err(bytes) => bytes, - } - } else { - bytes - }; - Some(PyBytesInner::try_from_borrowed_object(vm, &bytes)) - } else { - None - } - } - _ => None, - } - .unwrap_or_else(|| self.get_bytearray_inner(vm))?; - PyBytes::from(inner).into_ref_with_type(vm, cls) - } - pub fn get_bytearray_inner(self, vm: &VirtualMachine) -> PyResult { match (self.source, self.encoding, self.errors) { (OptionalArg::Present(obj), OptionalArg::Missing, OptionalArg::Missing) => { diff --git a/crates/vm/src/protocol/object.rs b/crates/vm/src/protocol/object.rs index 744eba9c55..66f1483c2e 100644 --- a/crates/vm/src/protocol/object.rs +++ b/crates/vm/src/protocol/object.rs @@ -4,17 +4,16 @@ use crate::{ AsObject, Py, PyObject, PyObjectRef, PyRef, PyResult, TryFromObject, VirtualMachine, builtins::{ - PyAsyncGen, PyDict, PyDictRef, PyGenericAlias, PyInt, PyList, PyStr, PyTuple, PyTupleRef, - PyType, PyTypeRef, PyUtf8Str, pystr::AsPyStr, + PyAsyncGen, PyBytes, PyDict, PyDictRef, PyGenericAlias, PyInt, PyList, PyStr, PyTuple, + PyTupleRef, PyType, PyTypeRef, PyUtf8Str, pystr::AsPyStr, }, - bytes_inner::ByteInnerNewOptions, common::{hash::PyHash, str::to_ascii}, convert::{ToPyObject, ToPyResult}, dict_inner::DictKey, - function::{Either, OptionalArg, PyArithmeticValue, PySetterValue}, + function::{Either, FuncArgs, PyArithmeticValue, PySetterValue}, object::PyPayload, protocol::{PyIter, PyMapping, PySequence}, - types::PyComparisonOp, + types::{Constructor, PyComparisonOp}, }; // RustPython doesn't need these items @@ -37,12 +36,8 @@ impl PyObjectRef { match self.downcast_exact::(vm) { Ok(int) => Err(vm.new_downcast_type_error(bytes_type, &int)), Err(obj) => { - let options = ByteInnerNewOptions { - source: OptionalArg::Present(obj), - encoding: OptionalArg::Missing, - errors: OptionalArg::Missing, - }; - options.get_bytes(bytes_type.to_owned(), vm).map(Into::into) + let args = FuncArgs::from(vec![obj]); + ::slot_new(bytes_type.to_owned(), args, vm) } } }