Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions compiler/codegen/src/ir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,8 @@ use crate::{IndexMap, IndexSet, error::InternalError};
use rustpython_compiler_core::{
OneIndexed, SourceLocation,
bytecode::{
CodeFlags, CodeObject, CodeUnit, ConstantData, InstrDisplayContext, Instruction, Label,
OpArg, PyCodeLocationInfoKind,
CodeFlags, CodeObject, CodeUnit, CodeUnits, ConstantData, InstrDisplayContext, Instruction,
Label, OpArg, PyCodeLocationInfoKind,
},
};

Expand Down Expand Up @@ -214,7 +214,7 @@ impl CodeInfo {
qualname: qualname.unwrap_or(obj_name),

max_stackdepth,
instructions: instructions.into_boxed_slice(),
instructions: CodeUnits::from(instructions),
locations: locations.into_boxed_slice(),
constants: constants.into_iter().collect(),
names: name_cache.into_iter().collect(),
Expand Down
73 changes: 67 additions & 6 deletions compiler/core/src/bytecode.rs
Original file line number Diff line number Diff line change
@@ -1,13 +1,16 @@
//! Implement python as a virtual machine with bytecode. This module
//! implements bytecode structure.

use crate::{OneIndexed, SourceLocation};
use crate::{
marshal::MarshalError,
{OneIndexed, SourceLocation},
};
use bitflags::bitflags;
use itertools::Itertools;
use malachite_bigint::BigInt;
use num_complex::Complex64;
use rustpython_wtf8::{Wtf8, Wtf8Buf};
use std::{collections::BTreeSet, fmt, hash, marker::PhantomData, mem};
use std::{collections::BTreeSet, fmt, hash, marker::PhantomData, mem, ops::Deref};

#[derive(Copy, Clone, Debug, Hash, PartialEq, Eq)]
#[repr(i8)]
Expand Down Expand Up @@ -195,7 +198,7 @@ impl ConstantBag for BasicBag {
/// a code object. Also a module has a code object.
#[derive(Clone)]
pub struct CodeObject<C: Constant = ConstantData> {
pub instructions: Box<[CodeUnit]>,
pub instructions: CodeUnits,
pub locations: Box<[SourceLocation]>,
pub flags: CodeFlags,
/// Number of positional-only arguments
Expand Down Expand Up @@ -257,6 +260,12 @@ impl OpArgByte {
}
}

/// Wraps a raw byte as an [`OpArgByte`] without any validation.
impl From<u8> for OpArgByte {
    fn from(byte: u8) -> Self {
        let wrapped = Self(byte);
        wrapped
    }
}

impl fmt::Debug for OpArgByte {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
self.0.fmt(f)
Expand Down Expand Up @@ -808,14 +817,14 @@ impl From<Instruction> for u8 {
}

impl TryFrom<u8> for Instruction {
type Error = crate::marshal::MarshalError;
type Error = MarshalError;

#[inline]
fn try_from(value: u8) -> Result<Self, crate::marshal::MarshalError> {
fn try_from(value: u8) -> Result<Self, MarshalError> {
if value <= u8::from(LAST_INSTRUCTION) {
Ok(unsafe { std::mem::transmute::<u8, Self>(value) })
} else {
Err(crate::marshal::MarshalError::InvalidBytecode)
Err(MarshalError::InvalidBytecode)
}
}
}
Expand All @@ -835,6 +844,58 @@ impl CodeUnit {
}
}

/// Decodes a single code unit from exactly two bytes: opcode, then argument.
impl TryFrom<&[u8]> for CodeUnit {
    type Error = MarshalError;

    /// Returns `InvalidBytecode` when the slice is not exactly two bytes long;
    /// any error from converting the opcode byte is propagated unchanged.
    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
        match value {
            // Exactly two bytes: the first is the opcode, the second its argument.
            &[opcode, arg] => {
                let unit = Self::new(opcode.try_into()?, arg.into());
                Ok(unit)
            }
            _ => Err(Self::Error::InvalidBytecode),
        }
    }
}

/// Owned sequence of [`CodeUnit`]s, stored as a boxed slice.
#[derive(Clone)]
pub struct CodeUnits(Box<[CodeUnit]>);

/// Decodes a byte string into code units, two bytes per unit.
impl TryFrom<&[u8]> for CodeUnits {
    type Error = MarshalError;

    /// Returns `InvalidBytecode` when the input has an odd number of bytes;
    /// otherwise each two-byte chunk is decoded with `CodeUnit::try_from` and
    /// the first failure, if any, is returned.
    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
        let pairs = value.chunks_exact(2);

        // A non-empty remainder means the byte count was odd.
        if !pairs.remainder().is_empty() {
            return Err(Self::Error::InvalidBytecode);
        }

        pairs.map(CodeUnit::try_from).collect()
    }
}

/// Moves a fixed-size array of code units onto the heap as a `CodeUnits`.
impl<const N: usize> From<[CodeUnit; N]> for CodeUnits {
    fn from(value: [CodeUnit; N]) -> Self {
        let units: Box<[CodeUnit]> = Box::from(value);
        Self(units)
    }
}

/// Converts a vector of code units into a `CodeUnits`.
impl From<Vec<CodeUnit>> for CodeUnits {
    fn from(value: Vec<CodeUnit>) -> Self {
        let units: Box<[CodeUnit]> = value.into();
        Self(units)
    }
}

/// Collects an iterator of code units into a `CodeUnits`.
impl FromIterator<CodeUnit> for CodeUnits {
    fn from_iter<T: IntoIterator<Item = CodeUnit>>(iter: T) -> Self {
        let units: Box<[CodeUnit]> = iter.into_iter().collect();
        Self(units)
    }
}

impl Deref for CodeUnits {
type Target = [CodeUnit];

fn deref(&self) -> &Self::Target {
&self.0
}
}

use self::Instruction::*;

bitflags! {
Expand Down
17 changes: 2 additions & 15 deletions compiler/core/src/marshal.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,19 +165,6 @@ impl<'a> ReadBorrowed<'a> for &'a [u8] {
}
}

/// Parses bytecode bytes into CodeUnit instructions.
/// Each instruction is 2 bytes: opcode and argument.
pub fn parse_instructions_from_bytes(bytes: &[u8]) -> Result<Box<[CodeUnit]>> {
bytes
.chunks_exact(2)
.map(|cu| {
let op = Instruction::try_from(cu[0])?;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Where is this used in new code?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let arg = OpArgByte(cu[1]);
Ok(CodeUnit { op, arg })
})
.collect()
}

pub struct Cursor<B> {
pub data: B,
pub position: usize,
Expand All @@ -197,8 +184,8 @@ pub fn deserialize_code<R: Read, Bag: ConstantBag>(
bag: Bag,
) -> Result<CodeObject<Bag::Constant>> {
let len = rdr.read_u32()?;
let instructions = rdr.read_slice(len * 2)?;
let instructions = parse_instructions_from_bytes(instructions)?;
let raw_instructions = rdr.read_slice(len * 2)?;
let instructions = CodeUnits::try_from(raw_instructions)?;

let len = rdr.read_u32()?;
let locations = (0..len)
Expand Down
29 changes: 5 additions & 24 deletions vm/src/builtins/code.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,10 @@
/*! Infamous code object. The python class `code`

*/
//! Infamous code object. The python class `code`

use super::{PyBytesRef, PyStrRef, PyTupleRef, PyType, PyTypeRef};
use crate::{
AsObject, Context, Py, PyObject, PyObjectRef, PyPayload, PyResult, VirtualMachine,
builtins::PyStrInterned,
bytecode::{self, AsBag, BorrowedConstant, CodeFlags, CodeUnit, Constant, ConstantBag},
bytecode::{self, AsBag, BorrowedConstant, CodeFlags, Constant, ConstantBag},
class::{PyClassImpl, StaticType},
convert::ToPyObject,
frozen,
Expand All @@ -15,11 +13,7 @@ use crate::{
};
use malachite_bigint::BigInt;
use num_traits::Zero;
use rustpython_compiler_core::{
OneIndexed,
bytecode::PyCodeLocationInfoKind,
marshal::{MarshalError, parse_instructions_from_bytes},
};
use rustpython_compiler_core::{OneIndexed, bytecode::CodeUnits, bytecode::PyCodeLocationInfoKind};
use std::{borrow::Borrow, fmt, ops::Deref};

/// State for iterating through code address ranges
Expand Down Expand Up @@ -457,7 +451,7 @@ impl Constructor for PyCode {

// Parse and validate bytecode from bytes
let bytecode_bytes = args.co_code.as_bytes();
let instructions = parse_bytecode(bytecode_bytes)
let instructions = CodeUnits::try_from(bytecode_bytes)
.map_err(|e| vm.new_value_error(format!("invalid bytecode: {}", e)))?;

// Convert constants
Expand Down Expand Up @@ -925,7 +919,7 @@ impl PyCode {
let instructions = match co_code {
OptionalArg::Present(code_bytes) => {
// Parse and validate bytecode from bytes
parse_bytecode(code_bytes.as_bytes())
CodeUnits::try_from(code_bytes.as_bytes())
.map_err(|e| vm.new_value_error(format!("invalid bytecode: {}", e)))?
}
OptionalArg::Missing => self.code.instructions.clone(),
Expand Down Expand Up @@ -1033,19 +1027,6 @@ impl ToPyObject for bytecode::CodeObject {
}
}

/// Checks the byte length, then parses the bytes into `CodeUnit` instructions.
///
/// Fails with `MarshalError::InvalidBytecode` when the input has an odd
/// length; otherwise returns whatever `parse_instructions_from_bytes` yields.
/// Note: `MarshalError` is not strictly the right error type here, since this
/// is not part of the marshalling API; it is (temporarily) reused for simplicity.
fn parse_bytecode(bytecode_bytes: &[u8]) -> Result<Box<[CodeUnit]>, MarshalError> {
    // Every instruction occupies two bytes, so only even lengths are parsed.
    if bytecode_bytes.len().is_multiple_of(2) {
        parse_instructions_from_bytes(bytecode_bytes)
    } else {
        Err(MarshalError::InvalidBytecode)
    }
}

// Helper struct for reading linetable
struct LineTableReader<'a> {
data: &'a [u8],
Expand Down
Loading