From c72cb30655216db4818ec4e3e3f175b55b5b6cd7 Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Wed, 24 Apr 2024 15:59:30 -0700 Subject: [PATCH 01/19] Disorganized but working c --- pgml-sdks/pgml/.gitignore | 5 + pgml-sdks/pgml/Cargo.lock | 2 +- pgml-sdks/pgml/Cargo.toml | 4 + pgml-sdks/pgml/go/Makefile | 31 ++ pgml-sdks/pgml/go/go.mod | 3 + pgml-sdks/pgml/go/pgml.go | 23 + pgml-sdks/pgml/go/test.c | 37 ++ pgml-sdks/pgml/src/builtins.rs | 5 +- pgml-sdks/pgml/src/collection.rs | 32 ++ pgml-sdks/pgml/src/languages/c.rs | 18 + pgml-sdks/pgml/src/languages/mod.rs | 3 + pgml-sdks/pgml/src/model.rs | 5 +- pgml-sdks/pgml/src/open_source_ai.rs | 17 +- pgml-sdks/pgml/src/pipeline.rs | 3 + pgml-sdks/pgml/src/query_runner.rs | 21 +- pgml-sdks/pgml/src/splitter.rs | 5 +- pgml-sdks/pgml/src/transformer_pipeline.rs | 5 +- .../rust-bridge/rust-bridge-macros/src/c.rs | 448 ++++++++++++++++++ .../rust-bridge/rust-bridge-macros/src/lib.rs | 5 + .../rust-bridge/rust-bridge-traits/src/c.rs | 157 ++++++ .../rust-bridge/rust-bridge-traits/src/lib.rs | 4 +- 21 files changed, 804 insertions(+), 29 deletions(-) create mode 100644 pgml-sdks/pgml/go/Makefile create mode 100644 pgml-sdks/pgml/go/go.mod create mode 100644 pgml-sdks/pgml/go/pgml.go create mode 100644 pgml-sdks/pgml/go/test.c create mode 100644 pgml-sdks/pgml/src/languages/c.rs create mode 100644 pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs create mode 100644 pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs diff --git a/pgml-sdks/pgml/.gitignore b/pgml-sdks/pgml/.gitignore index 2d5a692e0..e82a5d1fb 100644 --- a/pgml-sdks/pgml/.gitignore +++ b/pgml-sdks/pgml/.gitignore @@ -167,3 +167,8 @@ cython_debug/ # local scratch pad scratch.sql scratch.py + +# Some SDK specific things +expanded.rs +test +pgml.h diff --git a/pgml-sdks/pgml/Cargo.lock b/pgml-sdks/pgml/Cargo.lock index 11128b907..202436a71 100644 --- a/pgml-sdks/pgml/Cargo.lock +++ b/pgml-sdks/pgml/Cargo.lock @@ -1531,7 +1531,7 @@ checksum = "e3148f5046208a5d56bcfc03053e3ca6334e51da8dfb19b6cdc8b306fae3283e" [[package]] name = "pgml" -version = "1.0.0" +version = "1.0.2" dependencies = [ "anyhow", "async-trait", diff --git a/pgml-sdks/pgml/Cargo.toml b/pgml-sdks/pgml/Cargo.toml index e78e7413a..89c2f6275 100644 --- a/pgml-sdks/pgml/Cargo.toml +++ b/pgml-sdks/pgml/Cargo.toml @@ -13,6 +13,9 @@ keywords = ["postgres", "machine learning", "vector databases", "embeddings"] name = "pgml" crate-type = ["lib", "cdylib"] +[rust-analyzer.checkOnSave] +extraArgs = ["--target-dir", "/path/to/proect/target/check"] + [dependencies] rust_bridge = {path = "../rust-bridge/rust-bridge", version = "0.1.0"} sqlx = { version = "0.7.3", features = [ "runtime-tokio-rustls", "postgres", "json", "time", "uuid"] } @@ -50,3 +53,4 @@ once_cell = "1.19.0" default = [] python = ["dep:pyo3", "dep:pyo3-asyncio"] javascript = ["dep:neon"] +c = [] diff --git a/pgml-sdks/pgml/go/Makefile b/pgml-sdks/pgml/go/Makefile new file mode 100644 index 000000000..a8d614023 --- /dev/null +++ b/pgml-sdks/pgml/go/Makefile @@ -0,0 +1,31 @@ +BINARY_NAME=pgml + +build: + cargo build --features c + cargo expand --features c > expanded.rs + cbindgen --lang C -o pgml.h expanded.rs + # GOARCH=amd64 GOOS=darwin go build -o ${BINARY_NAME}-darwin main.go + GOARCH=amd64 GOOS=linux go build -o ${BINARY_NAME}-linux pgml.go + # GOARCH=amd64 GOOS=windows go build -o ${BINARY_NAME}-windows main.go + +build_test: + cargo build --features c + cargo expand --features c > expanded.rs + cbindgen --lang C -o pgml.h expanded.rs + gcc test.c -o test -l pgml -L ./../target/debug + +test: build_test + LD_LIBRARY_PATH=./../target/debug ./test + +test_c: + gcc test.c -o test -l pgml -L ./../target/debug + LD_LIBRARY_PATH=./../target/debug ./test + +run: build + LD_LIBRARY_PATH=./../target/debug ./${BINARY_NAME}-linux + +clean: + go clean + # rm ${BINARY_NAME}-darwin + rm ${BINARY_NAME}-linux + # rm ${BINARY_NAME}-windows diff --git a/pgml-sdks/pgml/go/go.mod b/pgml-sdks/pgml/go/go.mod new file mode 100644 index 000000000..6b1511192 --- /dev/null +++ b/pgml-sdks/pgml/go/go.mod @@ -0,0 +1,3 @@ +module pgml + +go 1.22.2 diff --git a/pgml-sdks/pgml/go/pgml.go b/pgml-sdks/pgml/go/pgml.go new file mode 100644 index 000000000..e22b91dd6 --- /dev/null +++ b/pgml-sdks/pgml/go/pgml.go @@ -0,0 +1,23 @@ +package main + +/* +#cgo LDFLAGS: -l pgml -L ./../target/debug +#include "pgml.h" +*/ +import "C" + +import ( + "unsafe" +) + +type Collection struct { + collection *C.CollectionC +} + +func main() { + c_string_p := C.CString("Test CString") + defer C.free(unsafe.Pointer(c_string_p)) + collection := C.new_collection(c_string_p) + C.test_collection(collection) + defer C.free_collection(collection) +} diff --git a/pgml-sdks/pgml/go/test.c b/pgml-sdks/pgml/go/test.c new file mode 100644 index 000000000..9992fd19d --- /dev/null +++ b/pgml-sdks/pgml/go/test.c @@ -0,0 +1,37 @@ +#include + +#include "pgml.h" + +int main() { + // Create the Collection and Pipeline + CollectionC * collection = CollectionC_new("test_c", NULL); + PipelineC * pipeline = PipelineC_new("test_c", "{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"intfloat/e5-small\"}}}"); + + // Add the Pipeline to the Collection + CollectionC_add_pipeline(collection, pipeline); + + // Upsert the documents + char * documents_to_upsert[2] = {"{\"id\": \"doc1\", \"text\": \"test1\"}", "{\"id\": \"doc2\", \"text\": \"test2\"}"}; + CollectionC_upsert_documents(collection, documents_to_upsert, 2, NULL); + + // Retrieve the documents + unsigned long r_size = 0; + char** documents = CollectionC_get_documents(collection, NULL, &r_size); + + // Print the documents + printf("\n\nPrinting documents:\n"); + int i; + for (i = 0; i < r_size; i++) { + printf("Document %u -> %s\n", i, documents[i]); + } + + // Search over the documents + r_size = 0; + char** results = CollectionC_vector_search(collection, "{\"query\": {\"fields\": {\"text\": {\"query\": \"Test query!\"}}}, \"limit\": 5}", pipeline, &r_size); + printf("\n\nPrinting results:\n"); + for (i = 0; i < r_size; i++) { + printf("Result %u -> %s\n", i, results[i]); + } + + return 0; +} diff --git a/pgml-sdks/pgml/src/builtins.rs b/pgml-sdks/pgml/src/builtins.rs index 652bf0b8c..b23f4d699 100644 --- a/pgml-sdks/pgml/src/builtins.rs +++ b/pgml-sdks/pgml/src/builtins.rs @@ -3,7 +3,8 @@ use sqlx::Row; use tracing::instrument; /// Provides access to builtin database methods -#[derive(alias, Debug, Clone)] +// #[derive(alias, Debug, Clone)] +#[derive(Debug, Clone)] pub struct Builtins { database_url: Option, } @@ -13,7 +14,7 @@ use crate::{get_or_initialize_pool, query_runner::QueryRunner, types::Json}; #[cfg(feature = "python")] use crate::{query_runner::QueryRunnerPython, types::JsonPython}; -#[alias_methods(new, query, transform)] +// #[alias_methods(new, query, transform)] impl Builtins { pub fn new(database_url: Option) -> Self { Self { database_url } diff --git a/pgml-sdks/pgml/src/collection.rs b/pgml-sdks/pgml/src/collection.rs index f8107d050..ba24420ab 100644 --- a/pgml-sdks/pgml/src/collection.rs +++ b/pgml-sdks/pgml/src/collection.rs @@ -33,6 +33,9 @@ use crate::{ #[cfg(feature = "python")] use crate::{pipeline::PipelinePython, query_builder::QueryBuilderPython, types::JsonPython}; +#[cfg(feature = "c")] +use crate::{languages::c::JsonC, pipeline::PipelineC, query_builder::QueryBuilderC}; + /// Our project tasks #[derive(Debug, Clone)] pub enum ProjectTask { @@ -99,6 +102,35 @@ pub(crate) struct CollectionDatabaseData { pub project_info: ProjectInfo, } +// #[repr(C)] +// pub struct CollectionC { +// pub collection: *mut Collection, +// } + +// #[no_mangle] +// pub unsafe extern "C" fn new_collection(name: *const std::ffi::c_char) -> *mut CollectionC { +// let name = std::ffi::CStr::from_ptr(name).to_str().unwrap(); +// println!("Nice one Silas: {}", name); +// let collection = Box::into_raw(Box::new(Collection::new(name, None).unwrap())); +// Box::into_raw(Box::new(CollectionC { collection })) +// } + +// #[no_mangle] +// pub unsafe extern "C" fn free_collection(collection: *mut CollectionC) { +// if collection.is_null() { +// return; +// } +// drop(Box::from_raw(collection)); +// } + +// #[no_mangle] +// pub unsafe extern "C" fn test_collection(collection: *mut CollectionC) { +// let collection: *mut Collection = (*collection).collection; +// let collection: Collection = (*collection).clone(); +// println!("Nice one Silas x two: {}", collection.name); +// println!("test"); +// } + /// A collection of documents #[derive(alias, Debug, Clone)] pub struct Collection { diff --git a/pgml-sdks/pgml/src/languages/c.rs b/pgml-sdks/pgml/src/languages/c.rs new file mode 100644 index 000000000..3babf097b --- /dev/null +++ b/pgml-sdks/pgml/src/languages/c.rs @@ -0,0 +1,18 @@ +use crate::types::{DateTime, GeneralJsonAsyncIterator, GeneralJsonIterator, Json}; +use rust_bridge::c::CustomInto; + +pub type JsonC = std::ffi::c_char; + +unsafe impl CustomInto for *mut JsonC { + unsafe fn custom_into(self) -> Json { + let s = std::ffi::CStr::from_ptr(self).to_str().unwrap(); + serde_json::from_str::(s).unwrap().into() + } +} + +unsafe impl CustomInto<*mut JsonC> for Json { + unsafe fn custom_into(self) -> *mut JsonC { + let s = serde_json::to_string(&self).unwrap(); + std::ffi::CString::new(s).unwrap().into_raw() + } +} diff --git a/pgml-sdks/pgml/src/languages/mod.rs b/pgml-sdks/pgml/src/languages/mod.rs index dda671ec1..43340b02b 100644 --- a/pgml-sdks/pgml/src/languages/mod.rs +++ b/pgml-sdks/pgml/src/languages/mod.rs @@ -3,3 +3,6 @@ pub mod javascript; #[cfg(feature = "python")] pub mod python; + +#[cfg(feature = "c")] +pub mod c; diff --git a/pgml-sdks/pgml/src/model.rs b/pgml-sdks/pgml/src/model.rs index 432654298..a5eb75552 100644 --- a/pgml-sdks/pgml/src/model.rs +++ b/pgml-sdks/pgml/src/model.rs @@ -52,7 +52,8 @@ pub(crate) struct ModelDatabaseData { } /// A model used for embedding, inference, etc... -#[derive(alias, Debug, Clone)] +// #[derive(alias, Debug, Clone)] +#[derive(Debug, Clone)] pub struct Model { pub(crate) name: String, pub(crate) runtime: ModelRuntime, @@ -66,7 +67,7 @@ impl Default for Model { } } -#[alias_methods(new, transform)] +// #[alias_methods(new, transform)] impl Model { /// Creates a new [Model] pub fn new(name: Option, source: Option, parameters: Option) -> Self { diff --git a/pgml-sdks/pgml/src/open_source_ai.rs b/pgml-sdks/pgml/src/open_source_ai.rs index e21397a31..7687c289f 100644 --- a/pgml-sdks/pgml/src/open_source_ai.rs +++ b/pgml-sdks/pgml/src/open_source_ai.rs @@ -14,7 +14,8 @@ use crate::{ use crate::types::{GeneralJsonAsyncIteratorPython, GeneralJsonIteratorPython, JsonPython}; /// A drop in replacement for OpenAI -#[derive(alias, Debug, Clone)] +// #[derive(alias, Debug, Clone)] +#[derive(Debug, Clone)] pub struct OpenSourceAI { database_url: Option, } @@ -162,13 +163,13 @@ impl Iterator for AsyncToSyncJsonIterator { } } -#[alias_methods( - new, - chat_completions_create, - chat_completions_create_async, - chat_completions_create_stream, - chat_completions_create_stream_async -)] +// #[alias_methods( +// new, +// chat_completions_create, +// chat_completions_create_async, +// chat_completions_create_stream, +// chat_completions_create_stream_async +// )] impl OpenSourceAI { /// Creates a new [OpenSourceAI] /// diff --git a/pgml-sdks/pgml/src/pipeline.rs b/pgml-sdks/pgml/src/pipeline.rs index 02b059db3..e082e9e4b 100644 --- a/pgml-sdks/pgml/src/pipeline.rs +++ b/pgml-sdks/pgml/src/pipeline.rs @@ -19,6 +19,9 @@ use crate::{ #[cfg(feature = "python")] use crate::types::JsonPython; +#[cfg(feature = "c")] +use crate::languages::c::JsonC; + type ParsedSchema = HashMap; #[derive(Deserialize)] diff --git a/pgml-sdks/pgml/src/query_runner.rs b/pgml-sdks/pgml/src/query_runner.rs index 623a09662..e4c8df750 100644 --- a/pgml-sdks/pgml/src/query_runner.rs +++ b/pgml-sdks/pgml/src/query_runner.rs @@ -17,22 +17,23 @@ enum BindValue { Json(Json), } -#[derive(alias, Clone, Debug)] +// #[derive(alias, Clone, Debug)] +#[derive(Clone, Debug)] pub struct QueryRunner { query: String, bind_values: Vec, database_url: Option, } -#[alias_methods( - fetch_all, - execute, - bind_string, - bind_int, - bind_float, - bind_bool, - bind_json -)] +// #[alias_methods( +// fetch_all, +// execute, +// bind_string, +// bind_int, +// bind_float, +// bind_bool, +// bind_json +// )] impl QueryRunner { pub fn new(query: &str, database_url: Option) -> Self { Self { diff --git a/pgml-sdks/pgml/src/splitter.rs b/pgml-sdks/pgml/src/splitter.rs index a0847c879..091e1a21a 100644 --- a/pgml-sdks/pgml/src/splitter.rs +++ b/pgml-sdks/pgml/src/splitter.rs @@ -19,7 +19,8 @@ pub(crate) struct SplitterDatabaseData { } /// A text splitter -#[derive(alias, Debug, Clone)] +// #[derive(alias, Debug, Clone)] +#[derive(Debug, Clone)] pub struct Splitter { pub(crate) name: String, pub(crate) parameters: Json, @@ -32,7 +33,7 @@ impl Default for Splitter { } } -#[alias_methods(new)] +// #[alias_methods(new)] impl Splitter { /// Creates a new [Splitter] /// diff --git a/pgml-sdks/pgml/src/transformer_pipeline.rs b/pgml-sdks/pgml/src/transformer_pipeline.rs index 7a6141675..bd50844c2 100644 --- a/pgml-sdks/pgml/src/transformer_pipeline.rs +++ b/pgml-sdks/pgml/src/transformer_pipeline.rs @@ -4,7 +4,8 @@ use sqlx::Row; use tracing::instrument; /// Provides access to builtin database methods -#[derive(alias, Debug, Clone)] +// #[derive(alias, Debug, Clone)] +#[derive(Debug, Clone)] pub struct TransformerPipeline { task: Json, database_url: Option, @@ -16,7 +17,7 @@ use crate::{get_or_initialize_pool, types::Json}; #[cfg(feature = "python")] use crate::types::{GeneralJsonAsyncIteratorPython, JsonPython}; -#[alias_methods(new, transform, transform_stream)] +// #[alias_methods(new, transform, transform_stream)] impl TransformerPipeline { /// Creates a new [TransformerPipeline] /// diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs new file mode 100644 index 000000000..4cd6921cf --- /dev/null +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -0,0 +1,448 @@ +use proc_macro2::Ident; +use quote::{format_ident, quote, ToTokens}; +use std::{ + io::{Read, Write}, + str::FromStr, +}; +use syn::{visit::Visit, DeriveInput, ItemImpl, Type}; + +use crate::{ + common::{AttributeArgs, GetImplMethod}, + types::{OutputType, SupportedType}, +}; + +pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { + let name_ident = format_ident!("{}C", parsed.ident); + let wrapped_type_ident = parsed.ident; + let wrapped_type_name = wrapped_type_ident.to_string(); + + let expanded = quote! { + #[repr(C)] + #[cfg(feature = "c")] + pub struct #name_ident { + pub wrapped: *mut #wrapped_type_ident + } + + #[cfg(feature = "c")] + unsafe impl rust_bridge::c::CustomInto<*mut #name_ident> for #wrapped_type_ident { + unsafe fn custom_into(self) -> *mut #name_ident { + Box::into_raw(Box::new( + #name_ident { + wrapped: Box::into_raw(Box::new(self)) + } + )) + } + } + + #[cfg(feature = "c")] + unsafe impl rust_bridge::c::CustomInto<&'static mut #wrapped_type_ident> for *mut #name_ident { + unsafe fn custom_into(self) -> &'static mut #wrapped_type_ident { + let c = Box::leak(Box::from_raw(self)); + Box::leak(Box::from_raw(c.wrapped)) + } + } + + #[cfg(feature = "c")] + unsafe impl rust_bridge::c::CustomInto<&'static #wrapped_type_ident> for *mut #name_ident { + unsafe fn custom_into(self) -> &'static #wrapped_type_ident { + let c = Box::leak(Box::from_raw(self)); + &*Box::leak(Box::from_raw(c.wrapped)) + } + } + }; + + proc_macro::TokenStream::from(expanded) +} + +pub fn generate_c_methods( + parsed: ItemImpl, + attribute_args: &AttributeArgs, +) -> proc_macro::TokenStream { + let mut methods = Vec::new(); + + let wrapped_type_ident = match *parsed.self_ty { + Type::Path(p) => p.path.segments.first().unwrap().ident.clone(), + _ => panic!("Error getting struct ident for impl block"), + }; + let name_ident = format_ident!("{}C", wrapped_type_ident); + + for item in parsed.items { + // We only create methods for functions listed in the attribute args + match &item { + syn::ImplItem::Fn(f) => { + let method_name = f.sig.ident.to_string(); + if !attribute_args.args.contains(&method_name) { + continue; + } + } + _ => continue, + } + + // Get ImplMethod details - see: https://docs.rs/syn/latest/syn/visit/index.html + let mut method = GetImplMethod::default(); + method.visit_impl_item(&item); + if !method.exists { + continue; + } + let method_ident = method.method_ident.clone(); + + let ( + go_function_arguments, + go_arguments_prep, + mut c_function_arguments, + c_argument_prep, + rust_function_arguments, + ) = get_method_arguments(&wrapped_type_ident, &name_ident, &method); + + let method_name = format_ident!("{}_{}", name_ident, method_ident); + + let (return_part, augment_r_size) = + rust_output_to_c_output(&wrapped_type_ident, &method.output_type); + + if augment_r_size { + c_function_arguments.extend(quote! { + , r_size: *mut std::ffi::c_ulong + }) + } + + let async_part = if method.is_async { + quote! { .await } + } else { + quote! {} + }; + + let (ret_part, augment_part) = if augment_r_size { + ( + quote! { let (ret, ar_size) }, + quote! {*r_size = ar_size as std::ffi::c_ulong; }, + ) + } else { + (quote! { let ret }, quote! {}) + }; + + let rust_call_part = match &method.output_type { + crate::types::OutputType::Result(_) => { + quote! { + #ret_part = #wrapped_type_ident::#method_ident(#rust_function_arguments)#async_part.unwrap().custom_into(); + #augment_part + ret + } + } + crate::types::OutputType::Default => quote! { + #wrapped_type_ident::#method_ident(#rust_function_arguments)#async_part; + }, + crate::types::OutputType::Other(_) => quote! { + #ret_part = #wrapped_type_ident::#method_ident(#rust_function_arguments)#async_part.custom_into(); + #augment_part + ret + }, + }; + + let method = if method.is_async { + quote! { + #[cfg(feature = "c")] + #[no_mangle] + pub unsafe extern "C" fn #method_name(#c_function_arguments) #return_part { + use rust_bridge::c::CustomInto; + use rust_bridge::c::CustomIntoVec; + crate::get_or_set_runtime().block_on(async move { + #c_argument_prep + #rust_call_part + }) + } + } + } else { + quote! { + #[cfg(feature = "c")] + #[no_mangle] + pub unsafe extern "C" fn #method_name(#c_function_arguments) #return_part { + use rust_bridge::c::CustomInto; + use rust_bridge::c::CustomIntoVec; + #c_argument_prep + #rust_call_part + } + } + }; + + methods.push(method); + } + + proc_macro::TokenStream::from(quote! { + #(#methods)* + }) +} + +fn get_method_arguments( + wrapped_type_ident: &Ident, + name_ident: &Ident, + method: &GetImplMethod, +) -> ( + proc_macro2::TokenStream, + proc_macro2::TokenStream, + proc_macro2::TokenStream, + proc_macro2::TokenStream, + proc_macro2::TokenStream, +) { + let mut go_function_arguments = Vec::new(); + let mut go_arguments_prep = Vec::new(); + let mut c_function_arguments = Vec::new(); + let mut c_argument_prep = Vec::new(); + let mut rust_function_arguments = Vec::new(); + + if let Some(_receiver) = &method.receiver { + c_function_arguments.push(format!("s: *mut {name_ident}")); + c_argument_prep.push(format!( + "let s: &mut {wrapped_type_ident} = s.custom_into();" + )); + rust_function_arguments.push("s".to_string()); + } + + for (argument_name, argument_type) in &method.method_arguments { + let ( + go_function_arguments_, + go_arguments_prep_, + c_function_arguments_, + c_argument_prep_, + rust_function_arguments_, + ) = get_c_types(argument_name, argument_type); + + go_function_arguments.push(go_function_arguments_); + go_arguments_prep.push(go_arguments_prep_); + c_function_arguments.push(c_function_arguments_); + c_argument_prep.push(c_argument_prep_); + rust_function_arguments.push(rust_function_arguments_); + } + + ( + proc_macro2::TokenStream::from_str(&go_function_arguments.join("\n")).unwrap(), + proc_macro2::TokenStream::from_str(&go_arguments_prep.join("\n")).unwrap(), + proc_macro2::TokenStream::from_str(&c_function_arguments.join(",")).unwrap(), + proc_macro2::TokenStream::from_str(&c_argument_prep.join("\n")).unwrap(), + proc_macro2::TokenStream::from_str(&rust_function_arguments.join(",")).unwrap(), + ) +} + +// Need: +// - go function arguments +// - go function argument prep for calling c function +// - go conversion from c returned value - For custom types this is always a wrapper for everything else this is a primitve type +// - c function arguments +// - c function arguments prep for calling rust function +// - arguments to call rust function with +// - c conversion from rust returned value - This is done with the into trait +fn get_c_types( + argument_name: &str, + ty: &SupportedType, +) -> (String, String, String, String, String) { + let t = ty.to_language_string(&None); + let c_to_rust = format!("let {argument_name}: {t} = {argument_name}.custom_into();"); + match ty { + SupportedType::Reference(r) => { + let ( + go_function_arguments, + go_argument_prep, + c_function_arguments, + c_argument_prep, + rust_function_arguments, + ) = get_c_types(argument_name, &r.ty); + ( + "".to_string(), + "".to_string(), + c_function_arguments, + c_to_rust, + argument_name.to_string(), + ) + } + SupportedType::str | SupportedType::String => ( + "".to_string(), + "".to_string(), + format!("{argument_name}: *mut std::ffi::c_char"), + c_to_rust, + argument_name.to_string(), + ), + SupportedType::Option(r) => { + let ( + go_function_arguments, + go_argument_prep, + mut c_function_arguments, + c_argument_prep, + rust_function_arguments, + ) = get_c_types(argument_name, &r); + + ( + "".to_string(), + "".to_string(), + c_function_arguments, + c_to_rust, + argument_name.to_string(), + ) + } + SupportedType::bool => ( + "".to_string(), + "".to_string(), + "bool".to_string(), + "".to_string(), + argument_name.to_string(), + ), + SupportedType::Vec(v) => { + let ( + go_function_arguments, + go_argument_prep, + mut c_function_arguments, + mut c_argument_prep, + rust_function_arguments, + ) = get_c_types(argument_name, v); + + let mut c_function_arguments = c_function_arguments.replacen("*mut", "*mut *mut", 1); + c_function_arguments.push_str(", v_size: std::ffi::c_ulong"); + c_argument_prep = "let v_size: usize = v_size as usize;".to_string(); + let c_to_rust = + format!("{c_argument_prep}\nlet {argument_name}: {t} = {argument_name}.custom_into_vec(v_size);"); + + ( + "".to_string(), + "".to_string(), + c_function_arguments, + c_to_rust, + argument_name.to_string(), + ) + } + SupportedType::HashMap(_) => panic!("HashMap arguments not supported in c"), + SupportedType::Tuple(_) => panic!("Tuple arguments not supported in c"), + SupportedType::S => unreachable!(), + SupportedType::i64 => ( + "".to_string(), + "".to_string(), + format!("{argument_name}: std::ffi::c_longlong"), + format!("let {argument_name}: {t} = {argument_name} as {t};"), + argument_name.to_string(), + ), + SupportedType::u64 => ( + "".to_string(), + "".to_string(), + format!("{argument_name}: std::ffi::c_ulonglong"), + format!("let {argument_name}: {t} = {argument_name} as {t};"), + argument_name.to_string(), + ), + SupportedType::i32 => ( + "".to_string(), + "".to_string(), + format!("{argument_name}: std::ffi::c_long"), + format!("let {argument_name}: {t} = {argument_name} as {t};"), + argument_name.to_string(), + ), + SupportedType::f64 => ( + "".to_string(), + "".to_string(), + format!("{argument_name}: std::ffi::c_double"), + format!("let {argument_name}: {t} = {argument_name} as {t};"), + argument_name.to_string(), + ), + SupportedType::CustomType(s) => ( + "".to_string(), + "".to_string(), + format!("{argument_name}: *mut {s}C"), + c_to_rust, + argument_name.to_string(), + ), + _ => todo!(), + } +} + +// fn get_c_types(argument_name: &str, ty: &SupportedType) -> (String, Option) { +// match ty { +// SupportedType::Reference(r) => get_c_types(&r.ty), +// SupportedType::str | SupportedType::String => ("*mut std::ffi::c_char".to_string(), None), +// SupportedType::bool => ("bool".to_string(), None), +// SupportedType::Vec(v) => { +// let mut v = get_c_types(v); +// if !v.0.contains('*') { +// v.0 = format!("*mut {}", v.0); +// } +// if v.1.is_some() { +// panic!("Vec> not supported in c"); +// } +// (v.0, Some("std::ffi::c_ulong".to_string())) +// } +// SupportedType::HashMap(_) => panic!("HashMap arguments not supported in c"), +// SupportedType::Option(r) => { +// let mut t = get_c_types(r); +// if !t.0.contains('*') { +// t.0 = format!("*mut {}", t.0); +// } +// t +// } +// SupportedType::Tuple(_) => panic!("Tuple arguments not supported in c"), +// SupportedType::S => unreachable!(), +// SupportedType::i64 => ("std::ffi::c_longlong".to_string(), None), +// SupportedType::u64 => ("std::ffi::c_ulonglong".to_string(), None), +// SupportedType::i32 => ("std::ffi::c_long".to_string(), None), +// SupportedType::f64 => ("std::ffi::c_double".to_string(), None), +// SupportedType::CustomType(s) => (format!("*mut {s}"), None), +// } +// } + +fn rust_type_to_c_type( + wrapped_type_ident: &Ident, + ty: &SupportedType, +) -> Option<(proc_macro2::TokenStream, bool)> { + match ty { + // SupportedType::Reference(r) => rust_type_to_c_type(wrapped_type_ident, &r.ty), + SupportedType::str | SupportedType::String => Some((quote! {*mut std::ffi::c_char}, false)), + SupportedType::bool => Some((quote! { bool }, false)), + SupportedType::Vec(v) => { + let (ty, _) = rust_type_to_c_type(wrapped_type_ident, v).unwrap(); + Some((quote! { *mut #ty }, true)) + } + // SupportedType::HashMap(_) => panic!("HashMap arguments not supported in c"), + // SupportedType::Option(r) => { + // let mut t = get_c_types(r); + // if !t.0.contains('*') { + // t.0 = format!("*mut {}", t.0); + // } + // t + // } + SupportedType::Tuple(t) => { + if !t.is_empty() { + panic!("Tuple arguments not supported in c") + } else { + None + } + } + SupportedType::S => { + let ty = format_ident!("{wrapped_type_ident}C"); + Some((quote! { *mut #ty }, false)) + } // SupportedType::i64 => ("std::ffi::c_longlong".to_string(), None), + // SupportedType::u64 => ("std::ffi::c_ulonglong".to_string(), None), + // SupportedType::i32 => ("std::ffi::c_long".to_string(), None), + // SupportedType::f64 => ("std::ffi::c_double".to_string(), None), + SupportedType::CustomType(s) => { + let ty = format_ident!("{s}C"); + Some((quote! {*mut #ty}, false)) + } + _ => panic!("rust_type_to_c_type not implemented for {:?}", ty), + } +} + +fn rust_output_to_c_output( + wrapped_type_ident: &Ident, + output: &OutputType, +) -> (proc_macro2::TokenStream, bool) { + match output { + crate::types::OutputType::Result(r) => { + if let Some((ty, augment_r_size)) = rust_type_to_c_type(wrapped_type_ident, r) { + (quote! { -> #ty }, augment_r_size) + } else { + (quote! {}, false) + } + } + crate::types::OutputType::Default => (quote! {}, false), + crate::types::OutputType::Other(r) => { + if let Some((ty, augment_r_size)) = rust_type_to_c_type(wrapped_type_ident, r) { + (quote! { -> #ty }, augment_r_size) + } else { + (quote! {}, false) + } + } + } +} diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/lib.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/lib.rs index e6dc81c73..467fcf08f 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/lib.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/lib.rs @@ -1,5 +1,6 @@ use syn::{parse_macro_input, DeriveInput, ItemImpl}; +mod c; mod common; mod javascript; mod python; @@ -11,9 +12,11 @@ pub fn alias(input: proc_macro::TokenStream) -> proc_macro::TokenStream { let parsed = parse_macro_input!(input as DeriveInput); let python_tokens = python::generate_python_alias(parsed.clone()); + let c_tokens = c::generate_c_alias(parsed.clone()); let javascript_tokens = javascript::generate_javascript_alias(parsed); output.extend(python_tokens); + output.extend(c_tokens); output.extend(javascript_tokens); output } @@ -29,9 +32,11 @@ pub fn alias_methods( let parsed: ItemImpl = syn::parse(input).unwrap(); let python_tokens = python::generate_python_methods(parsed.clone(), &attribute_args); + let c_tokens = c::generate_c_methods(parsed.clone(), &attribute_args); let javascript_tokens = javascript::generate_javascript_methods(parsed, &attribute_args); output.extend(python_tokens); + output.extend(c_tokens); output.extend(javascript_tokens); output } diff --git a/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs new file mode 100644 index 000000000..ddb7f3650 --- /dev/null +++ b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs @@ -0,0 +1,157 @@ +use std::collections::HashMap; + +/// Very similar to the `Into` trait, but we can implement it on foreign types. +pub unsafe trait CustomInto { + unsafe fn custom_into(self) -> T; +} + +pub unsafe trait CustomIntoVec { + unsafe fn custom_into_vec(self, size: usize) -> Vec; +} + +// unsafe impl> CustomIntoVec for *mut T2 { +unsafe impl CustomIntoVec for *mut *mut T2 +where + *mut T2: CustomInto, +{ + unsafe fn custom_into_vec(self, size: usize) -> Vec { + let mut result = vec![]; + let strings = std::slice::from_raw_parts_mut(self, size); + for s in strings { + let res = s.custom_into(); + result.push(res) + } + result + } +} + +unsafe impl<'a> CustomInto<&'a str> for *mut std::ffi::c_char { + unsafe fn custom_into(self) -> &'a str { + std::ffi::CStr::from_ptr(self).to_str().unwrap() + } +} + +unsafe impl CustomInto for *mut std::ffi::c_char { + unsafe fn custom_into(self) -> String { + std::ffi::CStr::from_ptr(self).to_str().unwrap().to_string() + } +} + +unsafe impl CustomInto<*mut std::ffi::c_char> for String { + unsafe fn custom_into(self) -> *mut std::ffi::c_char { + std::ffi::CString::new(self).unwrap().into_raw() + } +} + +unsafe impl CustomInto> for *mut T2 +where + *mut T2: CustomInto, +{ + unsafe fn custom_into(self) -> Option { + if self.is_null() { + None + } else { + Some(self.custom_into()) + } + } +} + +unsafe impl CustomInto<(*mut T1, usize)> for Vec +where + T2: CustomInto, +{ + unsafe fn custom_into(self) -> (*mut T1, usize) { + let size = self.len(); + let v: Vec = self.into_iter().map(|v| v.custom_into()).collect(); + (v.leak().as_mut_ptr(), size) + } +} + +macro_rules! gen_custom_into { + ($t1:ty) => { + unsafe impl CustomInto<$t1> for $t1 { + unsafe fn custom_into(self) -> $t1 { + self + } + } + }; // (($($T1:ident),+), ($($T2:ident),+), ($($C:tt),+)) => { + // impl<$($T1, $T2: CustomInto<$T1>),+> CustomInto<($($T1),+,)> for ($($T2),+,) { + // fn custom_into(self) -> ($($T1),+,) { + // ($(self.$C.custom_into()),+,) + // } + // } + // } +} + +gen_custom_into!(()); +gen_custom_into!(bool); + +// impl> CustomInto> for Option { +// fn custom_into(self) -> Option { +// self.map(|s| s.custom_into()) +// } +// } + +unsafe impl> CustomInto> for Vec { + unsafe fn custom_into(self) -> Vec { + self.into_iter().map(|x| x.custom_into()).collect() + } +} + +// impl, T2: CustomInto> +// CustomInto> for HashMap +// { +// fn custom_into(self) -> HashMap { +// self.into_iter() +// .map(|(k, v)| (k.custom_into(), v.custom_into())) +// .collect() +// } +// } + +// impl CustomInto<&'static str> for &str { +// fn custom_into(self) -> &'static str { +// // This is how we get around the liftime checker +// unsafe { +// let ptr = self as *const str; +// let ptr = ptr as *mut str; +// let boxed = Box::from_raw(ptr); +// Box::leak(boxed) +// } +// } +// } + +// gen_custom_into!((T1), (TT2), (0)); +// gen_custom_into!((T1, T2), (TT1, TT2), (0, 1)); +// gen_custom_into!((T1, T2, T3), (TT1, TT2, TT3), (0, 1, 2)); +// gen_custom_into!((T1, T2, T3, T4), (TT1, TT2, TT3, TT4), (0, 1, 2, 3)); +// gen_custom_into!( +// (T1, T2, T3, T4, T5), +// (TT1, TT2, TT3, TT4, TT5), +// (0, 1, 2, 3, 4) +// ); +// gen_custom_into!( +// (T1, T2, T3, T4, T5, T6), +// (TT1, TT2, TT3, TT4, TT5, TT6), +// (0, 1, 2, 3, 4, 5) +// ); + +// // There are some restrictions I cannot figure out around conflicting trait +// // implimentations so this is my solution for now +// gen_custom_into!(String); + +// gen_custom_into!(()); + +// gen_custom_into!(bool); + +// gen_custom_into!(i8); +// gen_custom_into!(i16); +// gen_custom_into!(i32); +// gen_custom_into!(i64); + +// gen_custom_into!(u8); +// gen_custom_into!(u16); +// gen_custom_into!(u32); +// gen_custom_into!(u64); + +// gen_custom_into!(f32); +// gen_custom_into!(f64); diff --git a/pgml-sdks/rust-bridge/rust-bridge-traits/src/lib.rs b/pgml-sdks/rust-bridge/rust-bridge-traits/src/lib.rs index 351c28c06..7cba7c727 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-traits/src/lib.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-traits/src/lib.rs @@ -1,3 +1,3 @@ -pub mod python; - +pub mod c; pub mod javascript; +pub mod python; From 5d276fc4f419b122850152c094ef0404f9255253 Mon Sep 17 00:00:00 2001 From: SilasMarvin <19626586+SilasMarvin@users.noreply.github.com> Date: Thu, 25 Apr 2024 15:22:29 -0700 Subject: [PATCH 02/19] Working C --- pgml-sdks/pgml/go/test.c | 8 ++ pgml-sdks/pgml/src/builtins.rs | 8 +- pgml-sdks/pgml/src/languages/c.rs | 82 +++++++++++++ pgml-sdks/pgml/src/model.rs | 8 +- pgml-sdks/pgml/src/open_source_ai.rs | 23 ++-- pgml-sdks/pgml/src/query_builder.rs | 5 +- pgml-sdks/pgml/src/query_runner.rs | 24 ++-- pgml-sdks/pgml/src/splitter.rs | 8 +- pgml-sdks/pgml/src/transformer_pipeline.rs | 8 +- .../rust-bridge/rust-bridge-macros/src/c.rs | 112 +++++++++-------- .../rust-bridge-macros/src/common.rs | 115 ++++++++++++++++-- .../rust-bridge-macros/src/javascript.rs | 5 +- .../rust-bridge-macros/src/python.rs | 4 +- .../rust-bridge/rust-bridge-traits/src/c.rs | 13 +- 14 files changed, 327 insertions(+), 96 deletions(-) diff --git a/pgml-sdks/pgml/go/test.c b/pgml-sdks/pgml/go/test.c index 9992fd19d..3fc1d053f 100644 --- a/pgml-sdks/pgml/go/test.c +++ b/pgml-sdks/pgml/go/test.c @@ -33,5 +33,13 @@ int main() { printf("Result %u -> %s\n", i, results[i]); } + // Test the TransformerPipeline + TransformerPipelineC * t_pipeline = TransformerPipelineC_new("text-generation", "TheBloke/zephyr-7B-beta-GPTQ", "{\"revision\": \"main\"}", "postgres://pg:ml@sql.cloud.postgresml.org:38042/pgml"); + GeneralJsonAsyncIteratorC * t_pipeline_iter = TransformerPipelineC_transform_stream(t_pipeline, "\"AI is going to\"", "{\"max_new_tokens\": 100}", NULL); + while (!GeneralJsonAsyncIteratorC_done(t_pipeline_iter)) { + char * res = GeneralJsonAsyncIteratorC_next(t_pipeline_iter); + printf("Token -> %s\n", res); + } + return 0; } diff --git a/pgml-sdks/pgml/src/builtins.rs b/pgml-sdks/pgml/src/builtins.rs index b23f4d699..638e63353 100644 --- a/pgml-sdks/pgml/src/builtins.rs +++ b/pgml-sdks/pgml/src/builtins.rs @@ -3,8 +3,7 @@ use sqlx::Row; use tracing::instrument; /// Provides access to builtin database methods -// #[derive(alias, Debug, Clone)] -#[derive(Debug, Clone)] +#[derive(alias, Debug, Clone)] pub struct Builtins { database_url: Option, } @@ -14,7 +13,10 @@ use crate::{get_or_initialize_pool, query_runner::QueryRunner, types::Json}; #[cfg(feature = "python")] use crate::{query_runner::QueryRunnerPython, types::JsonPython}; -// #[alias_methods(new, query, transform)] +#[cfg(feature = "c")] +use crate::{languages::c::JsonC, query_runner::QueryRunnerC}; + +#[alias_methods(new, query, transform)] impl Builtins { pub fn new(database_url: Option) -> Self { Self { database_url } diff --git a/pgml-sdks/pgml/src/languages/c.rs b/pgml-sdks/pgml/src/languages/c.rs index 3babf097b..78bafd858 100644 --- a/pgml-sdks/pgml/src/languages/c.rs +++ b/pgml-sdks/pgml/src/languages/c.rs @@ -1,11 +1,15 @@ use crate::types::{DateTime, GeneralJsonAsyncIterator, GeneralJsonIterator, Json}; +use futures::pin_mut; +use futures::stream::Stream; use rust_bridge::c::CustomInto; +use std::pin::Pin; pub type JsonC = std::ffi::c_char; unsafe impl CustomInto for *mut JsonC { unsafe fn custom_into(self) -> Json { let s = std::ffi::CStr::from_ptr(self).to_str().unwrap(); + eprintln!("\nABOU TO DECODE: {}\n", s); serde_json::from_str::(s).unwrap().into() } } @@ -16,3 +20,81 @@ unsafe impl CustomInto<*mut JsonC> for Json { std::ffi::CString::new(s).unwrap().into_raw() } } + +#[repr(C)] +pub struct GeneralJsonIteratorC { + pub wrapped: + *mut std::iter::Peekable> + Send>>, +} + +unsafe impl CustomInto<*mut GeneralJsonIteratorC> for GeneralJsonIterator { + unsafe fn custom_into(self) -> *mut GeneralJsonIteratorC { + Box::into_raw(Box::new(GeneralJsonIteratorC { + wrapped: Box::into_raw(Box::new(self.0.peekable())), + })) + } +} + +#[no_mangle] +pub unsafe extern "C" fn GeneralJsonIteratorC_done(iterator: *mut GeneralJsonIteratorC) -> bool { + let mut c = Box::leak(Box::from_raw(iterator)); + if let Some(_) = (*c.wrapped).peek() { + false + } else { + true + } +} + +#[no_mangle] +pub unsafe extern "C" fn GeneralJsonIteratorC_next( + iterator: *mut GeneralJsonIteratorC, +) -> *mut JsonC { + let c = Box::leak(Box::from_raw(iterator)); + let b = Box::leak(Box::from_raw(c.wrapped)); + (*b).next().unwrap().unwrap().custom_into() +} + +#[repr(C)] +pub struct GeneralJsonAsyncIteratorC { + pub wrapped: *mut futures::stream::Peekable< + Pin> + Send>>, + >, +} + +unsafe impl CustomInto<*mut GeneralJsonAsyncIteratorC> for GeneralJsonAsyncIterator { + unsafe fn custom_into(self) -> *mut GeneralJsonAsyncIteratorC { + use futures::stream::StreamExt; + Box::into_raw(Box::new(GeneralJsonAsyncIteratorC { + wrapped: Box::into_raw(Box::new(self.0.peekable())), + })) + } +} + +#[no_mangle] +pub unsafe extern "C" fn GeneralJsonAsyncIteratorC_done( + iterator: *mut GeneralJsonAsyncIteratorC, +) -> bool { + crate::get_or_set_runtime().block_on(async move { + use futures::stream::StreamExt; + let c = Box::leak(Box::from_raw(iterator)); + let s = Box::leak(Box::from_raw(c.wrapped)); + let mut s = Pin::new(s); + let res = s.as_mut().peek_mut().await; + if let Some(res) = res { + false + } else { + true + } + }) +} + +#[no_mangle] +pub unsafe extern "C" fn GeneralJsonAsyncIteratorC_next( + iterator: *mut GeneralJsonAsyncIteratorC, +) -> *mut JsonC { + crate::get_or_set_runtime().block_on(async move { + use futures::stream::StreamExt; + let mut c = Box::leak(Box::from_raw(iterator)); + (*c.wrapped).next().await.unwrap().unwrap().custom_into() + }) +} diff --git a/pgml-sdks/pgml/src/model.rs b/pgml-sdks/pgml/src/model.rs index a5eb75552..a361f577b 100644 --- a/pgml-sdks/pgml/src/model.rs +++ b/pgml-sdks/pgml/src/model.rs @@ -11,6 +11,9 @@ use crate::{ #[cfg(feature = "python")] use crate::types::JsonPython; +#[cfg(feature = "c")] +use crate::languages::c::JsonC; + /// A few notes on the following enums: /// - Sqlx does provide type derivation for enums, but it's not very good /// - Queries using these enums require a number of additional queries to get their oids and @@ -52,8 +55,7 @@ pub(crate) struct ModelDatabaseData { } /// A model used for embedding, inference, etc... -// #[derive(alias, Debug, Clone)] -#[derive(Debug, Clone)] +#[derive(alias, Debug, Clone)] pub struct Model { pub(crate) name: String, pub(crate) runtime: ModelRuntime, @@ -67,7 +69,7 @@ impl Default for Model { } } -// #[alias_methods(new, transform)] +#[alias_methods(new, transform)] impl Model { /// Creates a new [Model] pub fn new(name: Option, source: Option, parameters: Option) -> Self { diff --git a/pgml-sdks/pgml/src/open_source_ai.rs b/pgml-sdks/pgml/src/open_source_ai.rs index 7687c289f..5ee32bc88 100644 --- a/pgml-sdks/pgml/src/open_source_ai.rs +++ b/pgml-sdks/pgml/src/open_source_ai.rs @@ -13,9 +13,14 @@ use crate::{ #[cfg(feature = "python")] use crate::types::{GeneralJsonAsyncIteratorPython, GeneralJsonIteratorPython, JsonPython}; +#[cfg(feature = "c")] +use crate::{ + languages::c::JsonC, + languages::c::{GeneralJsonAsyncIteratorC, GeneralJsonIteratorC}, +}; + /// A drop in replacement for OpenAI -// #[derive(alias, Debug, Clone)] -#[derive(Debug, Clone)] +#[derive(alias, Debug, Clone)] pub struct OpenSourceAI { database_url: Option, } @@ -163,13 +168,13 @@ impl Iterator for AsyncToSyncJsonIterator { } } -// #[alias_methods( -// new, -// chat_completions_create, -// chat_completions_create_async, -// chat_completions_create_stream, -// chat_completions_create_stream_async -// )] +#[alias_methods( + new, + chat_completions_create, + chat_completions_create_async, + chat_completions_create_stream, + chat_completions_create_stream_async +)] impl OpenSourceAI { /// Creates a new [OpenSourceAI] /// diff --git a/pgml-sdks/pgml/src/query_builder.rs b/pgml-sdks/pgml/src/query_builder.rs index 4250f9db1..ed35c08c3 100644 --- a/pgml-sdks/pgml/src/query_builder.rs +++ b/pgml-sdks/pgml/src/query_builder.rs @@ -12,6 +12,9 @@ use crate::{pipeline::Pipeline, types::Json, Collection}; #[cfg(feature = "python")] use crate::{pipeline::PipelinePython, types::JsonPython}; +#[cfg(feature = "c")] +use crate::{languages::c::JsonC, pipeline::PipelineC}; + #[derive(alias, Clone, Debug)] pub struct QueryBuilder { collection: Collection, @@ -19,7 +22,7 @@ pub struct QueryBuilder { pipeline: Option, } -#[alias_methods(limit, filter, vector_recall, to_full_string, fetch_all)] +#[alias_methods(limit, filter, vector_recall, to_full_string, fetch_all(skip = "C"))] impl QueryBuilder { pub fn new(collection: Collection) -> Self { let query = json!({ diff --git a/pgml-sdks/pgml/src/query_runner.rs b/pgml-sdks/pgml/src/query_runner.rs index e4c8df750..cb5ba77cd 100644 --- a/pgml-sdks/pgml/src/query_runner.rs +++ b/pgml-sdks/pgml/src/query_runner.rs @@ -8,6 +8,9 @@ use crate::{get_or_initialize_pool, types::Json}; #[cfg(feature = "python")] use crate::types::JsonPython; +#[cfg(feature = "c")] +use crate::languages::c::JsonC; + #[derive(Clone, Debug)] enum BindValue { String(String), @@ -17,23 +20,22 @@ enum BindValue { Json(Json), } -// #[derive(alias, Clone, Debug)] -#[derive(Clone, Debug)] +#[derive(alias, Clone, Debug)] pub struct QueryRunner { query: String, bind_values: Vec, database_url: Option, } -// #[alias_methods( -// fetch_all, -// execute, -// bind_string, -// bind_int, -// bind_float, -// bind_bool, -// bind_json -// )] +#[alias_methods( + fetch_all, + execute, + bind_string, + bind_int, + bind_float, + bind_bool, + bind_json +)] impl QueryRunner { pub fn new(query: &str, database_url: Option) -> Self { Self { diff --git a/pgml-sdks/pgml/src/splitter.rs b/pgml-sdks/pgml/src/splitter.rs index 091e1a21a..b7dd6c74d 100644 --- a/pgml-sdks/pgml/src/splitter.rs +++ b/pgml-sdks/pgml/src/splitter.rs @@ -11,6 +11,9 @@ use crate::{ #[cfg(feature = "python")] use crate::types::JsonPython; +#[cfg(feature = "c")] +use crate::languages::c::JsonC; + #[allow(dead_code)] #[derive(Debug, Clone)] pub(crate) struct SplitterDatabaseData { @@ -19,8 +22,7 @@ pub(crate) struct SplitterDatabaseData { } /// A text splitter -// #[derive(alias, Debug, Clone)] -#[derive(Debug, Clone)] +#[derive(alias, Debug, Clone)] pub struct Splitter { pub(crate) name: String, pub(crate) parameters: Json, @@ -33,7 +35,7 @@ impl Default for Splitter { } } -// #[alias_methods(new)] +#[alias_methods(new)] impl Splitter { /// Creates a new [Splitter] /// diff --git a/pgml-sdks/pgml/src/transformer_pipeline.rs b/pgml-sdks/pgml/src/transformer_pipeline.rs index bd50844c2..7210aa6e2 100644 --- a/pgml-sdks/pgml/src/transformer_pipeline.rs +++ b/pgml-sdks/pgml/src/transformer_pipeline.rs @@ -4,8 +4,7 @@ use sqlx::Row; use tracing::instrument; /// Provides access to builtin database methods -// #[derive(alias, Debug, Clone)] -#[derive(Debug, Clone)] +#[derive(alias, Debug, Clone)] pub struct TransformerPipeline { task: Json, database_url: Option, @@ -17,7 +16,10 @@ use crate::{get_or_initialize_pool, types::Json}; #[cfg(feature = "python")] use crate::types::{GeneralJsonAsyncIteratorPython, JsonPython}; -// #[alias_methods(new, transform, transform_stream)] +#[cfg(feature = "c")] +use crate::{languages::c::GeneralJsonAsyncIteratorC, languages::c::JsonC}; + +#[alias_methods(new, transform, transform_stream)] impl TransformerPipeline { /// Creates a new [TransformerPipeline] /// diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index 4cd6921cf..d9eb18913 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -7,14 +7,13 @@ use std::{ use syn::{visit::Visit, DeriveInput, ItemImpl, Type}; use crate::{ - common::{AttributeArgs, GetImplMethod}, + common::{AttributeArgs, GetImplMethod, SupportedLanguage}, types::{OutputType, SupportedType}, }; pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { let name_ident = format_ident!("{}C", parsed.ident); let wrapped_type_ident = parsed.ident; - let wrapped_type_name = wrapped_type_ident.to_string(); let expanded = quote! { #[repr(C)] @@ -34,6 +33,15 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { } } + #[cfg(feature = "c")] + unsafe impl rust_bridge::c::CustomInto<#wrapped_type_ident> for *mut #name_ident { + unsafe fn custom_into(self) -> #wrapped_type_ident { + let c = Box::leak(Box::from_raw(self)); + let s = Box::leak(Box::from_raw(c.wrapped)); + s.clone() + } + } + #[cfg(feature = "c")] unsafe impl rust_bridge::c::CustomInto<&'static mut #wrapped_type_ident> for *mut #name_ident { unsafe fn custom_into(self) -> &'static mut #wrapped_type_ident { @@ -71,7 +79,7 @@ pub fn generate_c_methods( match &item { syn::ImplItem::Fn(f) => { let method_name = f.sig.ident.to_string(); - if !attribute_args.args.contains(&method_name) { + if !attribute_args.should_alias_method(&method_name, SupportedLanguage::C) { continue; } } @@ -164,6 +172,8 @@ pub fn generate_c_methods( } }; + eprintln!("\n\n{}\n\n", method); + methods.push(method); } @@ -189,22 +199,35 @@ fn get_method_arguments( let mut c_argument_prep = Vec::new(); let mut rust_function_arguments = Vec::new(); - if let Some(_receiver) = &method.receiver { + if let Some(receiver) = &method.receiver { c_function_arguments.push(format!("s: *mut {name_ident}")); - c_argument_prep.push(format!( - "let s: &mut {wrapped_type_ident} = s.custom_into();" - )); + if receiver.to_string().contains('&') { + c_argument_prep.push(format!( + "let s: &mut {wrapped_type_ident} = s.custom_into();" + )); + } else { + c_argument_prep.push(format!("let s: {wrapped_type_ident} = s.custom_into();")); + } rust_function_arguments.push("s".to_string()); } for (argument_name, argument_type) in &method.method_arguments { + let argument_name_without_mut = argument_name.replacen("mut", "", 1); let ( go_function_arguments_, go_arguments_prep_, c_function_arguments_, + c_function_argument_types, c_argument_prep_, rust_function_arguments_, - ) = get_c_types(argument_name, argument_type); + ) = get_c_types(&argument_name_without_mut, argument_type); + + let c_function_arguments_ = c_function_arguments_ + .into_iter() + .zip(c_function_argument_types) + .map(|(argument_name, argument_type)| format!("{argument_name}: {argument_type}")) + .collect::>() + .join(","); go_function_arguments.push(go_function_arguments_); go_arguments_prep.push(go_arguments_prep_); @@ -233,7 +256,7 @@ fn get_method_arguments( fn get_c_types( argument_name: &str, ty: &SupportedType, -) -> (String, String, String, String, String) { +) -> (String, String, Vec, Vec, String, String) { let t = ty.to_language_string(&None); let c_to_rust = format!("let {argument_name}: {t} = {argument_name}.custom_into();"); match ty { @@ -242,6 +265,7 @@ fn get_c_types( go_function_arguments, go_argument_prep, c_function_arguments, + c_function_argument_types, c_argument_prep, rust_function_arguments, ) = get_c_types(argument_name, &r.ty); @@ -249,6 +273,7 @@ fn get_c_types( "".to_string(), "".to_string(), c_function_arguments, + c_function_argument_types, c_to_rust, argument_name.to_string(), ) @@ -256,7 +281,8 @@ fn get_c_types( SupportedType::str | SupportedType::String => ( "".to_string(), "".to_string(), - format!("{argument_name}: *mut std::ffi::c_char"), + vec![format!("{argument_name}")], + vec!["*mut std::ffi::c_char".to_string()], c_to_rust, argument_name.to_string(), ), @@ -265,14 +291,21 @@ fn get_c_types( go_function_arguments, go_argument_prep, mut c_function_arguments, + mut c_function_argument_types, c_argument_prep, rust_function_arguments, ) = get_c_types(argument_name, &r); + let v = c_function_argument_types.last_mut().unwrap(); + if !v.starts_with('*') { + *v = format!("*mut {v}"); + } + ( "".to_string(), "".to_string(), c_function_arguments, + c_function_argument_types, c_to_rust, argument_name.to_string(), ) @@ -280,7 +313,8 @@ fn get_c_types( SupportedType::bool => ( "".to_string(), "".to_string(), - "bool".to_string(), + vec![format!("{argument_name}")], + vec!["bool".to_string()], "".to_string(), argument_name.to_string(), ), @@ -289,12 +323,15 @@ fn get_c_types( go_function_arguments, go_argument_prep, mut c_function_arguments, + mut c_function_argument_types, mut c_argument_prep, rust_function_arguments, ) = get_c_types(argument_name, v); - let mut c_function_arguments = c_function_arguments.replacen("*mut", "*mut *mut", 1); - c_function_arguments.push_str(", v_size: std::ffi::c_ulong"); + let v = c_function_argument_types.last_mut().unwrap(); + *v = v.replacen("*mut", "*mut *mut", 1); + c_function_arguments.push("v_size".to_string()); + c_function_argument_types.push("std::ffi::c_ulong".to_string()); c_argument_prep = "let v_size: usize = v_size as usize;".to_string(); let c_to_rust = format!("{c_argument_prep}\nlet {argument_name}: {t} = {argument_name}.custom_into_vec(v_size);"); @@ -303,6 +340,7 @@ fn get_c_types( "".to_string(), "".to_string(), c_function_arguments, + c_function_argument_types, c_to_rust, argument_name.to_string(), ) @@ -313,35 +351,40 @@ fn get_c_types( SupportedType::i64 => ( "".to_string(), "".to_string(), - format!("{argument_name}: std::ffi::c_longlong"), + vec![format!("{argument_name}")], + vec!["std::ffi::c_long".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::u64 => ( "".to_string(), "".to_string(), - format!("{argument_name}: std::ffi::c_ulonglong"), + vec![format!("{argument_name}")], + vec!["std::ffi::c_ulong".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::i32 => ( "".to_string(), "".to_string(), - format!("{argument_name}: std::ffi::c_long"), + vec![format!("{argument_name}")], + vec!["std::ffi::c_int".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::f64 => ( "".to_string(), "".to_string(), - format!("{argument_name}: std::ffi::c_double"), + vec![format!("{argument_name}")], + vec!["std::ffi::c_double".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::CustomType(s) => ( "".to_string(), "".to_string(), - format!("{argument_name}: *mut {s}C"), + vec![format!("{argument_name}")], + vec![format!("*mut {s}C")], c_to_rust, argument_name.to_string(), ), @@ -349,39 +392,6 @@ fn get_c_types( } } -// fn get_c_types(argument_name: &str, ty: &SupportedType) -> (String, Option) { -// match ty { -// SupportedType::Reference(r) => get_c_types(&r.ty), -// SupportedType::str | SupportedType::String => ("*mut std::ffi::c_char".to_string(), None), -// SupportedType::bool => ("bool".to_string(), None), -// SupportedType::Vec(v) => { -// let mut v = get_c_types(v); -// if !v.0.contains('*') { -// v.0 = format!("*mut {}", v.0); -// } -// if v.1.is_some() { -// panic!("Vec> not supported in c"); -// } -// (v.0, Some("std::ffi::c_ulong".to_string())) -// } -// SupportedType::HashMap(_) => panic!("HashMap arguments not supported in c"), -// SupportedType::Option(r) => { -// let mut t = get_c_types(r); -// if !t.0.contains('*') { -// t.0 = format!("*mut {}", t.0); -// } -// t -// } -// SupportedType::Tuple(_) => panic!("Tuple arguments not supported in c"), -// SupportedType::S => unreachable!(), -// SupportedType::i64 => ("std::ffi::c_longlong".to_string(), None), -// SupportedType::u64 => ("std::ffi::c_ulonglong".to_string(), None), -// SupportedType::i32 => ("std::ffi::c_long".to_string(), None), -// SupportedType::f64 => ("std::ffi::c_double".to_string(), None), -// SupportedType::CustomType(s) => (format!("*mut {s}"), None), -// } -// } - fn rust_type_to_c_type( wrapped_type_ident: &Ident, ty: &SupportedType, diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs index f17b4b63a..17d875fbe 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs @@ -1,30 +1,131 @@ -use proc_macro2::Ident; +use proc_macro2::{Group, Ident}; use quote::{format_ident, ToTokens}; use syn::{ - parse::Parser, + parenthesized, + parse::{Parse, Parser}, punctuated::Punctuated, + token, visit::{self, Visit}, - ImplItemFn, ReturnType, Token, Visibility, + Expr, ExprAssign, ImplItemFn, Lit, ReturnType, Token, Visibility, }; use crate::types::{GetOutputType, GetSupportedType, OutputType, SupportedType}; +#[derive(Debug, Copy, Clone, PartialEq, Eq)] +pub enum SupportedLanguage { + C, + Python, + JavaScript, +} + +impl From<&str> for SupportedLanguage { + fn from(value: &str) -> Self { + match value { + "C" => SupportedLanguage::C, + "Python" => SupportedLanguage::Python, + "JavaScript" => SupportedLanguage::JavaScript, + _ => panic!("Cannot convert {value} to SupportedLanguage"), + } + } +} + pub struct AttributeArgs { - pub args: Vec, + pub args: Vec, +} + +#[derive(Debug, Clone)] +struct Item { + method: String, + language_exceptions: Vec, +} + +#[derive(Debug)] +enum AdditionalAttribute { + Skip(SupportedLanguage), +} + +impl From<&ExprAssign> for AdditionalAttribute { + fn from(value: &ExprAssign) -> Self { + let a_ty = match &*value.left { + Expr::Path(p) => p.into_token_stream().to_string(), + _ => panic!( + r#"Getting left value - Expected additional attributes to look something like: #[alias_methods(new(skip = "c"))]"# + ), + }; + match a_ty.as_str() { + "skip" => { + let skip_method = match &*value.right { + Expr::Lit(l) => match &l.lit { + Lit::Str(l) => l.value().as_str().into(), + _ => { + panic!( + r#"Getting Lit value - Expected additional attributes to look something like: #[alias_methods(new(skip = "c"))]"# + ) + } + }, + _ => panic!( + r#"Getting Lit - Expected additional attributes to look something like: #[alias_methods(new(skip = "c"))]"# + ), + }; + AdditionalAttribute::Skip(skip_method) + } + _ => panic!("Currently only skip additional attributes are supported"), + } + } +} + +impl Parse for Item { + fn parse(input: syn::parse::ParseStream) -> syn::Result { + let method: Ident = input.parse()?; + let lookahead = input.lookahead1(); + if !lookahead.peek(token::Paren) { + Ok(Self { + method: method.to_string(), + language_exceptions: Vec::new(), + }) + } else { + let group: Group = input.parse()?; + let group_parser = Punctuated::::parse_terminated; + let parsed_group = group_parser + .parse(group.stream().into()) + .expect("Error parsing attributes for custom_methods macro"); + let a_atts: Vec = parsed_group + .into_pairs() + .map(|p| p.value().into()) + .collect(); + // Update this part as needed + let mut language_exceptions = Vec::new(); + for att in a_atts { + match att { + AdditionalAttribute::Skip(a) => language_exceptions.push(a), + } + } + Ok(Self { + method: method.to_string(), + language_exceptions, + }) + } + } } impl AttributeArgs { pub fn new(attributes: proc_macro::TokenStream) -> Self { - let attribute_parser = Punctuated::::parse_terminated; + let attribute_parser = Punctuated::::parse_terminated; let parsed_attributes = attribute_parser .parse(attributes) .expect("Error parsing attributes for custom_methods macro"); - let args: Vec = parsed_attributes + let args: Vec = parsed_attributes .into_pairs() - .map(|p| p.value().to_string()) + .map(|p| p.value().clone()) .collect(); Self { args } } + + pub fn should_alias_method(&self, method_name: &str, language: SupportedLanguage) -> bool { + self.args + .iter() + .any(|item| item.method == method_name && !item.language_exceptions.contains(&language)) + } } #[derive(Debug)] diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs index 6aa5cf667..76ccea7c6 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs @@ -3,7 +3,7 @@ use std::fs::OpenOptions; use std::io::{Read, Write}; use syn::{visit::Visit, DeriveInput, ItemImpl, Type}; -use crate::common::{AttributeArgs, GetImplMethod}; +use crate::common::{AttributeArgs, GetImplMethod, SupportedLanguage}; use crate::types::{OutputType, SupportedType}; pub fn generate_javascript_alias(parsed: DeriveInput) -> proc_macro::TokenStream { @@ -112,7 +112,8 @@ pub fn generate_javascript_methods( match &item { syn::ImplItem::Fn(f) => { let method_name = f.sig.ident.to_string(); - if !attribute_args.args.contains(&method_name) { + if !attribute_args.should_alias_method(&method_name, SupportedLanguage::JavaScript) + { continue; } } diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs index a453bf14f..87d1c8c4f 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs @@ -3,7 +3,7 @@ use std::fs::OpenOptions; use std::io::{Read, Write}; use syn::{visit::Visit, DeriveInput, ItemImpl, Type}; -use crate::common::{AttributeArgs, GetImplMethod}; +use crate::common::{AttributeArgs, GetImplMethod, SupportedLanguage}; use crate::types::{OutputType, SupportedType}; const STUB_TOP: &str = r#" @@ -192,7 +192,7 @@ pub fn generate_python_methods( match &item { syn::ImplItem::Fn(f) => { let method_name = f.sig.ident.to_string(); - if !attribute_args.args.contains(&method_name) { + if !attribute_args.should_alias_method(&method_name, SupportedLanguage::Python) { continue; } } diff --git a/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs index ddb7f3650..c74edf4bd 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs @@ -9,7 +9,6 @@ pub unsafe trait CustomIntoVec { unsafe fn custom_into_vec(self, size: usize) -> Vec; } -// unsafe impl> CustomIntoVec for *mut T2 { unsafe impl CustomIntoVec for *mut *mut T2 where *mut T2: CustomInto, @@ -43,6 +42,18 @@ unsafe impl CustomInto<*mut std::ffi::c_char> for String { } } +unsafe impl CustomInto for *mut std::ffi::c_int { + unsafe fn custom_into(self) -> i32 { + *self + } +} + +unsafe impl CustomInto for *mut std::ffi::c_double { + unsafe fn custom_into(self) -> f64 { + *self + } +} + unsafe impl CustomInto> for *mut T2 where *mut T2: CustomInto, From f252d77f8fbde74011969ec8d033da9a3486d4a2 Mon Sep 17 00:00:00 2001 From: Silas Marvin <19626586+SilasMarvin@users.noreply.github.com> Date: Sat, 4 May 2024 10:09:36 -0700 Subject: [PATCH 03/19] Moved to c --- pgml-sdks/pgml/{go => c}/Makefile | 0 pgml-sdks/pgml/{ => c}/go/go.mod | 0 pgml-sdks/pgml/{ => c}/go/pgml.go | 0 pgml-sdks/pgml/{go => c}/test.c | 4 +- pgml-sdks/pgml/c/zig/build.zig | 78 +++++++++++++++++++++++++++++++ pgml-sdks/pgml/c/zig/src/main.zig | 37 +++++++++++++++ pgml-sdks/pgml/src/collection.rs | 1 + pgml-sdks/pgml/src/languages/c.rs | 1 - 8 files changed, 118 insertions(+), 3 deletions(-) rename pgml-sdks/pgml/{go => c}/Makefile (100%) rename pgml-sdks/pgml/{ => c}/go/go.mod (100%) rename pgml-sdks/pgml/{ => c}/go/pgml.go (100%) rename pgml-sdks/pgml/{go => c}/test.c (96%) create mode 100644 pgml-sdks/pgml/c/zig/build.zig create mode 100644 pgml-sdks/pgml/c/zig/src/main.zig diff --git a/pgml-sdks/pgml/go/Makefile b/pgml-sdks/pgml/c/Makefile similarity index 100% rename from pgml-sdks/pgml/go/Makefile rename to pgml-sdks/pgml/c/Makefile diff --git a/pgml-sdks/pgml/go/go.mod b/pgml-sdks/pgml/c/go/go.mod similarity index 100% rename from pgml-sdks/pgml/go/go.mod rename to pgml-sdks/pgml/c/go/go.mod diff --git a/pgml-sdks/pgml/go/pgml.go b/pgml-sdks/pgml/c/go/pgml.go similarity index 100% rename from pgml-sdks/pgml/go/pgml.go rename to pgml-sdks/pgml/c/go/pgml.go diff --git a/pgml-sdks/pgml/go/test.c b/pgml-sdks/pgml/c/test.c similarity index 96% rename from pgml-sdks/pgml/go/test.c rename to pgml-sdks/pgml/c/test.c index 3fc1d053f..a7538e705 100644 --- a/pgml-sdks/pgml/go/test.c +++ b/pgml-sdks/pgml/c/test.c @@ -21,7 +21,7 @@ int main() { // Print the documents printf("\n\nPrinting documents:\n"); int i; - for (i = 0; i < r_size; i++) { + for (i = 0; i < r_size; ++i) { printf("Document %u -> %s\n", i, documents[i]); } @@ -29,7 +29,7 @@ int main() { r_size = 0; char** results = CollectionC_vector_search(collection, "{\"query\": {\"fields\": {\"text\": {\"query\": \"Test query!\"}}}, \"limit\": 5}", pipeline, &r_size); printf("\n\nPrinting results:\n"); - for (i = 0; i < r_size; i++) { + for (i = 0; i < r_size; ++i) { printf("Result %u -> %s\n", i, results[i]); } diff --git a/pgml-sdks/pgml/c/zig/build.zig b/pgml-sdks/pgml/c/zig/build.zig new file mode 100644 index 000000000..300954738 --- /dev/null +++ b/pgml-sdks/pgml/c/zig/build.zig @@ -0,0 +1,78 @@ +const std = @import("std"); + +// Although this function looks imperative, note that its job is to +// declaratively construct a build graph that will be executed by an external +// runner. +pub fn build(b: *std.Build) void { + // Standard target options allows the person running `zig build` to choose + // what target to build for. Here we do not override the defaults, which + // means any target is allowed, and the default is native. Other options + // for restricting supported target set are available. + const target = b.standardTargetOptions(.{}); + + // Standard optimization options allow the person running `zig build` to select + // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not + // set a preferred release mode, allowing the user to decide how to optimize. + const optimize = b.standardOptimizeOption(.{}); + + const exe = b.addExecutable(.{ + .name = "zig", + // In this case the main source file is merely a path, however, in more + // complicated build scripts, this could be a generated file. + .root_source_file = .{ .path = "src/main.zig" }, + .target = target, + .optimize = optimize, + }); + + // Need to link our Rust pgml library + exe.addLibraryPath(.{ .path = "./../../target/debug" }); + exe.linkSystemLibrary("pgml"); + + // This declares intent for the executable to be installed into the + // standard location when the user invokes the "install" step (the default + // step when running `zig build`). + b.installArtifact(exe); + + // This *creates* a Run step in the build graph, to be executed when another + // step is evaluated that depends on it. The next line below will establish + // such a dependency. + const run_cmd = b.addRunArtifact(exe); + + // By making the run step depend on the install step, it will be run from the + // installation directory rather than directly from within the cache directory. + // This is not necessary, however, if the application depends on other installed + // files, this ensures they will be present and in the expected location. + run_cmd.step.dependOn(b.getInstallStep()); + + // This allows the user to pass arguments to the application in the build + // command itself, like this: `zig build run -- arg1 arg2 etc` + if (b.args) |args| { + run_cmd.addArgs(args); + } + + // This creates a build step. It will be visible in the `zig build --help` menu, + // and can be selected like this: `zig build run` + // This will evaluate the `run` step rather than the default, which is "install". + const run_step = b.step("run", "Run the app"); + run_step.dependOn(&run_cmd.step); + + // Creates a step for unit testing. This only builds the test executable + // but does not run it. + const unit_tests = b.addTest(.{ + .root_source_file = .{ .path = "src/main.zig" }, + .target = target, + .optimize = optimize, + }); + + // Need to link our Rust pgml library + exe.addLibraryPath(.{ .path = "./../../target/debug" }); + unit_tests.linkSystemLibrary("pgml"); + + const run_unit_tests = b.addRunArtifact(unit_tests); + + // Similar to creating the run step earlier, this exposes a `test` step to + // the `zig build --help` menu, providing a way for the user to request + // running the unit tests. + const test_step = b.step("test", "Run unit tests"); + test_step.dependOn(&run_unit_tests.step); +} diff --git a/pgml-sdks/pgml/c/zig/src/main.zig b/pgml-sdks/pgml/c/zig/src/main.zig new file mode 100644 index 000000000..af806dda1 --- /dev/null +++ b/pgml-sdks/pgml/c/zig/src/main.zig @@ -0,0 +1,37 @@ +const pgml = @cImport({ + // See https://github.com/ziglang/zig/issues/515 + // @cDefine("_NO_CRT_STDIO_INLINE", "1"); + // @cInclude("./../pgml.h"); + @cInclude("./../pgml.h"); +}); + +pub fn main() void { + // Create the Collection and Pipeline + var collection: *pgml.CollectionC = pgml.CollectionC_new(@constCast("test_c"), null); + var pipeline: *pgml.PipelineC = pgml.PipelineC_new(@constCast("test_c"), @constCast("{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"intfloat/e5-small\"}}}")); + + // Add the Pipeline to the Collection + pgml.CollectionC_add_pipeline(collection, pipeline); + + // Upsert the documents + // const documents_to_upsert: [2][]const u8 = .{ "{\"id\": \"doc1\", \"text\": \"test1\"}", "{\"id\": \"doc2\", \"text\": \"test2\"}" }; + // const c_documents_to_upsert: [*c][*c]pgml.JsonC = @as([*c][*c]pgml.JsonC, @ptrCast(@constCast(documents_to_upsert[0..2].ptr))); + // pgml.CollectionC_upsert_documents(collection, c_documents_to_upsert, 2, null); +} + +// test "simple test" { +// // Create the Collection and Pipeline +// var collection: *pgml.CollectionC = pgml.CollectionC_new(@constCast("test_c"), null); +// var pipeline: *pgml.PipelineC = pgml.PipelineC_new(@constCast("test_c"), @constCast("{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"intfloat/e5-small\"}}}")); + +// // Add the Pipeline to the Collection +// pgml.CollectionC_add_pipeline(collection, pipeline); + +// // Upsert the documents +// // char * documents_to_upsert[2] = {"{\"id\": \"doc1\", \"text\": \"test1\"}", "{\"id\": \"doc2\", \"text\": \"test2\"}"}; +// // CollectionC_upsert_documents(collection, documents_to_upsert, 2, NULL); + +// // // Retrieve the documents +// // unsigned long r_size = 0; +// // char** documents = CollectionC_get_documents(collection, NULL, &r_size); +// } diff --git a/pgml-sdks/pgml/src/collection.rs b/pgml-sdks/pgml/src/collection.rs index ba24420ab..0209f1c40 100644 --- a/pgml-sdks/pgml/src/collection.rs +++ b/pgml-sdks/pgml/src/collection.rs @@ -518,6 +518,7 @@ impl Collection { documents: Vec, args: Option, ) -> anyhow::Result<()> { + eprintln!("IN THE UPSERT DOCUMENTS FUNCTION"); // The flow for this function // 1. Create the collection if it does not exist // 2. Get all pipelines where ACTIVE = TRUE diff --git a/pgml-sdks/pgml/src/languages/c.rs b/pgml-sdks/pgml/src/languages/c.rs index 78bafd858..a9c42cf5b 100644 --- a/pgml-sdks/pgml/src/languages/c.rs +++ b/pgml-sdks/pgml/src/languages/c.rs @@ -9,7 +9,6 @@ pub type JsonC = std::ffi::c_char; unsafe impl CustomInto for *mut JsonC { unsafe fn custom_into(self) -> Json { let s = std::ffi::CStr::from_ptr(self).to_str().unwrap(); - eprintln!("\nABOU TO DECODE: {}\n", s); serde_json::from_str::(s).unwrap().into() } } From a67b104218605db78684ca96d80d0f304bf48ccd Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Tue, 21 May 2024 13:35:07 -0500 Subject: [PATCH 04/19] remove #[repr(C)] --- pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index d9eb18913..f823b9432 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -16,7 +16,6 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { let wrapped_type_ident = parsed.ident; let expanded = quote! { - #[repr(C)] #[cfg(feature = "c")] pub struct #name_ident { pub wrapped: *mut #wrapped_type_ident From 7dc364de979d3afe8f45b17ee39215512d3b6925 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Tue, 21 May 2024 13:36:19 -0500 Subject: [PATCH 05/19] remove unused imports --- pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index f823b9432..84a0fc70c 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -1,9 +1,6 @@ use proc_macro2::Ident; -use quote::{format_ident, quote, ToTokens}; -use std::{ - io::{Read, Write}, - str::FromStr, -}; +use quote::{format_ident, quote}; +use std::str::FromStr; use syn::{visit::Visit, DeriveInput, ItemImpl, Type}; use crate::{ From daf5510d99ef47bc45cb95a2bb39aa071d4c031e Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Tue, 21 May 2024 14:14:15 -0500 Subject: [PATCH 06/19] only Box once; add destructor --- .../rust-bridge/rust-bridge-macros/src/c.rs | 27 ++++++++++++++----- 1 file changed, 20 insertions(+), 7 deletions(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index 84a0fc70c..68692078a 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -15,7 +15,7 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { let expanded = quote! { #[cfg(feature = "c")] pub struct #name_ident { - pub wrapped: *mut #wrapped_type_ident + pub wrapped: #wrapped_type_ident } #[cfg(feature = "c")] @@ -23,7 +23,7 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { unsafe fn custom_into(self) -> *mut #name_ident { Box::into_raw(Box::new( #name_ident { - wrapped: Box::into_raw(Box::new(self)) + wrapped: self } )) } @@ -32,9 +32,8 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { #[cfg(feature = "c")] unsafe impl rust_bridge::c::CustomInto<#wrapped_type_ident> for *mut #name_ident { unsafe fn custom_into(self) -> #wrapped_type_ident { - let c = Box::leak(Box::from_raw(self)); - let s = Box::leak(Box::from_raw(c.wrapped)); - s.clone() + let c = Box::from_raw(self); + c.wrapped } } @@ -42,7 +41,7 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { unsafe impl rust_bridge::c::CustomInto<&'static mut #wrapped_type_ident> for *mut #name_ident { unsafe fn custom_into(self) -> &'static mut #wrapped_type_ident { let c = Box::leak(Box::from_raw(self)); - Box::leak(Box::from_raw(c.wrapped)) + &mut c.wrapped } } @@ -50,11 +49,13 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { unsafe impl rust_bridge::c::CustomInto<&'static #wrapped_type_ident> for *mut #name_ident { unsafe fn custom_into(self) -> &'static #wrapped_type_ident { let c = Box::leak(Box::from_raw(self)); - &*Box::leak(Box::from_raw(c.wrapped)) + &c.wrapped } } }; + eprintln!("\n\n{expanded}\n\n"); + proc_macro::TokenStream::from(expanded) } @@ -173,6 +174,18 @@ pub fn generate_c_methods( methods.push(method); } + let method_name = format_ident!("{name_ident}_delete"); + let destructor = quote! { + #[cfg(feature = "c")] + #[no_mangle] + pub unsafe extern "C" fn #method_name(ptr: *mut #name_ident) { + drop(Box::from_raw(ptr)) + } + }; + + eprintln!("\n\n{destructor}\n\n"); + methods.push(destructor); + proc_macro::TokenStream::from(quote! { #(#methods)* }) From eb6b83b95c9df33ca1ea62dab63214fec93f41b8 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Tue, 21 May 2024 15:15:36 -0500 Subject: [PATCH 07/19] lowercase function names; prefix pgml --- pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index 68692078a..139c6d476 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -99,7 +99,11 @@ pub fn generate_c_methods( rust_function_arguments, ) = get_method_arguments(&wrapped_type_ident, &name_ident, &method); - let method_name = format_ident!("{}_{}", name_ident, method_ident); + let method_name = format_ident!( + "pgml_{}_{}", + name_ident.to_string().to_lowercase(), + method_ident + ); let (return_part, augment_r_size) = rust_output_to_c_output(&wrapped_type_ident, &method.output_type); @@ -174,7 +178,7 @@ pub fn generate_c_methods( methods.push(method); } - let method_name = format_ident!("{name_ident}_delete"); + let method_name = format_ident!("pgml_{}_delete", name_ident.to_string().to_lowercase()); let destructor = quote! { #[cfg(feature = "c")] #[no_mangle] From 81920ce45941fd97848915643073761edfabf585 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Tue, 21 May 2024 21:08:58 -0500 Subject: [PATCH 08/19] fix clippy lints --- pgml-sdks/pgml/src/builtins.rs | 2 +- pgml-sdks/pgml/src/transformer_pipeline.rs | 2 +- pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs | 1 - pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs | 3 +-- pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs | 1 - pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs | 1 - pgml-sdks/rust-bridge/rust-bridge-macros/src/types.rs | 6 +++--- pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs | 2 -- 8 files changed, 6 insertions(+), 12 deletions(-) diff --git a/pgml-sdks/pgml/src/builtins.rs b/pgml-sdks/pgml/src/builtins.rs index 638e63353..37923efb1 100644 --- a/pgml-sdks/pgml/src/builtins.rs +++ b/pgml-sdks/pgml/src/builtins.rs @@ -87,7 +87,7 @@ impl Builtins { query.bind(task.0) }; let results = query.bind(inputs).bind(args).fetch_all(&pool).await?; - let results = results.get(0).unwrap().get::(0); + let results = results.first().unwrap().get::(0); Ok(Json(results)) } } diff --git a/pgml-sdks/pgml/src/transformer_pipeline.rs b/pgml-sdks/pgml/src/transformer_pipeline.rs index 7210aa6e2..860c5543c 100644 --- a/pgml-sdks/pgml/src/transformer_pipeline.rs +++ b/pgml-sdks/pgml/src/transformer_pipeline.rs @@ -88,7 +88,7 @@ impl TransformerPipeline { .fetch_all(&pool) .await? }; - let results = results.get(0).unwrap().get::(0); + let results = results.first().unwrap().get::(0); Ok(Json(results)) } diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index 139c6d476..206217234 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -401,7 +401,6 @@ fn get_c_types( c_to_rust, argument_name.to_string(), ), - _ => todo!(), } } diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs index 17d875fbe..dc9ec066b 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/common.rs @@ -1,7 +1,6 @@ use proc_macro2::{Group, Ident}; use quote::{format_ident, ToTokens}; use syn::{ - parenthesized, parse::{Parse, Parser}, punctuated::Punctuated, token, @@ -30,7 +29,7 @@ impl From<&str> for SupportedLanguage { } pub struct AttributeArgs { - pub args: Vec, + args: Vec, } #[derive(Debug, Clone)] diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs index 76ccea7c6..41b1396d9 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/javascript.rs @@ -301,7 +301,6 @@ pub fn generate_javascript_methods( if let Ok(path) = path { let mut file = OpenOptions::new() .create(true) - .write(true) .append(true) .read(true) .open(path) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs index 87d1c8c4f..835303f12 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/python.rs @@ -373,7 +373,6 @@ pub fn generate_python_methods( if let Ok(path) = path { let mut file = OpenOptions::new() .create(true) - .write(true) .append(true) .read(true) .open(path) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/types.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/types.rs index 99947b1da..6629995a3 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/types.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/types.rs @@ -37,9 +37,9 @@ pub enum SupportedType { CustomType(String), } -impl ToString for SupportedType { - fn to_string(&self) -> String { - self.to_language_string(&None) +impl std::fmt::Display for SupportedType { + fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { + write!(f, "{}", self.to_language_string(&None)) } } diff --git a/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs index c74edf4bd..76cc80ee1 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs @@ -1,5 +1,3 @@ -use std::collections::HashMap; - /// Very similar to the `Into` trait, but we can implement it on foreign types. pub unsafe trait CustomInto { unsafe fn custom_into(self) -> T; From fc46c1a4e7803c76dafafbaafd6f6ff37709272d Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Tue, 21 May 2024 21:16:30 -0500 Subject: [PATCH 09/19] more clippy --- pgml-sdks/pgml/Cargo.toml | 2 +- pgml-sdks/pgml/src/languages/c.rs | 27 +++++++++------------------ 2 files changed, 10 insertions(+), 19 deletions(-) diff --git a/pgml-sdks/pgml/Cargo.toml b/pgml-sdks/pgml/Cargo.toml index 89c2f6275..a4e3af03d 100644 --- a/pgml-sdks/pgml/Cargo.toml +++ b/pgml-sdks/pgml/Cargo.toml @@ -50,7 +50,7 @@ parking_lot = "0.12.1" once_cell = "1.19.0" [features] -default = [] +default = ["c"] python = ["dep:pyo3", "dep:pyo3-asyncio"] javascript = ["dep:neon"] c = [] diff --git a/pgml-sdks/pgml/src/languages/c.rs b/pgml-sdks/pgml/src/languages/c.rs index a9c42cf5b..1538bd369 100644 --- a/pgml-sdks/pgml/src/languages/c.rs +++ b/pgml-sdks/pgml/src/languages/c.rs @@ -1,5 +1,4 @@ -use crate::types::{DateTime, GeneralJsonAsyncIterator, GeneralJsonIterator, Json}; -use futures::pin_mut; +use crate::types::{GeneralJsonAsyncIterator, GeneralJsonIterator, Json}; use futures::stream::Stream; use rust_bridge::c::CustomInto; use std::pin::Pin; @@ -36,12 +35,8 @@ unsafe impl CustomInto<*mut GeneralJsonIteratorC> for GeneralJsonIterator { #[no_mangle] pub unsafe extern "C" fn GeneralJsonIteratorC_done(iterator: *mut GeneralJsonIteratorC) -> bool { - let mut c = Box::leak(Box::from_raw(iterator)); - if let Some(_) = (*c.wrapped).peek() { - false - } else { - true - } + let c = Box::leak(Box::from_raw(iterator)); + (*c.wrapped).peek().is_none() } #[no_mangle] @@ -53,11 +48,12 @@ pub unsafe extern "C" fn GeneralJsonIteratorC_next( (*b).next().unwrap().unwrap().custom_into() } +type PeekableStream = + futures::stream::Peekable> + Send>>>; + #[repr(C)] pub struct GeneralJsonAsyncIteratorC { - pub wrapped: *mut futures::stream::Peekable< - Pin> + Send>>, - >, + pub wrapped: *mut PeekableStream, } unsafe impl CustomInto<*mut GeneralJsonAsyncIteratorC> for GeneralJsonAsyncIterator { @@ -74,16 +70,11 @@ pub unsafe extern "C" fn GeneralJsonAsyncIteratorC_done( iterator: *mut GeneralJsonAsyncIteratorC, ) -> bool { crate::get_or_set_runtime().block_on(async move { - use futures::stream::StreamExt; let c = Box::leak(Box::from_raw(iterator)); let s = Box::leak(Box::from_raw(c.wrapped)); let mut s = Pin::new(s); let res = s.as_mut().peek_mut().await; - if let Some(res) = res { - false - } else { - true - } + res.is_none() }) } @@ -93,7 +84,7 @@ pub unsafe extern "C" fn GeneralJsonAsyncIteratorC_next( ) -> *mut JsonC { crate::get_or_set_runtime().block_on(async move { use futures::stream::StreamExt; - let mut c = Box::leak(Box::from_raw(iterator)); + let c = Box::leak(Box::from_raw(iterator)); (*c.wrapped).next().await.unwrap().unwrap().custom_into() }) } From efbceed7571db3b55b9fe9abf0590373a52e330c Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Tue, 21 May 2024 21:22:10 -0500 Subject: [PATCH 10/19] even more clippy lints --- .../rust-bridge/rust-bridge-macros/src/c.rs | 73 +++---------------- 1 file changed, 10 insertions(+), 63 deletions(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index 206217234..d4af6a5c2 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -91,13 +91,8 @@ pub fn generate_c_methods( } let method_ident = method.method_ident.clone(); - let ( - go_function_arguments, - go_arguments_prep, - mut c_function_arguments, - c_argument_prep, - rust_function_arguments, - ) = get_method_arguments(&wrapped_type_ident, &name_ident, &method); + let (mut c_function_arguments, c_argument_prep, rust_function_arguments) = + get_method_arguments(&wrapped_type_ident, &name_ident, &method); let method_name = format_ident!( "pgml_{}_{}", @@ -203,11 +198,7 @@ fn get_method_arguments( proc_macro2::TokenStream, proc_macro2::TokenStream, proc_macro2::TokenStream, - proc_macro2::TokenStream, - proc_macro2::TokenStream, ) { - let mut go_function_arguments = Vec::new(); - let mut go_arguments_prep = Vec::new(); let mut c_function_arguments = Vec::new(); let mut c_argument_prep = Vec::new(); let mut rust_function_arguments = Vec::new(); @@ -227,8 +218,6 @@ fn get_method_arguments( for (argument_name, argument_type) in &method.method_arguments { let argument_name_without_mut = argument_name.replacen("mut", "", 1); let ( - go_function_arguments_, - go_arguments_prep_, c_function_arguments_, c_function_argument_types, c_argument_prep_, @@ -242,16 +231,12 @@ fn get_method_arguments( .collect::>() .join(","); - go_function_arguments.push(go_function_arguments_); - go_arguments_prep.push(go_arguments_prep_); c_function_arguments.push(c_function_arguments_); c_argument_prep.push(c_argument_prep_); rust_function_arguments.push(rust_function_arguments_); } ( - proc_macro2::TokenStream::from_str(&go_function_arguments.join("\n")).unwrap(), - proc_macro2::TokenStream::from_str(&go_arguments_prep.join("\n")).unwrap(), proc_macro2::TokenStream::from_str(&c_function_arguments.join(",")).unwrap(), proc_macro2::TokenStream::from_str(&c_argument_prep.join("\n")).unwrap(), proc_macro2::TokenStream::from_str(&rust_function_arguments.join(",")).unwrap(), @@ -269,22 +254,14 @@ fn get_method_arguments( fn get_c_types( argument_name: &str, ty: &SupportedType, -) -> (String, String, Vec, Vec, String, String) { +) -> (Vec, Vec, String, String) { let t = ty.to_language_string(&None); let c_to_rust = format!("let {argument_name}: {t} = {argument_name}.custom_into();"); match ty { SupportedType::Reference(r) => { - let ( - go_function_arguments, - go_argument_prep, - c_function_arguments, - c_function_argument_types, - c_argument_prep, - rust_function_arguments, - ) = get_c_types(argument_name, &r.ty); + let (c_function_arguments, c_function_argument_types, _, _) = + get_c_types(argument_name, &r.ty); ( - "".to_string(), - "".to_string(), c_function_arguments, c_function_argument_types, c_to_rust, @@ -292,22 +269,14 @@ fn get_c_types( ) } SupportedType::str | SupportedType::String => ( - "".to_string(), - "".to_string(), vec![format!("{argument_name}")], vec!["*mut std::ffi::c_char".to_string()], c_to_rust, argument_name.to_string(), ), SupportedType::Option(r) => { - let ( - go_function_arguments, - go_argument_prep, - mut c_function_arguments, - mut c_function_argument_types, - c_argument_prep, - rust_function_arguments, - ) = get_c_types(argument_name, &r); + let (c_function_arguments, mut c_function_argument_types, _, _) = + get_c_types(argument_name, r); let v = c_function_argument_types.last_mut().unwrap(); if !v.starts_with('*') { @@ -315,8 +284,6 @@ fn get_c_types( } ( - "".to_string(), - "".to_string(), c_function_arguments, c_function_argument_types, c_to_rust, @@ -324,34 +291,24 @@ fn get_c_types( ) } SupportedType::bool => ( - "".to_string(), - "".to_string(), vec![format!("{argument_name}")], vec!["bool".to_string()], "".to_string(), argument_name.to_string(), ), SupportedType::Vec(v) => { - let ( - go_function_arguments, - go_argument_prep, - mut c_function_arguments, - mut c_function_argument_types, - mut c_argument_prep, - rust_function_arguments, - ) = get_c_types(argument_name, v); + let (mut c_function_arguments, mut c_function_argument_types, _, _) = + get_c_types(argument_name, v); let v = c_function_argument_types.last_mut().unwrap(); *v = v.replacen("*mut", "*mut *mut", 1); c_function_arguments.push("v_size".to_string()); c_function_argument_types.push("std::ffi::c_ulong".to_string()); - c_argument_prep = "let v_size: usize = v_size as usize;".to_string(); + let c_argument_prep = "let v_size: usize = v_size as usize;".to_string(); let c_to_rust = format!("{c_argument_prep}\nlet {argument_name}: {t} = {argument_name}.custom_into_vec(v_size);"); ( - "".to_string(), - "".to_string(), c_function_arguments, c_function_argument_types, c_to_rust, @@ -362,40 +319,30 @@ fn get_c_types( SupportedType::Tuple(_) => panic!("Tuple arguments not supported in c"), SupportedType::S => unreachable!(), SupportedType::i64 => ( - "".to_string(), - "".to_string(), vec![format!("{argument_name}")], vec!["std::ffi::c_long".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::u64 => ( - "".to_string(), - "".to_string(), vec![format!("{argument_name}")], vec!["std::ffi::c_ulong".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::i32 => ( - "".to_string(), - "".to_string(), vec![format!("{argument_name}")], vec!["std::ffi::c_int".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::f64 => ( - "".to_string(), - "".to_string(), vec![format!("{argument_name}")], vec!["std::ffi::c_double".to_string()], format!("let {argument_name}: {t} = {argument_name} as {t};"), argument_name.to_string(), ), SupportedType::CustomType(s) => ( - "".to_string(), - "".to_string(), vec![format!("{argument_name}")], vec![format!("*mut {s}C")], c_to_rust, From 2b181deca72b9d850fe31c1173f98be116343a49 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Thu, 23 May 2024 14:02:35 -0500 Subject: [PATCH 11/19] tweak C Makefile and example --- pgml-sdks/pgml/Cargo.toml | 5 +- pgml-sdks/pgml/c/Makefile | 34 +++------ pgml-sdks/pgml/c/cbindgen.toml | 117 ++++++++++++++++++++++++++++++ pgml-sdks/pgml/c/example/main.c | 45 ++++++++++++ pgml-sdks/pgml/c/go/go.mod | 3 - pgml-sdks/pgml/c/go/pgml.go | 23 ------ pgml-sdks/pgml/c/test.c | 45 ------------ pgml-sdks/pgml/c/zig/build.zig | 78 -------------------- pgml-sdks/pgml/c/zig/src/main.zig | 37 ---------- pgml-sdks/pgml/src/collection.rs | 1 - 10 files changed, 173 insertions(+), 215 deletions(-) create mode 100644 pgml-sdks/pgml/c/cbindgen.toml create mode 100644 pgml-sdks/pgml/c/example/main.c delete mode 100644 pgml-sdks/pgml/c/go/go.mod delete mode 100644 pgml-sdks/pgml/c/go/pgml.go delete mode 100644 pgml-sdks/pgml/c/test.c delete mode 100644 pgml-sdks/pgml/c/zig/build.zig delete mode 100644 pgml-sdks/pgml/c/zig/src/main.zig diff --git a/pgml-sdks/pgml/Cargo.toml b/pgml-sdks/pgml/Cargo.toml index a4e3af03d..b2d1c7c5f 100644 --- a/pgml-sdks/pgml/Cargo.toml +++ b/pgml-sdks/pgml/Cargo.toml @@ -13,9 +13,6 @@ keywords = ["postgres", "machine learning", "vector databases", "embeddings"] name = "pgml" crate-type = ["lib", "cdylib"] -[rust-analyzer.checkOnSave] -extraArgs = ["--target-dir", "/path/to/proect/target/check"] - [dependencies] rust_bridge = {path = "../rust-bridge/rust-bridge", version = "0.1.0"} sqlx = { version = "0.7.3", features = [ "runtime-tokio-rustls", "postgres", "json", "time", "uuid"] } @@ -50,7 +47,7 @@ parking_lot = "0.12.1" once_cell = "1.19.0" [features] -default = ["c"] +default = [] python = ["dep:pyo3", "dep:pyo3-asyncio"] javascript = ["dep:neon"] c = [] diff --git a/pgml-sdks/pgml/c/Makefile b/pgml-sdks/pgml/c/Makefile index a8d614023..b7a0724c0 100644 --- a/pgml-sdks/pgml/c/Makefile +++ b/pgml-sdks/pgml/c/Makefile @@ -1,31 +1,17 @@ BINARY_NAME=pgml +HEADER=include/${BINARY_NAME}.h +PGML_LIB=../target/debug/ -build: - cargo build --features c - cargo expand --features c > expanded.rs - cbindgen --lang C -o pgml.h expanded.rs - # GOARCH=amd64 GOOS=darwin go build -o ${BINARY_NAME}-darwin main.go - GOARCH=amd64 GOOS=linux go build -o ${BINARY_NAME}-linux pgml.go - # GOARCH=amd64 GOOS=windows go build -o ${BINARY_NAME}-windows main.go +bindings: + cargo b --features c + RUST_TOOLCHAIN=$(dirname $(rustup +nightly which rustc)) cbindgen --config cbindgen.toml --output ${HEADER} ../ -build_test: - cargo build --features c - cargo expand --features c > expanded.rs - cbindgen --lang C -o pgml.h expanded.rs - gcc test.c -o test -l pgml -L ./../target/debug - -test: build_test - LD_LIBRARY_PATH=./../target/debug ./test - -test_c: - gcc test.c -o test -l pgml -L ./../target/debug - LD_LIBRARY_PATH=./../target/debug ./test +build: bindings + gcc -Wall -o build/example -Iinclude/ -L${PGML_LIB} -l ${BINARY_NAME} example/main.c run: build - LD_LIBRARY_PATH=./../target/debug ./${BINARY_NAME}-linux + LD_LIBRARY_PATH=${PGML_LIB} ./build/example clean: - go clean - # rm ${BINARY_NAME}-darwin - rm ${BINARY_NAME}-linux - # rm ${BINARY_NAME}-windows + rm ${HEADER} + diff --git a/pgml-sdks/pgml/c/cbindgen.toml b/pgml-sdks/pgml/c/cbindgen.toml new file mode 100644 index 000000000..4efcf2453 --- /dev/null +++ b/pgml-sdks/pgml/c/cbindgen.toml @@ -0,0 +1,117 @@ +language = "C" + + +############## Options for Wrapping the Contents of the Header ################# + +# header = "/* Text to put at the beginning of the generated file. Probably a license. */" +# trailer = "/* Text to put at the end of the generated file */" +# include_guard = "my_bindings_h" +# pragma_once = true +# autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */" +include_version = false +# namespace = "my_namespace" +namespaces = [] +using_namespaces = [] +sys_includes = [] +includes = [] +no_includes = false +# cpp_compat = true +after_includes = "" + + +############################ Code Style Options ################################ + +braces = "SameLine" +line_length = 100 +tab_width = 2 +documentation = true +documentation_style = "auto" +documentation_length = "full" +line_endings = "LF" # also "CR", "CRLF", "Native" + + +############################# Codegen Options ################################## + +style = "both" +sort_by = "Name" # default for `fn.sort_by` and `const.sort_by` +usize_is_size_t = true + +[defines] +# "target_os = freebsd" = "DEFINE_FREEBSD" +# "feature = serde" = "DEFINE_SERDE" + +[export] +include = [] +exclude = [] +# prefix = "CAPI_" +item_types = [] +renaming_overrides_prefixing = false + +[export.rename] + +[export.body] + +[export.mangle] + +[fn] +rename_args = "None" +# must_use = "MUST_USE_FUNC" +# deprecated = "DEPRECATED_FUNC" +# deprecated_with_note = "DEPRECATED_FUNC_WITH_NOTE" +# no_return = "NO_RETURN" +# prefix = "START_FUNC" +# postfix = "END_FUNC" +args = "auto" +sort_by = "Name" + +[struct] +rename_fields = "None" +# must_use = "MUST_USE_STRUCT" +# deprecated = "DEPRECATED_STRUCT" +# deprecated_with_note = "DEPRECATED_STRUCT_WITH_NOTE" +derive_constructor = false +derive_eq = false +derive_neq = false +derive_lt = false +derive_lte = false +derive_gt = false +derive_gte = false + +[enum] +rename_variants = "None" +# must_use = "MUST_USE_ENUM" +# deprecated = "DEPRECATED_ENUM" +# deprecated_with_note = "DEPRECATED_ENUM_WITH_NOTE" +add_sentinel = false +prefix_with_name = false +derive_helper_methods = false +derive_const_casts = false +derive_mut_casts = false +# cast_assert_name = "ASSERT" +derive_tagged_enum_destructor = false +derive_tagged_enum_copy_constructor = false +enum_class = true +private_default_tagged_enum_constructor = false + +[const] +allow_static_const = true +allow_constexpr = false +sort_by = "Name" + +[macro_expansion] +bitflags = false + +############## Options for How Your Rust library Should Be Parsed ############## + +[parse] +parse_deps = false +# include = [] +exclude = [] +clean = false +extra_bindings = [] + +[parse.expand] +crates = ["pgml"] +all_features = false +default_features = true +features = ["c"] diff --git a/pgml-sdks/pgml/c/example/main.c b/pgml-sdks/pgml/c/example/main.c new file mode 100644 index 000000000..092547f2c --- /dev/null +++ b/pgml-sdks/pgml/c/example/main.c @@ -0,0 +1,45 @@ +#include + +#include "pgml.h" + +int main() { + // Create the Collection and Pipeline + CollectionC * collection = pgml_collectionc_new("test_c", NULL); + PipelineC * pipeline = pgml_pipelinec_new("test_c", "{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"intfloat/e5-small\"}}}"); + + // Add the Pipeline to the Collection + pgml_collectionc_add_pipeline(collection, pipeline); + + // Upsert the documents + char * documents_to_upsert[2] = {"{\"id\": \"doc1\", \"text\": \"test1\"}", "{\"id\": \"doc2\", \"text\": \"test2\"}"}; + pgml_collectionc_upsert_documents(collection, documents_to_upsert, 2, NULL); + + // Retrieve the documents + unsigned long r_size = 0; + char** documents = pgml_collectionc_get_documents(collection, NULL, &r_size); + + // Print the documents + printf("\n\nPrinting documents:\n"); + int i; + for (i = 0; i < r_size; ++i) { + printf("Document %u -> %s\n", i, documents[i]); + } + + // Search over the documents + r_size = 0; + char** results = pgml_collectionc_vector_search(collection, "{\"query\": {\"fields\": {\"text\": {\"query\": \"Test query!\"}}}, \"limit\": 5}", pipeline, &r_size); + printf("\n\nPrinting results:\n"); + for (i = 0; i < r_size; ++i) { + printf("Result %u -> %s\n", i, results[i]); + } + + // Test the TransformerPipeline + TransformerPipelineC * t_pipeline = pgml_transformerpipelinec_new("text-generation", "meta-llama/Meta-Llama-3-8B-Instruct", NULL, NULL); + GeneralJsonAsyncIteratorC * t_pipeline_iter = pgml_transformerpipelinec_transform_stream(t_pipeline, "\"AI is going to\"", "{\"max_new_tokens\": 100}", NULL); + while (!GeneralJsonAsyncIteratorC_done(t_pipeline_iter)) { + char * res = GeneralJsonAsyncIteratorC_next(t_pipeline_iter); + printf("Token -> %s\n", res); + } + + return 0; +} diff --git a/pgml-sdks/pgml/c/go/go.mod b/pgml-sdks/pgml/c/go/go.mod deleted file mode 100644 index 6b1511192..000000000 --- a/pgml-sdks/pgml/c/go/go.mod +++ /dev/null @@ -1,3 +0,0 @@ -module pgml - -go 1.22.2 diff --git a/pgml-sdks/pgml/c/go/pgml.go b/pgml-sdks/pgml/c/go/pgml.go deleted file mode 100644 index e22b91dd6..000000000 --- a/pgml-sdks/pgml/c/go/pgml.go +++ /dev/null @@ -1,23 +0,0 @@ -package main - -/* -#cgo LDFLAGS: -l pgml -L ./../target/debug -#include "pgml.h" -*/ -import "C" - -import ( - "unsafe" -) - -type Collection struct { - collection *C.CollectionC -} - -func main() { - c_string_p := C.CString("Test CString") - defer C.free(unsafe.Pointer(c_string_p)) - collection := C.new_collection(c_string_p) - C.test_collection(collection) - defer C.free_collection(collection) -} diff --git a/pgml-sdks/pgml/c/test.c b/pgml-sdks/pgml/c/test.c deleted file mode 100644 index a7538e705..000000000 --- a/pgml-sdks/pgml/c/test.c +++ /dev/null @@ -1,45 +0,0 @@ -#include - -#include "pgml.h" - -int main() { - // Create the Collection and Pipeline - CollectionC * collection = CollectionC_new("test_c", NULL); - PipelineC * pipeline = PipelineC_new("test_c", "{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"intfloat/e5-small\"}}}"); - - // Add the Pipeline to the Collection - CollectionC_add_pipeline(collection, pipeline); - - // Upsert the documents - char * documents_to_upsert[2] = {"{\"id\": \"doc1\", \"text\": \"test1\"}", "{\"id\": \"doc2\", \"text\": \"test2\"}"}; - CollectionC_upsert_documents(collection, documents_to_upsert, 2, NULL); - - // Retrieve the documents - unsigned long r_size = 0; - char** documents = CollectionC_get_documents(collection, NULL, &r_size); - - // Print the documents - printf("\n\nPrinting documents:\n"); - int i; - for (i = 0; i < r_size; ++i) { - printf("Document %u -> %s\n", i, documents[i]); - } - - // Search over the documents - r_size = 0; - char** results = CollectionC_vector_search(collection, "{\"query\": {\"fields\": {\"text\": {\"query\": \"Test query!\"}}}, \"limit\": 5}", pipeline, &r_size); - printf("\n\nPrinting results:\n"); - for (i = 0; i < r_size; ++i) { - printf("Result %u -> %s\n", i, results[i]); - } - - // Test the TransformerPipeline - TransformerPipelineC * t_pipeline = TransformerPipelineC_new("text-generation", "TheBloke/zephyr-7B-beta-GPTQ", "{\"revision\": \"main\"}", "postgres://pg:ml@sql.cloud.postgresml.org:38042/pgml"); - GeneralJsonAsyncIteratorC * t_pipeline_iter = TransformerPipelineC_transform_stream(t_pipeline, "\"AI is going to\"", "{\"max_new_tokens\": 100}", NULL); - while (!GeneralJsonAsyncIteratorC_done(t_pipeline_iter)) { - char * res = GeneralJsonAsyncIteratorC_next(t_pipeline_iter); - printf("Token -> %s\n", res); - } - - return 0; -} diff --git a/pgml-sdks/pgml/c/zig/build.zig b/pgml-sdks/pgml/c/zig/build.zig deleted file mode 100644 index 300954738..000000000 --- a/pgml-sdks/pgml/c/zig/build.zig +++ /dev/null @@ -1,78 +0,0 @@ -const std = @import("std"); - -// Although this function looks imperative, note that its job is to -// declaratively construct a build graph that will be executed by an external -// runner. -pub fn build(b: *std.Build) void { - // Standard target options allows the person running `zig build` to choose - // what target to build for. Here we do not override the defaults, which - // means any target is allowed, and the default is native. Other options - // for restricting supported target set are available. - const target = b.standardTargetOptions(.{}); - - // Standard optimization options allow the person running `zig build` to select - // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not - // set a preferred release mode, allowing the user to decide how to optimize. - const optimize = b.standardOptimizeOption(.{}); - - const exe = b.addExecutable(.{ - .name = "zig", - // In this case the main source file is merely a path, however, in more - // complicated build scripts, this could be a generated file. - .root_source_file = .{ .path = "src/main.zig" }, - .target = target, - .optimize = optimize, - }); - - // Need to link our Rust pgml library - exe.addLibraryPath(.{ .path = "./../../target/debug" }); - exe.linkSystemLibrary("pgml"); - - // This declares intent for the executable to be installed into the - // standard location when the user invokes the "install" step (the default - // step when running `zig build`). - b.installArtifact(exe); - - // This *creates* a Run step in the build graph, to be executed when another - // step is evaluated that depends on it. The next line below will establish - // such a dependency. - const run_cmd = b.addRunArtifact(exe); - - // By making the run step depend on the install step, it will be run from the - // installation directory rather than directly from within the cache directory. - // This is not necessary, however, if the application depends on other installed - // files, this ensures they will be present and in the expected location. - run_cmd.step.dependOn(b.getInstallStep()); - - // This allows the user to pass arguments to the application in the build - // command itself, like this: `zig build run -- arg1 arg2 etc` - if (b.args) |args| { - run_cmd.addArgs(args); - } - - // This creates a build step. It will be visible in the `zig build --help` menu, - // and can be selected like this: `zig build run` - // This will evaluate the `run` step rather than the default, which is "install". - const run_step = b.step("run", "Run the app"); - run_step.dependOn(&run_cmd.step); - - // Creates a step for unit testing. This only builds the test executable - // but does not run it. - const unit_tests = b.addTest(.{ - .root_source_file = .{ .path = "src/main.zig" }, - .target = target, - .optimize = optimize, - }); - - // Need to link our Rust pgml library - exe.addLibraryPath(.{ .path = "./../../target/debug" }); - unit_tests.linkSystemLibrary("pgml"); - - const run_unit_tests = b.addRunArtifact(unit_tests); - - // Similar to creating the run step earlier, this exposes a `test` step to - // the `zig build --help` menu, providing a way for the user to request - // running the unit tests. - const test_step = b.step("test", "Run unit tests"); - test_step.dependOn(&run_unit_tests.step); -} diff --git a/pgml-sdks/pgml/c/zig/src/main.zig b/pgml-sdks/pgml/c/zig/src/main.zig deleted file mode 100644 index af806dda1..000000000 --- a/pgml-sdks/pgml/c/zig/src/main.zig +++ /dev/null @@ -1,37 +0,0 @@ -const pgml = @cImport({ - // See https://github.com/ziglang/zig/issues/515 - // @cDefine("_NO_CRT_STDIO_INLINE", "1"); - // @cInclude("./../pgml.h"); - @cInclude("./../pgml.h"); -}); - -pub fn main() void { - // Create the Collection and Pipeline - var collection: *pgml.CollectionC = pgml.CollectionC_new(@constCast("test_c"), null); - var pipeline: *pgml.PipelineC = pgml.PipelineC_new(@constCast("test_c"), @constCast("{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"intfloat/e5-small\"}}}")); - - // Add the Pipeline to the Collection - pgml.CollectionC_add_pipeline(collection, pipeline); - - // Upsert the documents - // const documents_to_upsert: [2][]const u8 = .{ "{\"id\": \"doc1\", \"text\": \"test1\"}", "{\"id\": \"doc2\", \"text\": \"test2\"}" }; - // const c_documents_to_upsert: [*c][*c]pgml.JsonC = @as([*c][*c]pgml.JsonC, @ptrCast(@constCast(documents_to_upsert[0..2].ptr))); - // pgml.CollectionC_upsert_documents(collection, c_documents_to_upsert, 2, null); -} - -// test "simple test" { -// // Create the Collection and Pipeline -// var collection: *pgml.CollectionC = pgml.CollectionC_new(@constCast("test_c"), null); -// var pipeline: *pgml.PipelineC = pgml.PipelineC_new(@constCast("test_c"), @constCast("{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"intfloat/e5-small\"}}}")); - -// // Add the Pipeline to the Collection -// pgml.CollectionC_add_pipeline(collection, pipeline); - -// // Upsert the documents -// // char * documents_to_upsert[2] = {"{\"id\": \"doc1\", \"text\": \"test1\"}", "{\"id\": \"doc2\", \"text\": \"test2\"}"}; -// // CollectionC_upsert_documents(collection, documents_to_upsert, 2, NULL); - -// // // Retrieve the documents -// // unsigned long r_size = 0; -// // char** documents = CollectionC_get_documents(collection, NULL, &r_size); -// } diff --git a/pgml-sdks/pgml/src/collection.rs b/pgml-sdks/pgml/src/collection.rs index 0209f1c40..ba24420ab 100644 --- a/pgml-sdks/pgml/src/collection.rs +++ b/pgml-sdks/pgml/src/collection.rs @@ -518,7 +518,6 @@ impl Collection { documents: Vec, args: Option, ) -> anyhow::Result<()> { - eprintln!("IN THE UPSERT DOCUMENTS FUNCTION"); // The flow for this function // 1. Create the collection if it does not exist // 2. Get all pipelines where ACTIVE = TRUE From 43c6773bea51cea71bad522b76ff6f7c5aa0fc5d Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Thu, 23 May 2024 14:10:50 -0500 Subject: [PATCH 12/19] renaming --- pgml-sdks/pgml/c/Makefile | 7 ++++--- pgml-sdks/pgml/c/{example/main.c => example.c} | 0 2 files changed, 4 insertions(+), 3 deletions(-) rename pgml-sdks/pgml/c/{example/main.c => example.c} (100%) diff --git a/pgml-sdks/pgml/c/Makefile b/pgml-sdks/pgml/c/Makefile index b7a0724c0..3890e0661 100644 --- a/pgml-sdks/pgml/c/Makefile +++ b/pgml-sdks/pgml/c/Makefile @@ -1,5 +1,5 @@ BINARY_NAME=pgml -HEADER=include/${BINARY_NAME}.h +HEADER=${BINARY_NAME}.h PGML_LIB=../target/debug/ bindings: @@ -7,11 +7,12 @@ bindings: RUST_TOOLCHAIN=$(dirname $(rustup +nightly which rustc)) cbindgen --config cbindgen.toml --output ${HEADER} ../ build: bindings - gcc -Wall -o build/example -Iinclude/ -L${PGML_LIB} -l ${BINARY_NAME} example/main.c + gcc -Wall -o ./example -Iinclude/ -L${PGML_LIB} -l ${BINARY_NAME} example.c run: build - LD_LIBRARY_PATH=${PGML_LIB} ./build/example + LD_LIBRARY_PATH=${PGML_LIB} ./example clean: rm ${HEADER} + rm -rf ./example diff --git a/pgml-sdks/pgml/c/example/main.c b/pgml-sdks/pgml/c/example.c similarity index 100% rename from pgml-sdks/pgml/c/example/main.c rename to pgml-sdks/pgml/c/example.c From e50294b092af433955e7346b460888950cdef133 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Thu, 23 May 2024 14:19:40 -0500 Subject: [PATCH 13/19] rename fns; only forward declare iterators --- pgml-sdks/pgml/c/Makefile | 4 +++- pgml-sdks/pgml/c/example.c | 4 ++-- pgml-sdks/pgml/src/languages/c.rs | 17 ++++++++--------- 3 files changed, 13 insertions(+), 12 deletions(-) diff --git a/pgml-sdks/pgml/c/Makefile b/pgml-sdks/pgml/c/Makefile index 3890e0661..192766cfe 100644 --- a/pgml-sdks/pgml/c/Makefile +++ b/pgml-sdks/pgml/c/Makefile @@ -4,7 +4,9 @@ PGML_LIB=../target/debug/ bindings: cargo b --features c - RUST_TOOLCHAIN=$(dirname $(rustup +nightly which rustc)) cbindgen --config cbindgen.toml --output ${HEADER} ../ + rustup default nightly + cbindgen --config cbindgen.toml --output ${HEADER} ../ + rustup default stable build: bindings gcc -Wall -o ./example -Iinclude/ -L${PGML_LIB} -l ${BINARY_NAME} example.c diff --git a/pgml-sdks/pgml/c/example.c b/pgml-sdks/pgml/c/example.c index 092547f2c..8dd6bd3a7 100644 --- a/pgml-sdks/pgml/c/example.c +++ b/pgml-sdks/pgml/c/example.c @@ -36,8 +36,8 @@ int main() { // Test the TransformerPipeline TransformerPipelineC * t_pipeline = pgml_transformerpipelinec_new("text-generation", "meta-llama/Meta-Llama-3-8B-Instruct", NULL, NULL); GeneralJsonAsyncIteratorC * t_pipeline_iter = pgml_transformerpipelinec_transform_stream(t_pipeline, "\"AI is going to\"", "{\"max_new_tokens\": 100}", NULL); - while (!GeneralJsonAsyncIteratorC_done(t_pipeline_iter)) { - char * res = GeneralJsonAsyncIteratorC_next(t_pipeline_iter); + while (!pgml_generaljsonasynciteratorc_done(t_pipeline_iter)) { + char * res = pgml_generaljsonasynciteratorc_next(t_pipeline_iter); printf("Token -> %s\n", res); } diff --git a/pgml-sdks/pgml/src/languages/c.rs b/pgml-sdks/pgml/src/languages/c.rs index 1538bd369..9118b0cd4 100644 --- a/pgml-sdks/pgml/src/languages/c.rs +++ b/pgml-sdks/pgml/src/languages/c.rs @@ -19,10 +19,8 @@ unsafe impl CustomInto<*mut JsonC> for Json { } } -#[repr(C)] pub struct GeneralJsonIteratorC { - pub wrapped: - *mut std::iter::Peekable> + Send>>, + wrapped: *mut std::iter::Peekable> + Send>>, } unsafe impl CustomInto<*mut GeneralJsonIteratorC> for GeneralJsonIterator { @@ -34,13 +32,15 @@ unsafe impl CustomInto<*mut GeneralJsonIteratorC> for GeneralJsonIterator { } #[no_mangle] -pub unsafe extern "C" fn GeneralJsonIteratorC_done(iterator: *mut GeneralJsonIteratorC) -> bool { +pub unsafe extern "C" fn pgml_generaljsoniteratorc_done( + iterator: *mut GeneralJsonIteratorC, +) -> bool { let c = Box::leak(Box::from_raw(iterator)); (*c.wrapped).peek().is_none() } #[no_mangle] -pub unsafe extern "C" fn GeneralJsonIteratorC_next( +pub unsafe extern "C" fn pgml_generaljsoniteratorc_next( iterator: *mut GeneralJsonIteratorC, ) -> *mut JsonC { let c = Box::leak(Box::from_raw(iterator)); @@ -51,9 +51,8 @@ pub unsafe extern "C" fn GeneralJsonIteratorC_next( type PeekableStream = futures::stream::Peekable> + Send>>>; -#[repr(C)] pub struct GeneralJsonAsyncIteratorC { - pub wrapped: *mut PeekableStream, + wrapped: *mut PeekableStream, } unsafe impl CustomInto<*mut GeneralJsonAsyncIteratorC> for GeneralJsonAsyncIterator { @@ -66,7 +65,7 @@ unsafe impl CustomInto<*mut GeneralJsonAsyncIteratorC> for GeneralJsonAsyncItera } #[no_mangle] -pub unsafe extern "C" fn GeneralJsonAsyncIteratorC_done( +pub unsafe extern "C" fn pgml_generaljsonasynciteratorc_done( iterator: *mut GeneralJsonAsyncIteratorC, ) -> bool { crate::get_or_set_runtime().block_on(async move { @@ -79,7 +78,7 @@ pub unsafe extern "C" fn GeneralJsonAsyncIteratorC_done( } #[no_mangle] -pub unsafe extern "C" fn GeneralJsonAsyncIteratorC_next( +pub unsafe extern "C" fn pgml_generaljsonasynciteratorc_next( iterator: *mut GeneralJsonAsyncIteratorC, ) -> *mut JsonC { crate::get_or_set_runtime().block_on(async move { From 77ccc3af585e8212389b558a9c69bf92bfaf63a8 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Thu, 23 May 2024 14:23:58 -0500 Subject: [PATCH 14/19] call destructors in example; remove eprintln --- pgml-sdks/pgml/c/example.c | 5 +++++ pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs | 5 ----- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/pgml-sdks/pgml/c/example.c b/pgml-sdks/pgml/c/example.c index 8dd6bd3a7..fc85d6523 100644 --- a/pgml-sdks/pgml/c/example.c +++ b/pgml-sdks/pgml/c/example.c @@ -41,5 +41,10 @@ int main() { printf("Token -> %s\n", res); } + // cleanup + pgml_transformerpipelinec_delete(t_pipeline); + pgml_pipelinec_delete(pipeline); + pgml_collectionc_delete(collection); + return 0; } diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index d4af6a5c2..642d67238 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -54,8 +54,6 @@ pub fn generate_c_alias(parsed: DeriveInput) -> proc_macro::TokenStream { } }; - eprintln!("\n\n{expanded}\n\n"); - proc_macro::TokenStream::from(expanded) } @@ -168,8 +166,6 @@ pub fn generate_c_methods( } }; - eprintln!("\n\n{}\n\n", method); - methods.push(method); } @@ -182,7 +178,6 @@ pub fn generate_c_methods( } }; - eprintln!("\n\n{destructor}\n\n"); methods.push(destructor); proc_macro::TokenStream::from(quote! { From f03d19b0cc18a2a091d80166049a0c1ffcba822c Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Thu, 23 May 2024 14:36:30 -0500 Subject: [PATCH 15/19] add guard and comment to header gen --- pgml-sdks/pgml/c/cbindgen.toml | 115 +-------------------------------- 1 file changed, 3 insertions(+), 112 deletions(-) diff --git a/pgml-sdks/pgml/c/cbindgen.toml b/pgml-sdks/pgml/c/cbindgen.toml index 4efcf2453..07d509008 100644 --- a/pgml-sdks/pgml/c/cbindgen.toml +++ b/pgml-sdks/pgml/c/cbindgen.toml @@ -1,117 +1,8 @@ language = "C" - - -############## Options for Wrapping the Contents of the Header ################# - -# header = "/* Text to put at the beginning of the generated file. Probably a license. */" -# trailer = "/* Text to put at the end of the generated file */" -# include_guard = "my_bindings_h" -# pragma_once = true -# autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */" -include_version = false -# namespace = "my_namespace" -namespaces = [] -using_namespaces = [] -sys_includes = [] -includes = [] -no_includes = false -# cpp_compat = true -after_includes = "" - - -############################ Code Style Options ################################ - -braces = "SameLine" -line_length = 100 -tab_width = 2 -documentation = true -documentation_style = "auto" -documentation_length = "full" -line_endings = "LF" # also "CR", "CRLF", "Native" - - -############################# Codegen Options ################################## - -style = "both" -sort_by = "Name" # default for `fn.sort_by` and `const.sort_by` -usize_is_size_t = true - -[defines] -# "target_os = freebsd" = "DEFINE_FREEBSD" -# "feature = serde" = "DEFINE_SERDE" - -[export] -include = [] -exclude = [] -# prefix = "CAPI_" -item_types = [] -renaming_overrides_prefixing = false - -[export.rename] - -[export.body] - -[export.mangle] - -[fn] -rename_args = "None" -# must_use = "MUST_USE_FUNC" -# deprecated = "DEPRECATED_FUNC" -# deprecated_with_note = "DEPRECATED_FUNC_WITH_NOTE" -# no_return = "NO_RETURN" -# prefix = "START_FUNC" -# postfix = "END_FUNC" -args = "auto" -sort_by = "Name" - -[struct] -rename_fields = "None" -# must_use = "MUST_USE_STRUCT" -# deprecated = "DEPRECATED_STRUCT" -# deprecated_with_note = "DEPRECATED_STRUCT_WITH_NOTE" -derive_constructor = false -derive_eq = false -derive_neq = false -derive_lt = false -derive_lte = false -derive_gt = false -derive_gte = false - -[enum] -rename_variants = "None" -# must_use = "MUST_USE_ENUM" -# deprecated = "DEPRECATED_ENUM" -# deprecated_with_note = "DEPRECATED_ENUM_WITH_NOTE" -add_sentinel = false -prefix_with_name = false -derive_helper_methods = false -derive_const_casts = false -derive_mut_casts = false -# cast_assert_name = "ASSERT" -derive_tagged_enum_destructor = false -derive_tagged_enum_copy_constructor = false -enum_class = true -private_default_tagged_enum_constructor = false - -[const] -allow_static_const = true -allow_constexpr = false -sort_by = "Name" - -[macro_expansion] -bitflags = false - -############## Options for How Your Rust library Should Be Parsed ############## - -[parse] -parse_deps = false -# include = [] -exclude = [] -clean = false -extra_bindings = [] +header = "// Copyright (c) 2024 PostgresML Team" +include_guard = "PGML_H" +autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */" [parse.expand] crates = ["pgml"] -all_features = false -default_features = true features = ["c"] From 8fe774b2c14825ea8b02c97504e94497046df87d Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Fri, 24 May 2024 11:27:17 -0500 Subject: [PATCH 16/19] use typedefs in generated header --- pgml-sdks/pgml/c/cbindgen.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/pgml-sdks/pgml/c/cbindgen.toml b/pgml-sdks/pgml/c/cbindgen.toml index 07d509008..bc8424a26 100644 --- a/pgml-sdks/pgml/c/cbindgen.toml +++ b/pgml-sdks/pgml/c/cbindgen.toml @@ -2,6 +2,7 @@ language = "C" header = "// Copyright (c) 2024 PostgresML Team" include_guard = "PGML_H" autogen_warning = "/* Warning, this file is autogenerated by cbindgen. Don't modify this manually. */" +style = "type" [parse.expand] crates = ["pgml"] From bfe5e5ee65ef40ba56ce0b5b1cc3e410a3a786a7 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Fri, 24 May 2024 13:58:50 -0500 Subject: [PATCH 17/19] remove ignored files and some commented code --- pgml-sdks/pgml/.gitignore | 2 -- pgml-sdks/pgml/src/collection.rs | 29 ----------------------------- 2 files changed, 31 deletions(-) diff --git a/pgml-sdks/pgml/.gitignore b/pgml-sdks/pgml/.gitignore index e82a5d1fb..a20f70eac 100644 --- a/pgml-sdks/pgml/.gitignore +++ b/pgml-sdks/pgml/.gitignore @@ -169,6 +169,4 @@ scratch.sql scratch.py # Some SDK specific things -expanded.rs -test pgml.h diff --git a/pgml-sdks/pgml/src/collection.rs b/pgml-sdks/pgml/src/collection.rs index ba24420ab..676fe6f0c 100644 --- a/pgml-sdks/pgml/src/collection.rs +++ b/pgml-sdks/pgml/src/collection.rs @@ -102,35 +102,6 @@ pub(crate) struct CollectionDatabaseData { pub project_info: ProjectInfo, } -// #[repr(C)] -// pub struct CollectionC { -// pub collection: *mut Collection, -// } - -// #[no_mangle] -// pub unsafe extern "C" fn new_collection(name: *const std::ffi::c_char) -> *mut CollectionC { -// let name = std::ffi::CStr::from_ptr(name).to_str().unwrap(); -// println!("Nice one Silas: {}", name); -// let collection = Box::into_raw(Box::new(Collection::new(name, None).unwrap())); -// Box::into_raw(Box::new(CollectionC { collection })) -// } - -// #[no_mangle] -// pub unsafe extern "C" fn free_collection(collection: *mut CollectionC) { -// if collection.is_null() { -// return; -// } -// drop(Box::from_raw(collection)); -// } - -// #[no_mangle] -// pub unsafe extern "C" fn test_collection(collection: *mut CollectionC) { -// let collection: *mut Collection = (*collection).collection; -// let collection: Collection = (*collection).clone(); -// println!("Nice one Silas x two: {}", collection.name); -// println!("test"); -// } - /// A collection of documents #[derive(alias, Debug, Clone)] pub struct Collection { From 561f8fb7533222386f2885661279b0e2d8581088 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:32:05 -0500 Subject: [PATCH 18/19] remove commented code --- .../rust-bridge/rust-bridge-traits/src/c.rs | 72 +------------------ 1 file changed, 1 insertion(+), 71 deletions(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs index 76cc80ee1..c06ac59d8 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-traits/src/c.rs @@ -83,84 +83,14 @@ macro_rules! gen_custom_into { self } } - }; // (($($T1:ident),+), ($($T2:ident),+), ($($C:tt),+)) => { - // impl<$($T1, $T2: CustomInto<$T1>),+> CustomInto<($($T1),+,)> for ($($T2),+,) { - // fn custom_into(self) -> ($($T1),+,) { - // ($(self.$C.custom_into()),+,) - // } - // } - // } + }; } gen_custom_into!(()); gen_custom_into!(bool); -// impl> CustomInto> for Option { -// fn custom_into(self) -> Option { -// self.map(|s| s.custom_into()) -// } -// } - unsafe impl> CustomInto> for Vec { unsafe fn custom_into(self) -> Vec { self.into_iter().map(|x| x.custom_into()).collect() } } - -// impl, T2: CustomInto> -// CustomInto> for HashMap -// { -// fn custom_into(self) -> HashMap { -// self.into_iter() -// .map(|(k, v)| (k.custom_into(), v.custom_into())) -// .collect() -// } -// } - -// impl CustomInto<&'static str> for &str { -// fn custom_into(self) -> &'static str { -// // This is how we get around the liftime checker -// unsafe { -// let ptr = self as *const str; -// let ptr = ptr as *mut str; -// let boxed = Box::from_raw(ptr); -// Box::leak(boxed) -// } -// } -// } - -// gen_custom_into!((T1), (TT2), (0)); -// gen_custom_into!((T1, T2), (TT1, TT2), (0, 1)); -// gen_custom_into!((T1, T2, T3), (TT1, TT2, TT3), (0, 1, 2)); -// gen_custom_into!((T1, T2, T3, T4), (TT1, TT2, TT3, TT4), (0, 1, 2, 3)); -// gen_custom_into!( -// (T1, T2, T3, T4, T5), -// (TT1, TT2, TT3, TT4, TT5), -// (0, 1, 2, 3, 4) -// ); -// gen_custom_into!( -// (T1, T2, T3, T4, T5, T6), -// (TT1, TT2, TT3, TT4, TT5, TT6), -// (0, 1, 2, 3, 4, 5) -// ); - -// // There are some restrictions I cannot figure out around conflicting trait -// // implimentations so this is my solution for now -// gen_custom_into!(String); - -// gen_custom_into!(()); - -// gen_custom_into!(bool); - -// gen_custom_into!(i8); -// gen_custom_into!(i16); -// gen_custom_into!(i32); -// gen_custom_into!(i64); - -// gen_custom_into!(u8); -// gen_custom_into!(u16); -// gen_custom_into!(u32); -// gen_custom_into!(u64); - -// gen_custom_into!(f32); -// gen_custom_into!(f64); From 1dff030af2c107ffae3d300d5a132d7f27defc54 Mon Sep 17 00:00:00 2001 From: Kevin Zimmerman <4733573+kczimm@users.noreply.github.com> Date: Mon, 3 Jun 2024 14:39:04 -0500 Subject: [PATCH 19/19] remove comment --- pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs | 8 -------- 1 file changed, 8 deletions(-) diff --git a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs index 642d67238..3f83b66ad 100644 --- a/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs +++ b/pgml-sdks/rust-bridge/rust-bridge-macros/src/c.rs @@ -238,14 +238,6 @@ fn get_method_arguments( ) } -// Need: -// - go function arguments -// - go function argument prep for calling c function -// - go conversion from c returned value - For custom types this is always a wrapper for everything else this is a primitve type -// - c function arguments -// - c function arguments prep for calling rust function -// - arguments to call rust function with -// - c conversion from rust returned value - This is done with the into trait fn get_c_types( argument_name: &str, ty: &SupportedType,