Skip to content

Commit b6d6cb3

Browse files
authored
Added Rust and C docs for Client SDKs (#1506)
1 parent fb2426f commit b6d6cb3

File tree

9 files changed

+1030
-31
lines changed

9 files changed

+1030
-31
lines changed

pgml-cms/docs/api/client-sdk/README.md

Lines changed: 153 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -12,17 +12,39 @@ The client SDK can be installed using standard package managers for JavaScript,
1212
Installing the SDK into your project is as simple as:
1313

1414
{% tabs %}
15-
{% tab title="JavaScript " %}
15+
{% tab title="JavaScript" %}
1616
```bash
1717
npm i pgml
1818
```
1919
{% endtab %}
2020

21-
{% tab title="Python " %}
21+
{% tab title="Python" %}
2222
```bash
2323
pip install pgml
2424
```
2525
{% endtab %}
26+
27+
{% tab title="Rust" %}
28+
```bash
29+
cargo add pgml
30+
```
31+
{% endtab %}
32+
33+
{% tab title="C" %}
34+
35+
First clone the `postgresml` repository and navigate to the `pgml-sdks/pgml/c` directory:
36+
```bash
37+
git clone https://github.com/postgresml/postgresml
38+
cd postgresml/pgml-sdks/pgml/c
39+
```
40+
41+
Then build the bindings:
42+
```bash
43+
make bindings
44+
```
45+
46+
This will generate the `pgml.h` header file and a shared library: a `.so` file on Linux or a `.dylib` file on macOS.
47+
{% endtab %}
2648
{% endtabs %}
2749

2850
## Getting started
@@ -41,10 +63,10 @@ export PGML_DATABASE_URL=postgres://user:password@sql.cloud.postgresml.org:6432/
4163

4264
### Create a collection
4365

44-
The SDK is written in asynchronous code, so you need to run it inside an async runtime. Both Python and JavaScript support async functions natively.
66+
The SDK is written in asynchronous code, so you need to run it inside an async runtime. Python, JavaScript and Rust all support async functions natively.
4567

4668
{% tabs %}
47-
{% tab title="JavaScript " %}
69+
{% tab title="JavaScript" %}
4870
```javascript
4971
const pgml = require("pgml");
5072

@@ -63,6 +85,29 @@ async def main():
6385
collection = Collection("sample_collection")
6486
```
6587
{% endtab %}
88+
89+
{% tab title="Rust" %}
90+
```rust
91+
use pgml::{Collection, Pipeline};
92+
use anyhow::Error;
93+
94+
#[tokio::main]
95+
async fn main() -> Result<(), Error> {
96+
let mut collection = Collection::new("sample_collection", None)?;
97+
}
98+
```
99+
{% endtab %}
100+
101+
{% tab title="C" %}
102+
```c
103+
#include <stdio.h>
104+
#include "pgml.h"
105+
106+
int main() {
107+
CollectionC * collection = pgml_collectionc_new("sample_collection", NULL);
108+
}
109+
```
110+
{% endtab %}
66111
{% endtabs %}
67112

68113
The above example imports the `pgml` module and creates a collection object. By itself, the collection only tracks document contents and identifiers, but once we add a pipeline, we can instruct the SDK to perform additional tasks when documents are inserted and retrieved.
@@ -93,7 +138,7 @@ await collection.add_pipeline(pipeline);
93138
```python
94139
# Add this code to the end of the main function from the above example.
95140
pipeline = Pipeline(
96-
"test_pipeline",
141+
"sample_pipeline",
97142
{
98143
"text": {
99144
"splitter": { "model": "recursive_character" },
@@ -107,6 +152,37 @@ pipeline = Pipeline(
107152
await collection.add_pipeline(pipeline)
108153
```
109154
{% endtab %}
155+
156+
{% tab title="Rust" %}
157+
```rust
158+
// Add this code to the end of the main function from the above example.
159+
let mut pipeline = Pipeline::new(
160+
"sample_pipeline",
161+
Some(
162+
serde_json::json!({
163+
"text": {
164+
"splitter": { "model": "recursive_character" },
165+
"semantic_search": {
166+
"model": "Alibaba-NLP/gte-base-en-v1.5",
167+
},
168+
},
169+
})
170+
.into(),
171+
),
172+
)?;
173+
174+
collection.add_pipeline(&mut pipeline).await?;
175+
```
176+
{% endtab %}
177+
178+
{% tab title="C" %}
179+
```c
180+
// Add this code to the end of the main function from the above example.
181+
PipelineC * pipeline = pgml_pipelinec_new("sample_pipeline", "{\"text\": {\"splitter\": {\"model\": \"recursive_character\"},\"semantic_search\": {\"model\": \"Alibaba-NLP/gte-base-en-v1.5\"}}}");
182+
183+
pgml_collectionc_add_pipeline(collection, pipeline);
184+
```
185+
{% endtab %}
110186
{% endtabs %}
111187
112188
The pipeline configuration is a key/value object, where the key is the name of a column in a document, and the value is the action the SDK should perform on that column.
@@ -153,9 +229,36 @@ documents = [
153229
await collection.upsert_documents(documents)
154230
```
155231
{% endtab %}
156-
{% endtabs %}
157232

158-
If the same document `id` is used, the SDK computes the difference between existing and new documents and only updates the chunks that have changed.
233+
{% tab title="Rust" %}
234+
```rust
235+
// Add this code to the end of the main function in the above example.
236+
let documents = vec![
237+
serde_json::json!({
238+
"id": "Document One",
239+
"text": "document one contents...",
240+
})
241+
.into(),
242+
serde_json::json!({
243+
"id": "Document Two",
244+
"text": "document two contents...",
245+
})
246+
.into(),
247+
];
248+
249+
collection.upsert_documents(documents, None).await?;
250+
```
251+
{% endtab %}
252+
253+
{% tab title="C" %}
254+
```c
255+
// Add this code to the end of the main function in the above example.
256+
char * documents_to_upsert[2] = {"{\"id\": \"Document One\", \"text\": \"document one contents...\"}", "{\"id\": \"Document Two\", \"text\": \"document two contents...\"}"};
257+
258+
pgml_collectionc_upsert_documents(collection, documents_to_upsert, 2, NULL);
259+
```
260+
{% endtab %}
261+
{% endtabs %}
159262
160263
### Search documents
161264
@@ -203,6 +306,47 @@ results = await collection.vector_search(
203306
print(results)
204307
```
205308
{% endtab %}
309+
310+
{% tab title="Rust" %}
311+
```rust
312+
// Add this code to the end of the main function in the above example.
313+
let results = collection
314+
.vector_search(
315+
serde_json::json!({
316+
"query": {
317+
"fields": {
318+
"text": {
319+
"query": "Something about a document...",
320+
},
321+
},
322+
},
323+
"limit": 2,
324+
})
325+
.into(),
326+
&mut pipeline,
327+
)
328+
.await?;
329+
330+
println!("{:?}", results);
331+
332+
Ok(())
333+
```
334+
{% endtab %}
335+
336+
{% tab title="C" %}
337+
```c
338+
// Add this code to the end of the main function in the above example.
339+
r_size = 0;
340+
char** results = pgml_collectionc_vector_search(collection, "{\"query\": {\"fields\": {\"text\": {\"query\": \"Something about a document...\"}}}, \"limit\": 2}", pipeline, &r_size);
341+
printf("\n\nPrinting results:\n");
342+
for (i = 0; i < r_size; ++i) {
343+
printf("Result %u -> %s\n", i, results[i]);
344+
}
345+
346+
pgml_pipelinec_delete(pipeline);
347+
pgml_collectionc_delete(collection);
348+
```
349+
{% endtab %}
206350
{% endtabs %}
207351
208352
We are using built-in vector search, powered by embeddings and the PostgresML [pgml.embed()](../sql-extension/pgml.embed) function, which embeds the `query` argument, compares it to the embeddings stored in the database, and returns the top two results, ranked by cosine similarity.
@@ -228,6 +372,8 @@ if __name__ == "__main__":
228372
{% endtab %}
229373
{% endtabs %}
230374

375+
Note that the `Rust` and `C` examples do not require any additional code to run correctly.
376+
231377
Once you run the example, you should see something like this in the terminal:
232378

233379
```bash

0 commit comments

Comments
 (0)