@@ -42,6 +42,7 @@ New to DocArray? Depending on your use case and background, there are multiple w
42
42
- [ Coming from pure PyTorch or TensorFlow] ( #coming-from-pytorch )
43
43
- [ Coming from Pydantic] ( #coming-from-pydantic )
44
44
- [ Coming from FastAPI] ( #coming-from-fastapi )
45
+ - [ Coming from Jina] ( #coming-from-jina )
45
46
- [ Coming from a vector database] ( #coming-from-a-vector-database )
46
47
- [ Coming from Langchain] ( #coming-from-langchain )
47
48
@@ -681,7 +682,7 @@ from fastapi import FastAPI
681
682
from docarray.base_doc import DocArrayResponse
682
683
from docarray import BaseDoc
683
684
from docarray.documents import ImageDoc
684
- from docarray.typing import NdArray
685
+ from docarray.typing import NdArray, ImageTensor
685
686
686
687
687
688
class InputDoc (BaseDoc ):
@@ -712,6 +713,7 @@ async def create_item(doc: InputDoc) -> OutputDoc:
712
713
)
713
714
return doc
714
715
716
+ input_doc = InputDoc(text = ' ' , img = ImageDoc(tensor = np.random.random((3 , 224 , 224 ))))
715
717
716
718
async with AsyncClient(app = app, base_url = " http://test" ) as ac:
717
719
response = await ac.post(" /embed/" , data = input_doc.json())
@@ -721,6 +723,70 @@ Just like a vanilla Pydantic model!
721
723
722
724
</details >
723
725
726
+ ### Coming from Jina
727
+
728
+ <details markdown =" 1 " >
729
+ <summary >Click to expand</summary >
730
+
731
+ Jina has adopted DocArray as its library for representing and serializing Documents.
732
+
733
+ Jina allows you to serve models and services that are built with DocArray, allowing you to serve and scale these applications
734
+ making full use of DocArray's serialization capabilities.
735
+
736
+ ``` python
737
+ import numpy as np
738
+ from jina import Deployment, Executor, requests
739
+ from docarray import BaseDoc, DocList
740
+ from docarray.documents import ImageDoc
741
+ from docarray.typing import NdArray, ImageTensor
742
+
743
+
744
+ class InputDoc (BaseDoc ):
745
+ img: ImageDoc
746
+ text: str
747
+
748
+
749
+ class OutputDoc (BaseDoc ):
750
+ embedding_clip: NdArray
751
+ embedding_bert: NdArray
752
+
753
+
754
+ def model_img (img : ImageTensor) -> NdArray:
755
+ return np.zeros((100 , 1 ))
756
+
757
+
758
+ def model_text (text : str ) -> NdArray:
759
+ return np.zeros((100 , 1 ))
760
+
761
+
762
+ class MyEmbeddingExecutor (Executor ):
763
+ @requests (on = ' /embed' )
764
+ def encode (self , docs : DocList[InputDoc], ** kwargs ) -> DocList[OutputDoc]:
765
+ ret = DocList[OutputDoc]()
766
+ for doc in docs:
767
+ output = OutputDoc(
768
+ embedding_clip = model_img(doc.img.tensor),
769
+ embedding_bert = model_text(doc.text),
770
+ )
771
+ ret.append(output)
772
+ return ret
773
+
774
+
775
+ with Deployment(
776
+ protocols = [' grpc' , ' http' ], ports = [12345 , 12346 ], uses = MyEmbeddingExecutor
777
+ ) as dep:
778
+ resp = dep.post(
779
+ on = ' /embed' ,
780
+ inputs = DocList[InputDoc](
781
+ [InputDoc(text = ' ' , img = ImageDoc(tensor = np.random.random((3 , 224 , 224 ))))]
782
+ ),
783
+ return_type = DocList[OutputDoc],
784
+ )
785
+ print (resp)
786
+ ```
787
+
788
+ </details >
789
+
724
790
### Coming from a vector database
725
791
726
792
<details markdown =" 1 " >
@@ -774,13 +840,12 @@ Currently, DocArray supports the following vector databases:
774
840
- [ Qdrant] ( https://qdrant.tech/ )
775
841
- [ Elasticsearch] ( https://www.elastic.co/elasticsearch/ ) v8 and v7
776
842
- [ Redis] ( https://redis.io/ )
843
+ - [ Milvus] ( https://milvus.io )
777
844
- ExactNNMemorySearch as a local alternative with exact kNN search.
778
845
- [ HNSWlib] ( https://github.com/nmslib/hnswlib ) as a local-first ANN alternative
779
846
780
847
An integration of [ OpenSearch] ( https://opensearch.org/ ) is currently in progress.
781
848
782
- DocArray <=0.21 also support [ Milvus] ( https://milvus.io/ ) , but this is not yet supported in the current version.
783
-
784
849
Of course this is only one of the things that DocArray can do, so we encourage you to check out the rest of this readme!
785
850
786
851
</details >
0 commit comments