# Milvus bootstrap and helpers for the "leaf_documents" collection.
#
# Importing this module has side effects: it connects to Milvus, creates the
# collection (plus its indexes) when it is missing, loads the collection and
# instantiates the OpenAI embedding client.
import os

from langchain.embeddings import OpenAIEmbeddings
from pymilvus import (
    connections,
    utility,
    FieldSchema,
    CollectionSchema,
    DataType,
    Collection,
)

# init
# Connection settings come from the environment; an unset or empty variable
# falls back to the local default.
MILVUS_HOST = os.getenv("MILVUS_HOST") or "127.0.0.1"
MILVUS_PORT = os.getenv("MILVUS_PORT") or "19530"

connections.connect("default", host=MILVUS_HOST, port=MILVUS_PORT)

# First-run setup: only executed when the collection does not exist yet.
if not utility.has_collection("leaf_documents"):
    # Scalar columns. The chunk id is the primary key of the collection.
    _document_id = FieldSchema(name="document_id", dtype=DataType.INT64)
    _document_chunk_id = FieldSchema(
        name="document_chunk_id",
        dtype=DataType.INT64,
        is_primary=True,
    )
    _library_id = FieldSchema(name="library_id", dtype=DataType.INT64)
    _user_id = FieldSchema(name="user_id", dtype=DataType.INT64)

    # Embedding column. NOTE(review): dim=1536 presumably matches the output
    # size of the embedding model configured below -- confirm if the model
    # is ever changed.
    _document_vector = FieldSchema(
        name="vector",
        dtype=DataType.FLOAT_VECTOR,
        dim=1536,
    )

    # The field order here is the column order expected by insert_document().
    schema = CollectionSchema(
        fields=[
            _document_id,
            _document_chunk_id,
            _library_id,
            _user_id,
            _document_vector,
        ],
        enable_dynamic_field=True,
    )

    collection_name = "leaf_documents"
    print("Create collection...")
    collection = Collection(
        name=collection_name,
        schema=schema,
        using="default",
        shards_num=2,
    )

    # HNSW index with L2 distance on the embedding column.
    _vector_index_params = {
        "metric_type": "L2",
        "M": 8,
        "efConstruction": 64,
        "index_type": "HNSW",
    }
    collection.create_index(
        field_name="vector",
        index_params=_vector_index_params,
    )

    # Named index on the user_id column.
    collection.create_index(
        field_name="user_id",
        index_name="idx_user_id",
    )

# Always (re)bind to the collection by name and load it, whether or not it
# was just created above.
collection = Collection("leaf_documents")
collection.load()

embeddings = OpenAIEmbeddings(model="text-embedding-ada-002")


def text_to_vector(text: str):
    """Return the embedding of *text* as produced by ``embeddings.embed_query``."""
    vector = embeddings.embed_query(text)
    return vector


def insert_document(document_id: int, document_chunk_id: int, library_id: int, user_id: int, vector: list):
    """Insert a single document chunk and return its primary key.

    The values are sent as one-element columns, in the same order as the
    fields declared in the collection schema: document_id,
    document_chunk_id, library_id, user_id, vector.

    Returns the first entry of the ``primary_keys`` reported by the insert
    call.
    """
    columns = [
        [document_id],
        [document_chunk_id],
        [library_id],
        [user_id],
        [vector],
    ]
    mutation_result = collection.insert(data=columns)
    return mutation_result.primary_keys[0]