langchain-chat-with-milvus/document_ai/vector.py

72 lines
1.9 KiB
Python
Raw Normal View History

2023-11-18 15:08:22 +00:00
import time
import proto.documents_pb2_grpc
import proto.documents_pb2
import init
import doc_client
import sys
import signal
from threading import Thread
# Intended to track worker threads; sync_documents currently processes chunks
# sequentially, so no active code adds to this list.
threads = []
def sync_documents():
    """Poll the document service forever and vectorize every pending chunk.

    Each pass calls ``GetNoVectorDocumentChunks`` on ``doc_client.stub``,
    hands every entry of the reply's ``chunks`` field to ``vector_and_save``
    one at a time, then sleeps five seconds before polling again.

    The loop has no exit condition, so this function never returns normally;
    an exception raised by the RPC or by ``vector_and_save`` propagates to
    the caller.

    NOTE(review): an earlier draft sketched a threaded variant (one worker
    thread per chunk, at most 10 threads, waiting 5 seconds when the pool
    was full). Chunks are currently processed sequentially.
    """
    pause_seconds = 1 * 5

    while True:
        # Ask the document service for every chunk that has no vector yet.
        fetch_pending = doc_client.stub.GetNoVectorDocumentChunks
        empty_request = proto.documents_pb2.GetNotVectorDocumentChunksRequest()
        pending_chunks = fetch_pending(empty_request).chunks

        for pending in pending_chunks:
            vector_and_save(pending)

        print("进入下一次循环...")
        time.sleep(pause_seconds)
def vector_and_save(chunk):
    """Vectorize one document chunk and record the resulting vector id.

    Steps, in order:
      1. Convert ``chunk.content`` to a vector with ``init.text_to_vector``.
      2. Store it through ``init.insert_document`` together with the chunk id
         and the owning document's id, library id and user id.
      3. Send ``UpdateDocumentChunk`` on ``doc_client.stub`` so the chunk
         carries the value returned by step 2 as its ``vector_id``.

    Progress and both responses are printed to stdout.

    Args:
        chunk: A chunk message exposing ``id``, ``content`` and a
            ``document`` with ``id``, ``library_id`` and ``user_id``.

    Returns:
        None.
    """
    text = chunk.content
    print("正在进行文本向量化...")
    embedding = init.text_to_vector(text)

    # Store the vector alongside the identifiers of its chunk and document.
    owner = chunk.document
    record_fields = {
        "document_id": owner.id,
        "document_chunk_id": chunk.id,
        "library_id": owner.library_id,
        "user_id": owner.user_id,
        "vector": embedding,
    }
    vector_id = init.insert_document(**record_fields)
    print(vector_id)

    # Write the returned vector id back onto the chunk in the document service.
    update_request = proto.documents_pb2.UpdateChunkedDocumentRequest(
        id=chunk.id,
        vector_id=vector_id,
    )
    update_reply = doc_client.stub.UpdateDocumentChunk(update_request)
    print(update_reply)

    print("向量化完成。")
# Script entry point: start the endless polling loop when run directly.
if __name__ == "__main__":
    sync_documents()