langchain-chat-with-milvus/document_ai/chunk.py

39 lines
1.1 KiB
Python
Raw Normal View History

2023-11-15 13:12:06 +00:00
import time
2023-11-15 08:20:30 +00:00
import documents_pb2_grpc
import documents_pb2
import init
import doc_client
2023-11-15 13:12:06 +00:00
import sys
import signal
def sync_documents(poll_interval_seconds: float = 5):
    """Poll for documents without a vector, embed them, and record the vector id.

    Each cycle:
      1. Requests the documents that have no vector via
         ``doc_client.stub.GetNoVectorDocuments``.
      2. For every returned document, joins its title and content with a
         newline and converts that text with ``init.text_to_vector``.
      3. Stores the vector with ``init.insert_document`` (passing the
         document id and user id) and prints the returned value.
      4. Sends that returned value back as ``vector_id`` through
         ``doc_client.stub.UpdateDocument`` and prints the response.

    The loop never returns on its own; it sleeps between cycles.

    Args:
        poll_interval_seconds: Pause between polling cycles, in seconds.
            Defaults to 5, the interval that was previously hard-coded.

    Note:
        No exception is caught here, so a failure in any call propagates to
        the caller and ends the loop.
    """
    while True:
        # Fetch every document that still has no vector.
        request = documents_pb2.GetNoVectorDocumentsRequest()
        pending_documents = doc_client.stub.GetNoVectorDocuments(request).documents

        for document in pending_documents:
            doc_content = document.title + "\n" + document.content

            print("正在更新向量...")
            text_vector = init.text_to_vector(doc_content)

            # Store the vector; the returned value is used as the vector id below.
            update_vector_response = init.insert_document(
                document.id, document.user_id, text_vector
            )
            print(update_vector_response)

            # Write the vector id back onto the document record.
            update_vector_id_response = doc_client.stub.UpdateDocument(
                documents_pb2.UpdateDocumentRequest(
                    id=document.id,
                    vector_id=update_vector_response,
                )
            )

            print(update_vector_id_response)
            print("更新向量完成")

        # Wait before polling again so the document service is not hammered.
        time.sleep(poll_interval_seconds)
2023-11-18 11:41:38 +00:00
# Script entry point: start the polling loop only when run directly.
if "__main__" == __name__:
    sync_documents()