import signal
import sys
import time
from threading import Thread

import proto.documents_pb2_grpc
import proto.documents_pb2
import init
import doc_client

# Only the disabled threaded variant of the sync loop referenced this list;
# the sequential implementation below never touches it. It is kept so the
# module's public names stay unchanged.
threads = []


def sync_documents(poll_interval=5):
    """Poll for document chunks that have no vector and vectorize each one.

    Runs forever. Each round requests the pending chunks through
    ``doc_client.stub.GetNoVectorDocumentChunks``, passes every returned
    chunk to :func:`vector_and_save` one at a time, then sleeps before the
    next round.

    Args:
        poll_interval: Seconds to sleep between polling rounds. Defaults to
            5, the value that was previously hard-coded.

    Raises:
        Any exception raised by the RPC calls or by :func:`vector_and_save`
        propagates to the caller and ends the loop.
    """
    while True:
        request = proto.documents_pb2.GetNotVectorDocumentChunksRequest()
        # Get all document chunks that have no vector yet.
        chunks_response = doc_client.stub.GetNoVectorDocumentChunks(request).chunks

        # NOTE: a variant that fanned chunks out to at most 10 worker threads
        # was prototyped here and disabled; chunks are processed sequentially.
        for chunk in chunks_response:
            vector_and_save(chunk)

        print("进入下一次循环...")
        time.sleep(poll_interval)


def vector_and_save(chunk):
    """Vectorize one chunk, store the vector, and record its id on the chunk.

    Steps, in order:

    1. Convert ``chunk.content`` to a vector with ``init.text_to_vector``.
    2. Store it with ``init.insert_document`` together with the chunk id and
       the owning document's id, library id and user id.
    3. Send the value returned by step 2 as ``vector_id`` in an
       ``UpdateDocumentChunk`` RPC for this chunk.

    Args:
        chunk: Chunk message as returned by ``GetNoVectorDocumentChunks``.
            The attributes read are ``id``, ``content``, ``document.id``,
            ``document.library_id`` and ``document.user_id``.
    """
    print("正在进行文本向量化...")
    text_vector = init.text_to_vector(chunk.content)

    # Store the vector along with the identifiers of the chunk's document.
    document = chunk.document
    update_vector_response = init.insert_document(
        document_id=document.id,
        document_chunk_id=chunk.id,
        library_id=document.library_id,
        user_id=document.user_id,
        vector=text_vector,
    )
    print(update_vector_response)

    # Write the stored vector's id back onto the chunk record.
    # NOTE(review): if this RPC fails after the insert above succeeded, the
    # chunk presumably still counts as "no vector" and would be inserted
    # again on the next round - confirm against the service.
    update_request = proto.documents_pb2.UpdateChunkedDocumentRequest(
        id=chunk.id,
        vector_id=update_vector_response,
    )
    update_vector_id_response = doc_client.stub.UpdateDocumentChunk(update_request)
    print(update_vector_id_response)
    print("向量化完成。")


if __name__ == '__main__':
    sync_documents()