Skip to main content

lindorm_ai

Lindorm Embedding

This notebook covers how to get started with the Lindorm AI embedding and rerank models.

from lindormai.model_manager import ModelManager
import environs

# Create the environs reader and hand it the local `.env` file; the settings
# looked up through `env` further down are resolved via this object.
env = environs.Env()
env.read_env(".env")


class Config:
    """Connection settings and default model names for the Lindorm AI examples.

    The three connection values are looked up through the module-level
    ``env`` reader. Each lookup passes a placeholder as its second argument
    (presumably used by environs as the default when the variable is unset --
    confirm against the environs docs), so replace the placeholders or define
    the variables in ``.env`` before running against a real endpoint.
    """

    # Connection settings.
    AI_EMB_ENDPOINT = env.str("AI_EMB_ENDPOINT", '<EMB_ENDPOINT>')
    AI_USERNAME = env.str("AI_USERNAME", 'root')
    AI_PWD = env.str("AI_PWD", '<PASSWORD>')

    # Default model names.
    AI_DEFAULT_RERANK_MODEL = "rerank_bge_large"
    AI_DEFAULT_EMBEDDING_MODEL = "bge-large-zh-v1.5"
    AI_DEFAULT_XIAOBU2_EMBEDDING_MODEL = "xiaobu2"

# Module-level aliases for the connection settings stored on Config.
LDAI_EMB_ENDPOINT = Config.AI_EMB_ENDPOINT
LDAI_EMB_USERNAME = Config.AI_USERNAME
LDAI_EMB_PWD = Config.AI_PWD

Define helper functions

def check_model_exist(model_mgr, model_name):
    """Return True if a model named ``model_name`` is listed with status READY.

    Args:
        model_mgr: Object whose ``list()`` method returns an iterable of model
            records; each record is indexed by ``'name'`` and ``'status'``.
        model_name: Name of the model to look for.

    Returns:
        True when at least one listed record has the requested name and a
        status equal to ``'READY'``. False otherwise, including when the
        name is listed with any other status.
    """
    # `and` short-circuits, so 'status' is only read for records whose name
    # matches.
    return any(
        model_name == model['name'] and 'READY' == model['status']
        for model in model_mgr.list()
    )

def create_emb_model(model_mgr, model_name, path, algo):
    """Ask the model manager to create an embedding model.

    Args:
        model_mgr: Object exposing ``create(name=..., task=..., path=..., algo=...)``.
        model_name: Name to register the model under.
        path: Model source location, forwarded unchanged.
        algo: Algorithm identifier, forwarded unchanged.

    Returns:
        Whatever ``model_mgr.create`` returns.
    """
    # Embedding models are always created with the FEATURE_EXTRACTION task.
    return model_mgr.create(
        name=model_name,
        task="FEATURE_EXTRACTION",
        path=path,
        algo=algo,
    )

def create_rerank_model(model_mgr, model_name, path, algo):
    """Ask the model manager to create a rerank model.

    Args:
        model_mgr: Object exposing ``create(name=..., task=..., path=..., algo=...)``.
        model_name: Name to register the model under.
        path: Model source location, forwarded unchanged.
        algo: Algorithm identifier, forwarded unchanged.

    Returns:
        Whatever ``model_mgr.create`` returns.
    """
    # Rerank models are always created with the SEMANTIC_SIMILARITY task.
    return model_mgr.create(
        name=model_name,
        task="SEMANTIC_SIMILARITY",
        path=path,
        algo=algo,
    )

Create & Deploy Embedding Model

# Manager client used below to list and create models on the endpoint.
ldai_model_mgr = ModelManager(LDAI_EMB_ENDPOINT, LDAI_EMB_USERNAME, LDAI_EMB_PWD)

emb_model_name = 'bge_model'
emb_model_path = "huggingface://BAAI/bge-large-zh-v1.5"
emb_model_algo = "BGE_LARGE_ZH"

# Create the embedding model only when no READY model with this name is listed.
# NOTE(review): a model that is listed but not READY also takes the create
# branch -- confirm that a repeated create call is acceptable in that case.
if not check_model_exist(ldai_model_mgr, emb_model_name):
    print('model not exist! will create')
    create_emb_model(ldai_model_mgr, emb_model_name, emb_model_path, emb_model_algo)
else:
    print(f'model {emb_model_name} exist!')

Init LindormAIEmbeddings

from langchain_community.embeddings.lindorm_embedding import LindormAIEmbeddings

# Embedding client bound to the model name chosen in the deployment step.
ldai_emb = LindormAIEmbeddings(
    endpoint=LDAI_EMB_ENDPOINT,
    username=LDAI_EMB_USERNAME,
    password=LDAI_EMB_PWD,
    model_name=emb_model_name,
)
API Reference: LindormAIEmbeddings

Embed single query

# Embed a single query string and print whatever the embedding client returns
# (presumably one vector of floats, per LangChain's embeddings interface --
# confirm).
query = '辛弃疾'
response = ldai_emb.embed_query(query)
print(f"emb result: {response}")

Embed multiple documents

import random
import string

# Ten random 10-character alphanumeric strings serve as sample documents.
docs = [
    ''.join(random.choices(string.ascii_letters + string.digits, k=10))
    for _ in range(10)
]

response = ldai_emb.embed_documents(docs)
# Print only the first entry of the response to keep the output short.
print(f"emb result: {response[0]}")

Create & Deploy Rerank Model


rerank_model_name = 'rerank_bge_large'
rerank_model_path = "huggingface://BAAI/bge-reranker-large"
rerank_model_algo = "BGE_RERANKER_LARGE"

# Create the rerank model only when no READY model with this name is listed.
# NOTE(review): a model that is listed but not READY also takes the create
# branch -- confirm that a repeated create call is acceptable in that case.
if not check_model_exist(ldai_model_mgr, rerank_model_name):
    print('model not exist! will create')
    create_rerank_model(ldai_model_mgr, rerank_model_name, rerank_model_path, rerank_model_algo)
else:
    print(f'model {rerank_model_name} exist!')

Init LindormAIRerank

from langchain_community.document_compressors.lindormai_rerank import LindormAIRerank

# Rerank client bound to the model name chosen in the deployment step; it
# uses the same endpoint and credentials as the embedding client.
ldai_rerank = LindormAIRerank(
    endpoint=LDAI_EMB_ENDPOINT,
    username=LDAI_EMB_USERNAME,
    password=LDAI_EMB_PWD,
    model_name=rerank_model_name,
)
API Reference: LindormAIRerank

Rerank documents

from langchain_core.documents import Document

# Two candidate documents, passed to compress_documents with the query string.
doc1 = Document('一只小狗')
doc2 = Document('一个滑滑梯')
docs = [doc1, doc2]

result = ldai_rerank.compress_documents(docs, '两只小狗')
print(result)
API Reference: Document

Was this page helpful?