本文使用.Net + Ollama + 向量数据库Qdrant,实现简单RAG,代码仅实现基本演示功能。
一、Ollama如何安装使用
请移步:https://blog.csdn.net/MrTraum/article/details/139240885
二、新建 ASP.NET Core Web API 项目,并导入 NuGet 包
本项目使用.Net8构建。
引入以下包:
Microsoft.Extensions.AI
LangChain
LangChain.Core
LangChain.DocumentLoaders.Pdf
Microsoft.Extensions.VectorData.Abstractions
OllamaSharp
Microsoft.SemanticKernel.Connectors.Qdrant
三、文本向量化+保存Qdrant
向量化模型使用:
Qwen3-Embedding-0.6B
文本从 PDF 文件导入,上传的文件使用 IFormFile 接收。
/// <summary>
/// Record type stored in the vector-store collection: one text chunk together
/// with its origin and its embedding vector.
/// </summary>
public class Content
{
/// <summary>Unique key of the record in the vector store.</summary>
[VectorStoreKey]
public Guid Key { get; set; }
/// <summary>Raw text of the chunk; stored as a data field alongside the vector.</summary>
[VectorStoreData]
public string Text { get; set; }
/// <summary>Where the chunk came from (filled from the document's "source" metadata by the ingestion code).</summary>
[VectorStoreData]
public string Source { get; set; }
/// <summary>
/// Embedding of <see cref="Text"/>, declared with 1024 dimensions.
/// NOTE(review): 1024 presumably matches the output size of the embedding model in use — confirm before switching models.
/// </summary>
[VectorStoreVector(1024)]
public ReadOnlyMemory<float> Vector { get; set; }
}
// Ingestion pipeline: load the uploaded PDF, split it into overlapping chunks,
// embed every chunk with the Ollama embedding model and upsert the records into Qdrant.

// The upload stream is owned here, so dispose it once ingestion has finished.
await using var pdfStream = file.File.OpenReadStream();
IDocumentLoader pdfLoader = new PdfPigPdfLoader();
var dataSource = DataSource.FromStream(pdfStream);
var documents = await pdfLoader.LoadAsync(
    dataSource: dataSource
);

// Split the large text into chunks with LangChain's recursive character splitter.
ITextSplitter recursiveSplitter = new RecursiveCharacterTextSplitter(
    chunkSize: 500,
    chunkOverlap: 128,
    separators: new[] { "\n\n", "\n", "。", "!", "?", ";", " ", "" } // separators in priority order
);

// Drop blank chunks: they carry no information and only waste an embedding call.
var splitDocuments = recursiveSplitter.SplitDocuments(documents)
    .Where(d => !string.IsNullOrWhiteSpace(d.PageContent))
    .ToList();

// The store is created with ownsClient = true, so disposing it also disposes the QdrantClient.
using var vectorStore = new QdrantVectorStore(new QdrantClient("192.168.4.9", 6334), true);
var ragVectorRecordCollection = vectorStore.GetCollection<Guid, Content>("Contents");
await ragVectorRecordCollection.EnsureCollectionExistsAsync();

var uri = new Uri(OllamaUrl);
using var ollama = new OllamaApiClient(uri, "dengcao/Qwen3-Embedding-0.6B:Q8_0");

List<Content> vectorRecords = new List<Content>(splitDocuments.Count);

// Nothing to embed or store when the PDF yielded no usable text.
if (splitDocuments.Count > 0)
{
    // Embed all chunks in one call. The result pairs each input with its embedding
    // in input order, so chunks and embeddings can be matched by index instead of
    // scanning for equal text (which was quadratic and could leave a vector empty).
    var embeddings = await ollama.GenerateAndZipAsync(splitDocuments.Select(t => t.PageContent));
    if (embeddings.Length != splitDocuments.Count)
    {
        throw new InvalidOperationException(
            $"Expected {splitDocuments.Count} embeddings but received {embeddings.Length}.");
    }

    for (var i = 0; i < splitDocuments.Count; i++)
    {
        var doc = splitDocuments[i];
        // Fall back to "unknown" both when the key is missing and when its value is null.
        var source = doc.Metadata.TryGetValue("source", out var src)
            ? src?.ToString() ?? "unknown"
            : "unknown";

        vectorRecords.Add(new Content
        {
            Key = Guid.NewGuid(),
            Text = doc.PageContent,
            Source = source,
            Vector = embeddings[i].Embedding.Vector
        });
    }

    // Save the records to Qdrant.
    await ragVectorRecordCollection.UpsertAsync(vectorRecords, CancellationToken.None);
}
四、搜索向量数据库,将搜索到的结果交给大模型整理
/// <summary>
/// Answers a question with retrieval-augmented generation: embeds the query,
/// fetches the most similar chunks from the "Contents" collection in Qdrant and
/// asks the chat model to answer strictly from those chunks.
/// </summary>
/// <param name="query">The user's question. A null or blank value yields the "nothing found" message.</param>
/// <param name="top">Maximum number of chunks to retrieve; non-positive values fall back to 10.</param>
/// <param name="cancellationToken">Cancels the embedding, search and chat calls when the request is aborted.</param>
/// <returns>The model's answer, the "nothing found" message, or an empty string if the model returned no message.</returns>
[HttpGet]
public async Task<string> Query([FromQuery] string query, [FromQuery] int top = 10, CancellationToken cancellationToken = default)
{
    // A blank query cannot be embedded meaningfully; answer as if nothing matched.
    if (string.IsNullOrWhiteSpace(query))
    {
        return "没有找到相关的信息。";
    }

    // The vector search rejects a result count below 1, so fall back to the default.
    if (top <= 0)
    {
        top = 10;
    }

    // The store is created with ownsClient = true, so disposing it also disposes the QdrantClient.
    using var vectorStore = new QdrantVectorStore(new QdrantClient("192.168.4.9", 6334), true);
    var ragVectorRecordCollection = vectorStore.GetCollection<Guid, Content>("Contents");
    await ragVectorRecordCollection.EnsureCollectionExistsAsync(cancellationToken);

    var uri = new Uri(OllamaUrl);
    using var ollama = new OllamaApiClient(uri, "dengcao/Qwen3-Embedding-0.6B:Q8_0");
    var searchVector = await ollama.GenerateVectorAsync(query, cancellationToken: cancellationToken);

    // Optional: the vector property would otherwise be resolved from the VectorStoreVector attribute.
    VectorSearchOptions<Content> vectorSearchOptions = new VectorSearchOptions<Content>()
    {
        VectorProperty = m => m.Vector,
    };

    // Query the vector database and materialize the streamed results.
    var resultsList = new List<VectorSearchResult<Content>>();
    await foreach (var result in ragVectorRecordCollection.SearchAsync(searchVector, top, vectorSearchOptions, cancellationToken))
    {
        resultsList.Add(result);
    }

    // Nothing was retrieved.
    if (resultsList.Count == 0)
    {
        return "没有找到相关的信息。";
    }

    // Ask the chat model to organize the retrieved content.
    var chatRequest = new ChatRequest
    {
        Model = ModelName,
        Stream = false,
        Think = false, // disable the model's thinking mode
        Messages = new List<Message>()
        {
            new Message { Role = OllamaSharp.Models.Chat.ChatRole.System, Content = BuildRagSystemPrompt(resultsList, query) },
            new Message { Role = OllamaSharp.Models.Chat.ChatRole.User, Content = query }
        }
    };

    var modelResponse = await ollama.ChatAsync(chatRequest, cancellationToken).StreamToEndAsync();

    // The aggregated response (or its message) may be missing; never dereference it blindly.
    return modelResponse?.Message?.Content ?? string.Empty;
}

/// <summary>
/// Builds the system prompt: strict role rules, the retrieved chunks (highest
/// score first) wrapped in reference markers, the user question and the response template.
/// </summary>
private static string BuildRagSystemPrompt(IEnumerable<VectorSearchResult<Content>> results, string query)
{
    var promptBuilder = new StringBuilder();

    // Role definition (strict mode).
    promptBuilder.AppendLine(@"你是一个知识检索系统,必须遵守以下规则:
1. 回答范围:仅使用下方<!--reference-->标记的内容
2. 禁止行为:禁止联想、推测或添加任何外部知识
3. 免责声明:当参考内容不匹配时必须声明");

    // Structured reference content.
    promptBuilder.AppendLine("<!--reference-->");
    foreach (var item in results.OrderByDescending(x => x.Score))
    {
        promptBuilder.AppendLine($"| 内容:{item.Record.Text?.Trim()}");
        promptBuilder.AppendLine($"| 元数据:{item.Record.Source?.Trim()}");
        promptBuilder.AppendLine($"| 相似度:{item.Score:F4}");
        promptBuilder.AppendLine("|--");
    }
    promptBuilder.AppendLine("<!--end-reference-->");

    // Question and response constraints.
    promptBuilder.AppendLine($"用户问题:{query?.Trim()}");
    promptBuilder.AppendLine(@"响应要求:
1. 相似度高的条目优先
2. 描述中必须包含问题关键词的直系近义词
3. 采用以下响应模板:
- 当匹配成功时:『根据参考内容,搜索到以下内容。』
- 当匹配失败时:『根据提供的信息,未找到明确匹配的内容。』");

    return promptBuilder.ToString();
}