Build an AI agent
This page details a number of ways that you can build an AI agent. As the number of frameworks and programming languages you can choose to do so is quite extensive, this page details three Python frameworks to get you started.
Setup
You can run SurrealDB locally or start with a free SurrealDB Cloud account.
For local, you have two options:
Install SurrealDB and run it in-memory with:
surreal start -u root -p root
docker run --rm \
--pull always \
-p 8000:8000 \
surrealdb/surrealdb:latest \
start \
--user root \
--pass root
Frameworks
Choose one of the following frameworks to view the tutorial and sample code.
Getting started
This is a simple RAG application that uses Pydantic AI and embedded SurrealDB. The integration is done by providing the agent with a custom retrieval tool, which takes a search query, executes a SurrealDB vector-search query, and returns the results.
To run the example:
Set up your OpenAI API key:
export OPENAI_API_KEY=your-api-key
Or, store it in a .env file and add --env-file .env to your uv run commands.
Build the vector store:
uv run --env-file .env -m pydantic_ai_examples.rag_surrealdb build
Ask the agent a question:
uv run --env-file .env -m pydantic_ai_examples.rag_surrealdb search "How do I register a function as a custom tool for my agent?"
Or use the web UI:
uv run --env-file .env -m pydantic_ai_examples.rag_surrealdb web
Code
from __future__ import annotations as _annotations
import asyncio
import re
import sys
import unicodedata
from collections.abc import Sequence
from contextlib import asynccontextmanager
from dataclasses import dataclass
from pathlib import Path
from typing import TypeVar
import httpx
import logfire
import uvicorn
from pydantic import BaseModel, TypeAdapter
from surrealdb import (
AsyncEmbeddedSurrealConnection,
AsyncHttpSurrealConnection,
AsyncSurreal,
AsyncWsSurrealConnection,
RecordID,
Value,
)
from typing_extensions import AsyncGenerator
from pydantic_ai import Agent, Embedder
# Union of the SurrealDB async connection flavours (WebSocket, HTTP, embedded);
# every helper below accepts any of them.
SurrealConn = (
    AsyncWsSurrealConnection
    | AsyncHttpSurrealConnection
    | AsyncEmbeddedSurrealConnection
)

# 'if-token-present' means nothing will be sent (and the example will work) if you don't have logfire configured
logfire.configure(send_to_logfire='if-token-present')
logfire.instrument_pydantic_ai()
logfire.instrument_surrealdb()

# Directory containing this file; the embedded database file lives next to it.
THIS_DIR = Path(__file__).parent
# SurrealDB namespace/database names plus root credentials (the credentials are
# only used when connecting to a separate server process — see database_connect).
SURREALDB_NS = 'pydantic_ai_examples'
SURREALDB_DB = 'rag_surrealdb'
SURREALDB_USER = 'root'
SURREALDB_PASS = 'root'

# Embedding model (used for both indexing and querying) and the answering agent.
embedder = Embedder('openai:text-embedding-3-small')
agent = Agent('openai:gpt-5.2')

# Generic placeholder for the pydantic model that query rows are validated into.
RecordType = TypeVar('RecordType')
class RetrievalQueryResult(BaseModel):
    """One row returned by the vector-search query in `retrieve`."""

    url: str  # documentation URL of the section
    title: str  # section title
    content: str  # section body text
    dist: float  # distance from the query embedding (cosine per DB_SCHEMA; smaller = closer)
async def query(
    conn: SurrealConn,
    query_: str,
    vars_: dict[str, Value],
    record_type: type[RecordType],
) -> list[RecordType]:
    """Execute a SurrealQL query and validate every row as `record_type`.

    Args:
        conn: Any SurrealDB async connection.
        query_: The SurrealQL statement to run.
        vars_: Bound variables referenced by the statement.
        record_type: Pydantic model each result row is validated against.

    Returns:
        The query rows as a list of `record_type` instances.
    """
    raw_rows = await conn.query(query_, vars_)
    adapter = TypeAdapter(list[record_type])
    return adapter.validate_python(raw_rows)
@agent.tool_plain
async def retrieve(search_query: str) -> str:
    """Retrieve documentation sections based on a search query.

    Args:
        search_query: The search query.
    """
    with logfire.span(
        'create embedding for {search_query=}', search_query=search_query
    ):
        embed_result = await embedder.embed_query(search_query)
    # The Embedder method guarantees exactly one embedding for a single query.
    query_vector = embed_result.embeddings[0]

    # SurrealDB vector search using HNSW index
    async with database_connect(False) as db:
        sections = await query(
            db,
            """
            SELECT url, title, content, vector::distance::knn() AS dist
            FROM doc_sections
            WHERE embedding <|8, 40|> $vector
            ORDER BY dist ASC
            """,
            {'vector': list(query_vector)},
            RetrievalQueryResult,
        )

    # Render each matching section as a small markdown document for the agent.
    formatted = [
        f'# {row.title}\nDocumentation URL:{row.url}\n\n{row.content}'
        for row in sections
    ]
    return '\n\n'.join(formatted)
async def run_agent(question: str):
    """Entry point to run the agent and perform RAG based question answering."""
    logfire.info('Asking "{question}"', question=question)
    # The agent decides when to call the `retrieve` tool while answering.
    result = await agent.run(question)
    print(result.output)
# Web chat UI
app = agent.to_web()

#######################################################
# The rest of this file is dedicated to preparing the #
# search database, and some utilities.                #
#######################################################

# JSON document from
# https://gist.github.com/samuelcolvin/4b5bb9bb163b1122ff17e29e48c10992
# Pre-chunked Logfire documentation sections used as the example corpus.
DOCS_JSON = (
    'https://gist.githubusercontent.com/'
    'samuelcolvin/4b5bb9bb163b1122ff17e29e48c10992/raw/'
    '80c5925c42f1442c24963aaf5eb1a324d47afe95/logfire_docs.json'
)
def build_doc_rec_id(url: str) -> RecordID:
    """Derive a deterministic `doc_sections` record ID from a section URL."""
    slug = slugify(url, '_')
    return RecordID('doc_sections', slug)
async def build_search_db():
    """Build the search database."""
    # Download the pre-chunked documentation JSON and validate it into
    # DocsSection models.
    async with httpx.AsyncClient() as client:
        response = await client.get(DOCS_JSON)
        response.raise_for_status()
        sections = sections_ta.validate_json(response.content)
    # Connect with create_db=True so the schema (table + HNSW index) is created.
    async with database_connect(True) as db:
        # Collect only the sections whose record does not already exist, so
        # re-running the build skips embedding work that was done previously.
        missing_sections: list[DocsSection] = []
        for section in sections:
            url = section.url()
            record_id = build_doc_rec_id(url)
            existing = await db.select(record_id)
            if existing:
                logfire.info('Skipping {url=}', url=url)
                continue
            missing_sections.append(section)
        if missing_sections:
            # Embed all missing sections in one batch call, then insert a
            # record per section; strict zip guards against a length mismatch
            # between inputs and returned embeddings.
            with logfire.span('create embeddings'):
                result = await embedder.embed_documents(
                    [section.embedding_content() for section in missing_sections]
                )
                embeddings = result.embeddings
            for section, embedding_vector in zip(
                missing_sections, embeddings, strict=True
            ):
                await insert_doc_section(db, section, embedding_vector)
        else:
            logfire.info('All documents already exist; skipping embedding generation')
async def insert_doc_section(
    db: SurrealConn,
    section: DocsSection,
    embedding_vector: Sequence[float],
) -> None:
    """Store one documentation section plus its embedding in `doc_sections`.

    Raises:
        ValueError: If the database does not return the created record dict.
    """
    section_url = section.url()
    # Create record with embedding, using record ID directly
    created = await db.create(
        build_doc_rec_id(section_url),
        {
            'url': section_url,
            'title': section.title,
            'content': section.content,
            'embedding': list(embedding_vector),
        },
    )
    if isinstance(created, dict):
        return
    raise ValueError(f'Unexpected response from database: {created}')
@dataclass
class DocsSection:
id: int
parent: int | None
path: str
level: int
title: str
content: str
def url(self) -> str:
url_path = re.sub(r'\.md$', '', self.path)
return (
f'https://logfire.pydantic.dev/docs/{url_path}/#{slugify(self.title, "-")}'
)
def embedding_content(self) -> str:
return '\n\n'.join((f'path: {self.path}', f'title: {self.title}', self.content))
# TypeAdapter for parsing the downloaded JSON list into DocsSection models.
sections_ta = TypeAdapter(list[DocsSection])
@asynccontextmanager
async def database_connect(
    create_db: bool = False,
) -> AsyncGenerator[SurrealConn, None]:
    """Yield a SurrealDB connection scoped to this example's namespace/database.

    Args:
        create_db: When True, run DB_SCHEMA (table + HNSW index) after
            connecting, so the vector store is ready for inserts.
    """
    # Running SurrealDB embedded
    db_path = THIS_DIR / f'.{SURREALDB_DB}'
    db_url = f'file://{db_path}'
    requires_auth = False
    # Running SurrealDB in a separate process, connect with URL
    # db_url = 'ws://localhost:8000/rpc'
    # requires_auth = True
    async with AsyncSurreal(db_url) as db:
        # Sign in to the database
        if requires_auth:
            await db.signin({'username': SURREALDB_USER, 'password': SURREALDB_PASS})
        # Set namespace and database
        await db.use(SURREALDB_NS, SURREALDB_DB)
        # Initialize schema if creating database
        if create_db:
            with logfire.span('create schema'):
                await db.query(DB_SCHEMA)
        yield db
# Vector-store schema: a schemaless table with a typed embedding field and an
# HNSW cosine-distance index whose dimension matches text-embedding-3-small
# (1536 dimensions).
DB_SCHEMA = """
DEFINE TABLE doc_sections SCHEMALESS;
DEFINE FIELD embedding ON doc_sections TYPE array<float>;
DEFINE INDEX hnsw_idx_doc_sections ON doc_sections
FIELDS embedding
HNSW DIMENSION 1536
DIST COSINE
TYPE F32;
"""
def slugify(value: str, separator: str, unicode: bool = False) -> str:
    """Slugify a string, to make it URL friendly.

    Args:
        value: The text to slugify.
        separator: Character used to join words, e.g. '-' or '_'.
        unicode: When False, transliterate extended Latin characters to ASCII.

    Returns:
        The slug: lowercased, punctuation stripped, and runs of whitespace or
        separator characters collapsed to a single separator.
    """
    # Adapted from https://github.com/Python-Markdown/markdown/blob/3.7/markdown/extensions/toc.py#L38
    if not unicode:
        # Replace Extended Latin characters with ASCII, i.e. `žlutý` => `zluty`
        value = unicodedata.normalize('NFKD', value)
        value = value.encode('ascii', 'ignore').decode('ascii')
    value = re.sub(r'[^\w\s-]', '', value).strip().lower()
    # re.escape guards against separators that are special inside a regex
    # character class (e.g. '^' would invert the class, '\' would mangle it);
    # it is a no-op change for the '-' and '_' separators used in this file.
    return re.sub(rf'[{re.escape(separator)}\s]+', separator, value)
if __name__ == '__main__':
action = sys.argv[1] if len(sys.argv) > 1 else None
if action == 'build':
asyncio.run(build_search_db())
elif action == 'search':
if len(sys.argv) == 3:
q = sys.argv[2]
else:
q = 'How do I configure logfire to work with FastAPI?'
asyncio.run(run_agent(q))
elif action == 'web':
uvicorn.run(app, host='127.0.0.1', port=7932)
else:
print(
'uv run --extra examples -m pydantic_ai_examples.rag_surrealdb build|search|web',
file=sys.stderr,
)
sys.exit(1)
Getting started
Begin by installing the following dependencies.
# -- Using pip
pip install -U langchain-surrealdb langchain_ollama surrealdb
# -- Using poetry
poetry add langchain-surrealdb langchain_ollama surrealdb
# -- Using uv
uv add --upgrade langchain-surrealdb langchain_ollama surrealdb
surrealdb → SurrealDB Python SDK; langchain-surrealdb → houses SurrealDBVectorStore; langchain_ollama, langchain-openai (or HF, Cohere, etc.) → embeddings
Once this is done, you can create a vector store, add documents with embeddings, and do a similarity search.
from langchain_core.documents import Document
from langchain_surrealdb.vectorstores import SurrealDBVectorStore
from langchain_ollama import OllamaEmbeddings
from surrealdb import Surreal

# Connect to a locally running SurrealDB server and select the
# "langchain" namespace / "demo" database.
conn = Surreal("ws://localhost:8000/rpc")
conn.signin({"username": "root", "password": "secret"})
conn.use("langchain", "demo")

# Vector store backed by SurrealDB, embedding text with a local Ollama model.
vector_store = SurrealDBVectorStore(OllamaEmbeddings(model="llama3.2"), conn)

# Insert two sample documents with explicit IDs; embeddings are generated here.
doc_1 = Document(page_content="foo", metadata={"source": "https://surrealdb.com"})
doc_2 = Document(page_content="SurrealDB", metadata={"source": "https://surrealdb.com"})
vector_store.add_documents(documents=[doc_1, doc_2], ids=["1", "2"])

# Similarity search restricted by a metadata filter; yields (doc, score) pairs.
results = vector_store.similarity_search_with_score(
    query="surreal", k=1, custom_filter={"source": "https://surrealdb.com"}
)
for doc, score in results:
    print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
Under the hood the helper will:
Create the documents table (if it doesn’t exist).
Add an M-Tree index with the correct dimensionality.
Insert each text with its freshly generated embedding.
Similarity search
query = "How do I enable vector search in SurrealDB?"
docs = vector_store.similarity_search(
    query=query, k=1, custom_filter={"source": "https://surrealdb.com"}
)
# Iterate the documents the search returned (`docs`, assigned above — the
# original snippet mistakenly looped over `results` from a different example).
for doc in docs:
    print(f"{doc.page_content} [{doc.metadata}]")
The Vector Search feature of SurrealDB... [{'source': 'https://surrealdb.com'}]
If you want to get the score with the results, use similarity_search_with_score instead.
You can also transform the vector store into a retriever for easier usage in your chains.
query = "How do I enable vector search in SurrealDB?"
# Wrap the vector store in a retriever using maximal marginal relevance (MMR).
# (A stray, unterminated `docs = vector_store.similarity_search(` line left
# over from the previous example has been removed — it made this snippet
# syntactically invalid.)
retriever = vector_store.as_retriever(
    search_type="mmr", search_kwargs={"k": 1, "lambda_mult": 0.5}
)
retriever.invoke(query)
[Document(id='4', metadata={'source': 'https://surrealdb.com'}, page_content='The Vector Search feature of SurrealDB...')]
Getting started
from agno.agent import Agent
from agno.embedder.openai import OpenAIEmbedder
from agno.knowledge.pdf_url import PDFUrlKnowledgeBase
from agno.vectordb.surrealdb import SurrealDb
from surrealdb import Surreal
# SurrealDB connection parameters
SURREALDB_URL = "ws://localhost:8000"
SURREALDB_USER = "root"
SURREALDB_PASSWORD = "root"
SURREALDB_NAMESPACE = "test"
SURREALDB_DATABASE = "test"

# Create a client, authenticate, and select the namespace/database.
client = Surreal(url=SURREALDB_URL)
client.signin({"username": SURREALDB_USER, "password": SURREALDB_PASSWORD})
client.use(namespace=SURREALDB_NAMESPACE, database=SURREALDB_DATABASE)

# Agno vector-database wrapper over the SurrealDB client.
surrealdb = SurrealDb(
    client=client,
    collection="recipes",  # Collection name for storing documents
    efc=150,  # HNSW construction time/accuracy trade-off
    m=12,  # HNSW max number of connections per element
    search_ef=40,  # HNSW search time/accuracy trade-off
)
def sync_demo():
    """Load a PDF knowledge base into SurrealDB and query it synchronously."""
    # Knowledge base sourced from a sample PDF, embedded with OpenAI and
    # stored in the SurrealDb vector store configured above.
    recipe_urls = ["https://agno-public.s3.amazonaws.com/recipes/ThaiRecipes.pdf"]
    knowledge_base = PDFUrlKnowledgeBase(
        urls=recipe_urls,
        vector_db=surrealdb,
        embedder=OpenAIEmbedder(),
    )
    # Re-create the collection and load the documents (blocking call).
    knowledge_base.load(recreate=True)

    # Build an agent over the knowledge base and print its answer.
    agent = Agent(knowledge=knowledge_base, show_tool_calls=True)
    agent.print_response(
        "What are the 3 categories of Thai SELECT is given to restaurants overseas?",
        markdown=True,
    )
# Script entry point.
if __name__ == "__main__":
    # Run synchronous demo
    print("Running synchronous demo...")
    sync_demo()