Table of Contents
Pattern 01
Chatbot with Sliding Window Memory Management
A stateful chatbot is the most common LLM feature request. The key challenge: managing conversation history so it never overflows the context window, while preserving enough context for coherent responses.
from openai import OpenAI
from typing import Optional
import tiktoken
# Module-level client shared by every pattern below; the OpenAI SDK reads
# OPENAI_API_KEY from the environment when no key is passed explicitly.
client = OpenAI()
class Chatbot:
    """Production chatbot with sliding-window memory management.

    Keeps a per-instance message history and automatically trims the oldest
    user/assistant pairs when the history approaches the token budget, so
    requests never overflow the model's context window.
    """

    def __init__(
        self,
        system_prompt: str,
        model: str = "gpt-4o-mini",
        max_history_tokens: int = 80_000,  # Keep 48k for response
        temperature: float = 0.7,
    ):
        """
        Args:
            system_prompt: Persistent instructions prepended to every request.
            model: Chat-completion model name.
            max_history_tokens: Token budget for the conversation history
                (excludes the system prompt and the model's response).
            temperature: Sampling temperature for responses.
        """
        self.system_prompt = system_prompt
        self.model = model
        self.max_history_tokens = max_history_tokens
        self.temperature = temperature
        self.history: list[dict] = []
        # cl100k_base approximates the tokenizer used by the gpt-4o family;
        # counts are close enough for budget enforcement.
        self._encoder = tiktoken.get_encoding("cl100k_base")

    def _count_tokens(self, text: str) -> int:
        """Return the tokenizer token count for *text*."""
        return len(self._encoder.encode(text))

    def _history_token_count(self) -> int:
        """Total tokens across all history message contents.

        NOTE: counts message content only; per-message formatting overhead
        is not included, so this slightly undercounts the true prompt size.
        """
        return sum(self._count_tokens(msg["content"]) for msg in self.history)

    def _trim_history(self):
        """
        Remove oldest message pairs when approaching token limit.
        Always removes in pairs (user + assistant) to maintain conversation
        structure; never removes the final (current) user message.
        """
        while self._history_token_count() > self.max_history_tokens:
            if len(self.history) < 2:
                break  # only the current turn remains — nothing to trim
            # Remove the oldest user+assistant pair
            self.history.pop(0)  # Remove oldest user message
            if self.history and self.history[0]["role"] == "assistant":
                self.history.pop(0)  # Remove its response

    def chat(self, user_message: str) -> str:
        """Send a message and get a response. History is maintained automatically.

        If the API call fails, the pending user message is rolled back so the
        history is not left in an inconsistent state for the next call.
        """
        # Add user message to history
        self.history.append({"role": "user", "content": user_message})
        # Trim if needed
        self._trim_history()
        # Build full messages list
        messages = [
            {"role": "system", "content": self.system_prompt}
        ] + self.history
        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=self.temperature,
            )
        except Exception:
            # Roll back the unanswered user message so a retry starts clean.
            if self.history and self.history[-1]["role"] == "user":
                self.history.pop()
            raise
        assistant_message = response.choices[0].message.content
        # Add assistant response to history
        self.history.append({"role": "assistant", "content": assistant_message})
        return assistant_message

    def reset(self):
        """Clear conversation history (start a new session)."""
        self.history = []

    @property
    def turn_count(self) -> int:
        """Number of complete conversation turns."""
        return len([m for m in self.history if m["role"] == "user"])
# Usage
tutor_bot = Chatbot(
    system_prompt="You are a helpful Python tutor. "
    "Give short, practical answers. Use code examples.",
    temperature=0.7,
)
questions = (
    "What is a decorator?",
    "Show me a practical example",
    "How is this different from a class?",
)
for question in questions:
    print(tutor_bot.chat(question))
print(f"Turns so far: {tutor_bot.turn_count}")
Pattern 02
Long-Document Summarizer with Map-Reduce
A single LLM call can't handle a 200-page document. The map-reduce pattern: split the document into chunks, summarize each chunk independently (map), then summarize all the chunk summaries into a final summary (reduce).
from typing import List
import textwrap
def chunk_text(text: str, chunk_size: int = 3000, overlap: int = 200) -> List[str]:
    """Split text into overlapping word-based chunks.

    Overlap ensures context isn't lost at chunk boundaries.

    Args:
        text: Input document.
        chunk_size: Maximum words per chunk.
        overlap: Words shared between consecutive chunks; must be smaller
            than chunk_size so the window can advance.

    Returns:
        List of chunk strings (empty list for empty/whitespace-only input).

    Raises:
        ValueError: If overlap >= chunk_size (the window would never advance).
    """
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    words = text.split()
    chunks: List[str] = []
    step = chunk_size - overlap
    for i in range(0, len(words), step):
        chunk_words = words[i:i + chunk_size]
        if chunk_words:
            chunks.append(" ".join(chunk_words))
        # Stop once a chunk reaches the end of the document; otherwise the
        # next iteration would emit a trailing chunk entirely contained in
        # this one.
        if i + chunk_size >= len(words):
            break
    return chunks
def summarize_chunk(chunk: str, context: str = "") -> str:
    """Summarize a single chunk. Context helps with continuity."""
    # Optional continuity line carrying the previous chunk's summary.
    context_line = f"Previous context: {context}" if context else ""
    system_msg = "You are an expert summarizer. Create concise, information-dense summaries."
    user_msg = f"""Summarize the following text section.
{context_line}
TEXT:
{chunk}
Provide a dense 3-5 sentence summary capturing all key information."""
    # temperature=0 for stable summaries; max_tokens caps cost per chunk.
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg},
        ],
        temperature=0,
        max_tokens=300,
    )
    return completion.choices[0].message.content
def summarize_document(
    text: str,
    final_length: str = "2-3 paragraphs",
    focus: str = "key findings and actionable insights",
) -> dict:
    """
    Map-reduce summarization for documents of any length.
    Returns chunk summaries and final summary.
    """
    chunks = chunk_text(text)
    chunk_summaries = []
    if len(chunks) == 1:
        # Short document — single pass is fine
        chunk_summaries.append(summarize_chunk(chunks[0]))
    else:
        # Map phase: summarize each chunk, feeding each summary forward
        # as context for the next one.
        prev_summary = ""
        for index, chunk in enumerate(chunks):
            print(f"Summarizing chunk {index+1}/{len(chunks)}...")
            prev_summary = summarize_chunk(chunk, context=prev_summary)
            chunk_summaries.append(prev_summary)
    # Reduce phase: combine all chunk summaries into final summary
    combined_summaries = "\n\n".join(
        f"Section {n+1}: {summary}" for n, summary in enumerate(chunk_summaries)
    )
    reduce_prompt = f"""These are summaries of different sections of a document.
Create a final unified summary of {final_length}.
Focus on: {focus}
SECTION SUMMARIES:
{combined_summaries}"""
    final_response = client.chat.completions.create(
        model="gpt-4o",  # Use stronger model for final synthesis
        messages=[
            {
                "role": "system",
                "content": "You are an expert at synthesizing information. "
                "Create coherent, comprehensive summaries.",
            },
            {"role": "user", "content": reduce_prompt},
        ],
        temperature=0.3,
    )
    return {
        "final_summary": final_response.choices[0].message.content,
        "chunk_count": len(chunks),
        "chunk_summaries": chunk_summaries,
    }
# Usage
with open("long_report.txt") as report_file:
    report_text = report_file.read()
summary_result = summarize_document(report_text, final_length="1 paragraph", focus="risks")
print(summary_result["final_summary"])
Pattern 03
Content Generator with Structured Prompts
from dataclasses import dataclass
@dataclass
class ContentSpec:
    """Declarative specification for one piece of generated content."""
    topic: str
    format: str  # "blog_post", "tweet_thread", "linkedin_post", "email"
    tone: str  # "professional", "casual", "technical", "inspirational"
    audience: str
    key_points: list
    word_count: int = 500
    include_cta: bool = False  # when True, cta_text is appended to the prompt
    cta_text: str = ""
# Per-format prompt fragments consumed by generate_content(); keys match
# ContentSpec.format values. Unknown formats get a generic fallback there.
FORMAT_INSTRUCTIONS = {
"blog_post": "Write a blog post with a compelling headline, introduction, "
"2-4 subheaded sections, and a conclusion.",
"tweet_thread": "Write a Twitter/X thread. Start with a hook tweet, "
"then 4-6 numbered tweets (1/, 2/, etc.), end with a summary tweet.",
"linkedin_post": "Write a LinkedIn post. Start with a strong first line (hook), "
"use short paragraphs, include 3-5 relevant hashtags at the end.",
"email": "Write a professional email with Subject:, greeting, body paragraphs, "
"and a professional sign-off.",
}
def generate_content(spec: ContentSpec) -> str:
    """Render the content described by *spec* with a single LLM call."""
    # Unknown formats fall back to a generic instruction.
    format_instruction = FORMAT_INSTRUCTIONS.get(
        spec.format, f"Write a {spec.format} format piece."
    )
    key_points_text = "\n".join(f"- {point}" for point in spec.key_points)
    if spec.include_cta:
        cta_instruction = f"\n\nInclude a call-to-action: {spec.cta_text}"
    else:
        cta_instruction = ""
    system_prompt = f"""You are an expert content writer specializing in {spec.format} content.
Tone: {spec.tone}
Target Audience: {spec.audience}
{format_instruction}"""
    user_prompt = f"""Create content about: {spec.topic}
Key points to cover:
{key_points_text}
Target word count: approximately {spec.word_count} words{cta_instruction}"""
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.8,  # Higher creativity for content generation
    )
    return completion.choices[0].message.content
# Usage
post_spec = ContentSpec(
    topic="Why developers should learn AI engineering in 2026",
    format="linkedin_post",
    tone="professional",
    audience="software developers",
    key_points=[
        "AI features are now a standard hiring requirement",
        "LLM APIs are as easy to use as any REST API",
        "The ROI for developers who learn AI is measurable",
    ],
    word_count=300,
    include_cta=True,
    cta_text="Check out our free AI Engineering course at prepflix.co.in",
)
post_text = generate_content(post_spec)
print(post_text)
Pattern 04
Text Classifier Returning JSON
import json
from pydantic import BaseModel
from typing import Literal
class ClassificationResult(BaseModel):
    """Structured, validated output of the support-ticket classifier."""
    label: str
    confidence: Literal["high", "medium", "low"]
    reasoning: str
class SupportTicketClassifier:
    """
    Classifies customer support tickets into categories.
    Returns structured JSON with label, confidence, and reasoning.
    """

    # Closed set of labels the model is allowed to choose from.
    CATEGORIES = [
        "billing",
        "technical_issue",
        "feature_request",
        "account_access",
        "general_inquiry",
        "complaint",
    ]

    # chr(10) is "\n" — f-string expressions cannot contain a backslash
    # before Python 3.12, so the newline join is spelled this way.
    SYSTEM_PROMPT = f"""You are a customer support ticket classifier.
Classify tickets into one of these categories:
{chr(10).join(f"- {c}" for c in CATEGORIES)}
Return JSON with this exact structure:
{{
"label": "",
"confidence": "",
"reasoning": ""
}}
Rules:
- label must be exactly one of the listed categories
- confidence: high if clear match, medium if ambiguous, low if very unclear
- reasoning: brief, factual explanation of why you chose this label"""

    def classify(self, ticket_text: str) -> ClassificationResult:
        """Classify one ticket; raises if the model's JSON fails validation."""
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": ticket_text},
            ],
            response_format={"type": "json_object"},  # force valid JSON output
            temperature=0,  # deterministic labeling
        )
        payload = json.loads(completion.choices[0].message.content)
        return ClassificationResult(**payload)

    def classify_batch(self, tickets: list[str]) -> list[ClassificationResult]:
        """Classify multiple tickets (runs sequentially — use async for parallelism)."""
        return [self.classify(ticket) for ticket in tickets]
# Usage
ticket_classifier = SupportTicketClassifier()
sample_tickets = [
    "I was charged twice this month for my subscription",
    "The API is returning 500 errors for all my requests since 2pm",
    "Would love to see a dark mode option",
    "I forgot my password and the reset email isn't arriving",
]
for sample in sample_tickets:
    outcome = ticket_classifier.classify(sample)
    print(f"Ticket: {sample[:50]}...")
    print(f" -> {outcome.label} ({outcome.confidence}): {outcome.reasoning}")
    print()
Pattern 05
Code Explainer — Understand Any Code in Plain English
import ast
from typing import Optional
class CodeExplainer:
    """
    Explains code at different levels of detail for different audiences.
    """

    def explain(
        self,
        code: str,
        language: str = "python",
        audience: str = "junior developer",
        detail_level: str = "moderate",  # "brief", "moderate", "deep"
    ) -> dict:
        """
        Explain code with line-by-line analysis if requested.
        Returns explanation, complexity assessment, and potential issues.
        """
        # Maps detail level to the instruction injected into the system prompt.
        detail_instructions = {
            "brief": "In 2-3 sentences, explain what this code does.",
            "moderate": "Explain what this code does, how it works, and its purpose.",
            "deep": "Provide a deep analysis: what the code does, how each part works, "
            "algorithmic complexity, potential issues, and suggestions.",
        }
        system_content = f"""You are an expert {language} developer explaining code
to a {audience}.
{detail_instructions[detail_level]}
Also identify:
- Time complexity (if relevant)
- Potential bugs or issues
- One improvement suggestion (if any)
Format your response as JSON:
{{
"explanation": "...",
"complexity": "O(...) - explanation" or null,
"potential_issues": ["issue1", "issue2"] or [],
"improvement": "suggestion" or null
}}"""
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_content},
                {"role": "user", "content": f"```{language}\n{code}\n```"},
            ],
            response_format={"type": "json_object"},
            temperature=0,
        )
        return json.loads(completion.choices[0].message.content)

    def explain_line_by_line(self, code: str, language: str = "python") -> list[dict]:
        """
        Explain each logical section of code separately.
        Best for teaching or code review.
        """
        section_prompt = """Break the code into logical sections and explain each.
Return JSON array:
[
{"lines": "1-3", "code": "the code snippet", "explanation": "what it does"},
...
]"""
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": section_prompt},
                {"role": "user", "content": f"```{language}\n{code}\n```"},
            ],
            response_format={"type": "json_object"},
            temperature=0,
        )
        data = json.loads(completion.choices[0].message.content)
        # Response may be {"sections": [...]} or directly [...]
        if isinstance(data, dict):
            return data.get("sections", data)
        return data
# Usage
explainer = CodeExplainer()
# Sample snippet to explain. Indentation matters: the LLM receives this
# string verbatim, so it must be valid Python.
code = """
def quicksort(arr):
    if len(arr) <= 1:
        return arr
    pivot = arr[len(arr) // 2]
    left = [x for x in arr if x < pivot]
    middle = [x for x in arr if x == pivot]
    right = [x for x in arr if x > pivot]
    return quicksort(left) + middle + quicksort(right)
"""
result = explainer.explain(code, detail_level="deep")
print(result["explanation"])
print("Complexity:", result["complexity"])
print("Issues:", result["potential_issues"])
Concept 06
FastAPI Integration — All 5 Patterns as REST Endpoints
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from typing import Optional, List
import json
app = FastAPI(title="AI Features API", version="1.0")
# Initialize our feature classes as module-level singletons shared by all requests.
# NOTE(review): the in-memory session dict is lost on restart and is not shared
# across worker processes — confirm single-process deployment or externalize it.
chatbot_sessions = {}  # session_id -> Chatbot instance
classifier = SupportTicketClassifier()
explainer = CodeExplainer()
# --- Pydantic request/response models ---
class ChatRequest(BaseModel):
    """Inbound payload for POST /chat."""
    session_id: str
    message: str
    system_prompt: Optional[str] = "You are a helpful assistant."


class ChatResponse(BaseModel):
    """Outbound payload for POST /chat."""
    response: str
    session_id: str
    turn_count: int


class SummarizeRequest(BaseModel):
    """Inbound payload for POST /summarize."""
    text: str
    final_length: str = "2-3 paragraphs"
    focus: str = "key findings"


class SummarizeResponse(BaseModel):
    """Outbound payload for POST /summarize."""
    summary: str
    chunk_count: int


class ClassifyRequest(BaseModel):
    """Inbound payload for POST /classify."""
    text: str


class ClassifyResponse(BaseModel):
    """Outbound payload for POST /classify."""
    label: str
    confidence: str
    reasoning: str


class ExplainRequest(BaseModel):
    """Inbound payload for POST /explain."""
    code: str
    language: str = "python"
    audience: str = "junior developer"
    detail_level: str = "moderate"


class ContentRequest(BaseModel):
    """Inbound payload for POST /generate-content."""
    topic: str
    format: str = "blog_post"
    tone: str = "professional"
    audience: str = "general"
    key_points: List[str] = []
    word_count: int = 500
# --- Endpoints ---
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Stateful chatbot with per-session memory."""
    # Lazily create one Chatbot per session_id; the system prompt is fixed
    # at session-creation time.
    session = chatbot_sessions.get(request.session_id)
    if session is None:
        session = Chatbot(system_prompt=request.system_prompt)
        chatbot_sessions[request.session_id] = session
    reply = session.chat(request.message)
    return ChatResponse(
        response=reply,
        session_id=request.session_id,
        turn_count=session.turn_count,
    )
@app.post("/summarize", response_model=SummarizeResponse)
async def summarize_endpoint(request: SummarizeRequest):
"""Summarize long documents using map-reduce."""
if len(request.text) < 10:
raise HTTPException(status_code=400, detail="Text too short")
result = summarize_document(
request.text,
final_length=request.final_length,
focus=request.focus,
)
return SummarizeResponse(
summary=result["final_summary"],
chunk_count=result["chunk_count"],
)
@app.post("/classify", response_model=ClassifyResponse)
async def classify_endpoint(request: ClassifyRequest):
"""Classify support tickets."""
result = classifier.classify(request.text)
return ClassifyResponse(
label=result.label,
confidence=result.confidence,
reasoning=result.reasoning,
)
@app.post("/explain")
async def explain_endpoint(request: ExplainRequest):
"""Explain code in plain English."""
result = explainer.explain(
code=request.code,
language=request.language,
audience=request.audience,
detail_level=request.detail_level,
)
return result
@app.post("/generate-content")
async def generate_content_endpoint(request: ContentRequest):
"""Generate content in various formats."""
spec = ContentSpec(
topic=request.topic,
format=request.format,
tone=request.tone,
audience=request.audience,
key_points=request.key_points,
word_count=request.word_count,
)
content = generate_content(spec)
return {"content": content}
@app.delete("/chat/{session_id}")
async def clear_chat_session(session_id: str):
"""Clear a chat session (start fresh)."""
if session_id in chatbot_sessions:
del chatbot_sessions[session_id]
return {"status": "cleared", "session_id": session_id}
# Run with: uvicorn app:app --reload
Concept 07
Pydantic Models — Your Contract Between Frontend and AI
Pydantic models serve double duty in AI apps: they validate incoming requests and they define the schema for LLM-extracted data. Here's the complete model file for the features above:
from pydantic import BaseModel, Field, field_validator
from typing import Optional, List, Literal
from enum import Enum
class ContentFormat(str, Enum):
    """Supported output formats; values match ContentSpec.format strings."""
    BLOG_POST = "blog_post"
    TWEET_THREAD = "tweet_thread"
    LINKEDIN_POST = "linkedin_post"
    EMAIL = "email"
class Tone(str, Enum):
    """Writing tones accepted by the content generator."""
    PROFESSIONAL = "professional"
    CASUAL = "casual"
    TECHNICAL = "technical"
    INSPIRATIONAL = "inspirational"
class GenerateRequest(BaseModel):
    """Validated request body for content generation.

    Field constraints reject oversized or abusive payloads before they
    reach the LLM.
    """
    topic: str = Field(min_length=3, max_length=200)
    format: ContentFormat = ContentFormat.BLOG_POST
    tone: Tone = Tone.PROFESSIONAL
    audience: str = Field(default="general audience", max_length=100)
    # default_factory is the pydantic convention for mutable defaults —
    # avoids declaring one shared list object as the default.
    key_points: List[str] = Field(default_factory=list, max_length=10)
    word_count: int = Field(default=500, ge=50, le=5000)

    @field_validator("key_points")
    @classmethod
    def validate_key_points(cls, v):
        """Strip whitespace and drop blank entries."""
        return [p.strip() for p in v if p.strip()]
class APIResponse(BaseModel):
    """Standard wrapper for all API responses.

    On success, ``data`` is populated; on failure, ``error`` is. ``metadata``
    optionally carries extras (e.g. timing or model info).
    """
    success: bool
    data: Optional[dict] = None
    error: Optional[str] = None
    metadata: Optional[dict] = None

    @classmethod
    def ok(cls, data: dict, metadata: Optional[dict] = None):
        """Build a success envelope."""
        return cls(success=True, data=data, metadata=metadata)

    @classmethod
    def fail(cls, error: str):
        """Build a failure envelope."""
        return cls(success=False, error=error)
| Feature | Pattern | Key Challenge |
|---|---|---|
| Customer chatbot | Sliding window chatbot | Memory management |
| Document Q&A | Map-reduce summarizer | Chunk boundaries |
| Marketing copy | Content generator | Format consistency |
| Email routing | Classifier | Label consistency |
| Dev tools | Code explainer | Code language detection |