Table of Contents
Pattern 01
Chatbot with Sliding Window Memory Management
A stateful chatbot is the most common LLM feature request. The key challenge: managing conversation history so it never overflows the context window, while preserving enough context for coherent responses.
from openai import OpenAI
from typing import Optional
import tiktoken
# Module-level client shared by every pattern below; the OpenAI SDK reads
# OPENAI_API_KEY from the environment when no key is passed explicitly.
client = OpenAI()
class Chatbot:
    """Production chatbot with sliding-window memory management.

    Keeps a per-instance message history and automatically trims the oldest
    user/assistant pairs when the history approaches the token budget, so
    requests never overflow the model's context window.
    """

    def __init__(
        self,
        system_prompt: str,
        model: str = "gpt-4o-mini",
        max_history_tokens: int = 80_000,  # Keep 48k for response
        temperature: float = 0.7,
    ):
        """
        Args:
            system_prompt: Persistent instructions prepended to every request.
            model: Chat-completion model name.
            max_history_tokens: Token budget for the conversation history
                (excludes the system prompt and the model's response).
            temperature: Sampling temperature for responses.
        """
        self.system_prompt = system_prompt
        self.model = model
        self.max_history_tokens = max_history_tokens
        self.temperature = temperature
        self.history: list[dict] = []
        # cl100k_base approximates the tokenizer used by the gpt-4o family;
        # counts are close enough for budget enforcement.
        self._encoder = tiktoken.get_encoding("cl100k_base")

    def _count_tokens(self, text: str) -> int:
        """Return the tokenizer token count for *text*."""
        return len(self._encoder.encode(text))

    def _history_token_count(self) -> int:
        """Total tokens across all history message contents.

        NOTE: counts message content only; per-message formatting overhead
        is not included, so this slightly undercounts the true prompt size.
        """
        return sum(self._count_tokens(msg["content"]) for msg in self.history)

    def _trim_history(self):
        """
        Remove oldest message pairs when approaching token limit.
        Always removes in pairs (user + assistant) to maintain conversation
        structure; never removes the final (current) user message.
        """
        while self._history_token_count() > self.max_history_tokens:
            if len(self.history) < 2:
                break  # only the current turn remains — nothing to trim
            # Remove the oldest user+assistant pair
            self.history.pop(0)  # Remove oldest user message
            if self.history and self.history[0]["role"] == "assistant":
                self.history.pop(0)  # Remove its response

    def chat(self, user_message: str) -> str:
        """Send a message and get a response. History is maintained automatically.

        If the API call fails, the pending user message is rolled back so the
        history is not left in an inconsistent state for the next call.
        """
        # Add user message to history
        self.history.append({"role": "user", "content": user_message})
        # Trim if needed
        self._trim_history()
        # Build full messages list
        messages = [
            {"role": "system", "content": self.system_prompt}
        ] + self.history
        try:
            response = client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=self.temperature,
            )
        except Exception:
            # Roll back the unanswered user message so a retry starts clean.
            if self.history and self.history[-1]["role"] == "user":
                self.history.pop()
            raise
        assistant_message = response.choices[0].message.content
        # Add assistant response to history
        self.history.append({"role": "assistant", "content": assistant_message})
        return assistant_message

    def reset(self):
        """Clear conversation history (start a new session)."""
        self.history = []

    @property
    def turn_count(self) -> int:
        """Number of complete conversation turns."""
        return len([m for m in self.history if m["role"] == "user"])
# Usage
tutor_bot = Chatbot(
    system_prompt="You are a helpful Python tutor. "
    "Give short, practical answers. Use code examples.",
    temperature=0.7,
)
questions = (
    "What is a decorator?",
    "Show me a practical example",
    "How is this different from a class?",
)
for question in questions:
    print(tutor_bot.chat(question))
print(f"Turns so far: {tutor_bot.turn_count}")
Pattern 02
Long-Document Summarizer with Map-Reduce
A single LLM call can't handle a 200-page document. The map-reduce pattern: split the document into chunks, summarize each chunk independently (map), then summarize all the chunk summaries into a final summary (reduce).
from typing import List
import textwrap
def chunk_text(text: str, chunk_size: int = 3000, overlap: int = 200) -> List[str]:
    """Split text into overlapping word-based chunks.

    Overlap ensures context isn't lost at chunk boundaries.

    Args:
        text: Input document.
        chunk_size: Maximum words per chunk.
        overlap: Words shared between consecutive chunks; must be smaller
            than chunk_size so the window can advance.

    Returns:
        List of chunk strings (empty list for empty/whitespace-only input).

    Raises:
        ValueError: If overlap >= chunk_size (the window would never advance).
    """
    if overlap >= chunk_size:
        raise ValueError("overlap must be smaller than chunk_size")
    words = text.split()
    chunks: List[str] = []
    step = chunk_size - overlap
    for i in range(0, len(words), step):
        chunk_words = words[i:i + chunk_size]
        if chunk_words:
            chunks.append(" ".join(chunk_words))
        # Stop once a chunk reaches the end of the document; otherwise the
        # next iteration would emit a trailing chunk entirely contained in
        # this one.
        if i + chunk_size >= len(words):
            break
    return chunks
def summarize_chunk(chunk: str, context: str = "") -> str:
    """Summarize a single chunk. Context helps with continuity."""
    # Optional continuity line carrying the previous chunk's summary.
    context_line = f"Previous context: {context}" if context else ""
    system_msg = "You are an expert summarizer. Create concise, information-dense summaries."
    user_msg = f"""Summarize the following text section.
{context_line}
TEXT:
{chunk}
Provide a dense 3-5 sentence summary capturing all key information."""
    # temperature=0 for stable summaries; max_tokens caps cost per chunk.
    completion = client.chat.completions.create(
        model="gpt-4o-mini",
        messages=[
            {"role": "system", "content": system_msg},
            {"role": "user", "content": user_msg},
        ],
        temperature=0,
        max_tokens=300,
    )
    return completion.choices[0].message.content
def summarize_document(
    text: str,
    final_length: str = "2-3 paragraphs",
    focus: str = "key findings and actionable insights",
) -> dict:
    """
    Map-reduce summarization for documents of any length.
    Returns chunk summaries and final summary.
    """
    chunks = chunk_text(text)
    chunk_summaries = []
    if len(chunks) == 1:
        # Short document — single pass is fine
        chunk_summaries.append(summarize_chunk(chunks[0]))
    else:
        # Map phase: summarize each chunk, feeding each summary forward
        # as context for the next one.
        prev_summary = ""
        for index, chunk in enumerate(chunks):
            print(f"Summarizing chunk {index+1}/{len(chunks)}...")
            prev_summary = summarize_chunk(chunk, context=prev_summary)
            chunk_summaries.append(prev_summary)
    # Reduce phase: combine all chunk summaries into final summary
    combined_summaries = "\n\n".join(
        f"Section {n+1}: {summary}" for n, summary in enumerate(chunk_summaries)
    )
    reduce_prompt = f"""These are summaries of different sections of a document.
Create a final unified summary of {final_length}.
Focus on: {focus}
SECTION SUMMARIES:
{combined_summaries}"""
    final_response = client.chat.completions.create(
        model="gpt-4o",  # Use stronger model for final synthesis
        messages=[
            {
                "role": "system",
                "content": "You are an expert at synthesizing information. "
                "Create coherent, comprehensive summaries.",
            },
            {"role": "user", "content": reduce_prompt},
        ],
        temperature=0.3,
    )
    return {
        "final_summary": final_response.choices[0].message.content,
        "chunk_count": len(chunks),
        "chunk_summaries": chunk_summaries,
    }
# Usage
with open("long_report.txt") as report_file:
    report_text = report_file.read()
summary_result = summarize_document(report_text, final_length="1 paragraph", focus="risks")
print(summary_result["final_summary"])
Pattern 03
Content Generator with Structured Prompts
from dataclasses import dataclass
@dataclass
class ContentSpec:
    """Declarative specification for one piece of generated content."""
    topic: str
    format: str  # "blog_post", "tweet_thread", "linkedin_post", "email"
    tone: str  # "professional", "casual", "technical", "inspirational"
    audience: str
    key_points: list
    word_count: int = 500
    include_cta: bool = False  # when True, cta_text is appended to the prompt
    cta_text: str = ""
# Per-format prompt fragments consumed by generate_content(); keys match
# ContentSpec.format values. Unknown formats get a generic fallback there.
FORMAT_INSTRUCTIONS = {
"blog_post": "Write a blog post with a compelling headline, introduction, "
"2-4 subheaded sections, and a conclusion.",
"tweet_thread": "Write a Twitter/X thread. Start with a hook tweet, "
"then 4-6 numbered tweets (1/, 2/, etc.), end with a summary tweet.",
"linkedin_post": "Write a LinkedIn post. Start with a strong first line (hook), "
"use short paragraphs, include 3-5 relevant hashtags at the end.",
"email": "Write a professional email with Subject:, greeting, body paragraphs, "
"and a professional sign-off.",
}
def generate_content(spec: ContentSpec) -> str:
    """Render the content described by *spec* with a single LLM call."""
    # Unknown formats fall back to a generic instruction.
    format_instruction = FORMAT_INSTRUCTIONS.get(
        spec.format, f"Write a {spec.format} format piece."
    )
    key_points_text = "\n".join(f"- {point}" for point in spec.key_points)
    if spec.include_cta:
        cta_instruction = f"\n\nInclude a call-to-action: {spec.cta_text}"
    else:
        cta_instruction = ""
    system_prompt = f"""You are an expert content writer specializing in {spec.format} content.
Tone: {spec.tone}
Target Audience: {spec.audience}
{format_instruction}"""
    user_prompt = f"""Create content about: {spec.topic}
Key points to cover:
{key_points_text}
Target word count: approximately {spec.word_count} words{cta_instruction}"""
    completion = client.chat.completions.create(
        model="gpt-4o",
        messages=[
            {"role": "system", "content": system_prompt},
            {"role": "user", "content": user_prompt},
        ],
        temperature=0.8,  # Higher creativity for content generation
    )
    return completion.choices[0].message.content
# Usage
post_spec = ContentSpec(
    topic="Why developers should learn AI engineering in 2026",
    format="linkedin_post",
    tone="professional",
    audience="software developers",
    key_points=[
        "AI features are now a standard hiring requirement",
        "LLM APIs are as easy to use as any REST API",
        "The ROI for developers who learn AI is measurable",
    ],
    word_count=300,
    include_cta=True,
    cta_text="Check out our free AI Engineering course at prepflix.co.in",
)
post_text = generate_content(post_spec)
print(post_text)
Pattern 04
Text Classifier Returning JSON
import json
from pydantic import BaseModel
from typing import Literal
class ClassificationResult(BaseModel):
    """Structured, validated output of the support-ticket classifier."""
    label: str
    confidence: Literal["high", "medium", "low"]
    reasoning: str
class SupportTicketClassifier:
    """
    Classifies customer support tickets into categories.
    Returns structured JSON with label, confidence, and reasoning.
    """

    # Closed set of labels the model is allowed to choose from.
    CATEGORIES = [
        "billing",
        "technical_issue",
        "feature_request",
        "account_access",
        "general_inquiry",
        "complaint",
    ]

    # chr(10) is "\n" — f-string expressions cannot contain a backslash
    # before Python 3.12, so the newline join is spelled this way.
    SYSTEM_PROMPT = f"""You are a customer support ticket classifier.
Classify tickets into one of these categories:
{chr(10).join(f"- {c}" for c in CATEGORIES)}
Return JSON with this exact structure:
{{
"label": "",
"confidence": "",
"reasoning": ""
}}
Rules:
- label must be exactly one of the listed categories
- confidence: high if clear match, medium if ambiguous, low if very unclear
- reasoning: brief, factual explanation of why you chose this label"""

    def classify(self, ticket_text: str) -> ClassificationResult:
        """Classify one ticket; raises if the model's JSON fails validation."""
        completion = client.chat.completions.create(
            model="gpt-4o-mini",
            messages=[
                {"role": "system", "content": self.SYSTEM_PROMPT},
                {"role": "user", "content": ticket_text},
            ],
            response_format={"type": "json_object"},  # force valid JSON output
            temperature=0,  # deterministic labeling
        )
        payload = json.loads(completion.choices[0].message.content)
        return ClassificationResult(**payload)

    def classify_batch(self, tickets: list[str]) -> list[ClassificationResult]:
        """Classify multiple tickets (runs sequentially — use async for parallelism)."""
        return [self.classify(ticket) for ticket in tickets]
# Usage
ticket_classifier = SupportTicketClassifier()
sample_tickets = [
    "I was charged twice this month for my subscription",
    "The API is returning 500 errors for all my requests since 2pm",
    "Would love to see a dark mode option",
    "I forgot my password and the reset email isn't arriving",
]
for sample in sample_tickets:
    outcome = ticket_classifier.classify(sample)
    print(f"Ticket: {sample[:50]}...")
    print(f" -> {outcome.label} ({outcome.confidence}): {outcome.reasoning}")
    print()
Pattern 05
Code Explainer — Understand Any Code in Plain English
import ast
from typing import Optional
class CodeExplainer:
    """
    Explains code at different levels of detail for different audiences.
    """

    def explain(
        self,
        code: str,
        language: str = "python",
        audience: str = "junior developer",
        detail_level: str = "moderate",  # "brief", "moderate", "deep"
    ) -> dict:
        """
        Explain code with line-by-line analysis if requested.
        Returns explanation, complexity assessment, and potential issues.
        """
        # Maps detail level to the instruction injected into the system prompt.
        detail_instructions = {
            "brief": "In 2-3 sentences, explain what this code does.",
            "moderate": "Explain what this code does, how it works, and its purpose.",
            "deep": "Provide a deep analysis: what the code does, how each part works, "
            "algorithmic complexity, potential issues, and suggestions.",
        }
        system_content = f"""You are an expert {language} developer explaining code
to a {audience}.
{detail_instructions[detail_level]}
Also identify:
- Time complexity (if relevant)
- Potential bugs or issues
- One improvement suggestion (if any)
Format your response as JSON:
{{
"explanation": "...",
"complexity": "O(...) - explanation" or null,
"potential_issues": ["issue1", "issue2"] or [],
"improvement": "suggestion" or null
}}"""
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": system_content},
                {"role": "user", "content": f"```{language}\n{code}\n```"},
            ],
            response_format={"type": "json_object"},
            temperature=0,
        )
        return json.loads(completion.choices[0].message.content)

    def explain_line_by_line(self, code: str, language: str = "python") -> list[dict]:
        """
        Explain each logical section of code separately.
        Best for teaching or code review.
        """
        section_prompt = """Break the code into logical sections and explain each.
Return JSON array:
[
{"lines": "1-3", "code": "the code snippet", "explanation": "what it does"},
...
]"""
        completion = client.chat.completions.create(
            model="gpt-4o",
            messages=[
                {"role": "system", "content": section_prompt},
                {"role": "user", "content": f"```{language}\n{code}\n```"},
            ],
            response_format={"type": "json_object"},
            temperature=0,
        )
        data = json.loads(completion.choices[0].message.content)
        # Response may be {"sections": [...]} or directly [...]
        if isinstance(data, dict):
            return data.get("sections", data)
        return data
# Usage
explainer = CodeExplainer()
# Sample snippet to explain. Indentation matters: the LLM receives this
# string verbatim, so it must be valid Python.
code = """
def quicksort(arr):
    if len(arr) <= 1:
        return arr
    pivot = arr[len(arr) // 2]
    left = [x for x in arr if x < pivot]
    middle = [x for x in arr if x == pivot]
    right = [x for x in arr if x > pivot]
    return quicksort(left) + middle + quicksort(right)
"""
result = explainer.explain(code, detail_level="deep")
print(result["explanation"])
print("Complexity:", result["complexity"])
print("Issues:", result["potential_issues"])
Concept 06
FastAPI Integration — All 5 Patterns as REST Endpoints
from fastapi import FastAPI, HTTPException
from fastapi.responses import StreamingResponse
from pydantic import BaseModel
from typing import Optional, List
import json
app = FastAPI(title="AI Features API", version="1.0")
# Initialize our feature classes as module-level singletons shared by all requests.
# NOTE(review): the in-memory session dict is lost on restart and is not shared
# across worker processes — confirm single-process deployment or externalize it.
chatbot_sessions = {}  # session_id -> Chatbot instance
classifier = SupportTicketClassifier()
explainer = CodeExplainer()
# --- Pydantic request/response models ---
class ChatRequest(BaseModel):
    """Inbound payload for POST /chat."""
    session_id: str
    message: str
    system_prompt: Optional[str] = "You are a helpful assistant."


class ChatResponse(BaseModel):
    """Outbound payload for POST /chat."""
    response: str
    session_id: str
    turn_count: int


class SummarizeRequest(BaseModel):
    """Inbound payload for POST /summarize."""
    text: str
    final_length: str = "2-3 paragraphs"
    focus: str = "key findings"


class SummarizeResponse(BaseModel):
    """Outbound payload for POST /summarize."""
    summary: str
    chunk_count: int


class ClassifyRequest(BaseModel):
    """Inbound payload for POST /classify."""
    text: str


class ClassifyResponse(BaseModel):
    """Outbound payload for POST /classify."""
    label: str
    confidence: str
    reasoning: str


class ExplainRequest(BaseModel):
    """Inbound payload for POST /explain."""
    code: str
    language: str = "python"
    audience: str = "junior developer"
    detail_level: str = "moderate"


class ContentRequest(BaseModel):
    """Inbound payload for POST /generate-content."""
    topic: str
    format: str = "blog_post"
    tone: str = "professional"
    audience: str = "general"
    key_points: List[str] = []
    word_count: int = 500
# --- Endpoints ---
@app.post("/chat", response_model=ChatResponse)
async def chat_endpoint(request: ChatRequest):
    """Stateful chatbot with per-session memory."""
    # Lazily create one Chatbot per session_id; the system prompt is fixed
    # at session-creation time.
    session = chatbot_sessions.get(request.session_id)
    if session is None:
        session = Chatbot(system_prompt=request.system_prompt)
        chatbot_sessions[request.session_id] = session
    reply = session.chat(request.message)
    return ChatResponse(
        response=reply,
        session_id=request.session_id,
        turn_count=session.turn_count,
    )
@app.post("/summarize", response_model=SummarizeResponse)
async def summarize_endpoint(request: SummarizeRequest):
"""Summarize long documents using map-reduce."""
if len(request.text) < 10:
raise HTTPException(status_code=400, detail="Text too short")
result = summarize_document(
request.text,
final_length=request.final_length,
focus=request.focus,
)
return SummarizeResponse(
summary=result["final_summary"],
chunk_count=result["chunk_count"],
)
@app.post("/classify", response_model=ClassifyResponse)
async def classify_endpoint(request: ClassifyRequest):
"""Classify support tickets."""
result = classifier.classify(request.text)
return ClassifyResponse(
label=result.label,
confidence=result.confidence,
reasoning=result.reasoning,
)
@app.post("/explain")
async def explain_endpoint(request: ExplainRequest):
"""Explain code in plain English."""
result = explainer.explain(
code=request.code,
language=request.language,
audience=request.audience,
detail_level=request.detail_level,
)
return result
@app.post("/generate-content")
async def generate_content_endpoint(request: ContentRequest):
"""Generate content in various formats."""
spec = ContentSpec(
topic=request.topic,
format=request.format,
tone=request.tone,
audience=request.audience,
key_points=request.key_points,
word_count=request.word_count,
)
content = generate_content(spec)
return {"content": content}
@app.delete("/chat/{session_id}")
async def clear_chat_session(session_id: str):
"""Clear a chat session (start fresh)."""
if session_id in chatbot_sessions:
del chatbot_sessions[session_id]
return {"status": "cleared", "session_id": session_id}
# Run with: uvicorn app:app --reload
Concept 07
Pydantic Models — Your Contract Between Frontend and AI
Pydantic models serve double duty in AI apps: they validate incoming requests and they define the schema for LLM-extracted data. Here's the complete model file for the features above:
from pydantic import BaseModel, Field, field_validator
from typing import Optional, List, Literal
from enum import Enum
class ContentFormat(str, Enum):
    """Supported output formats; values match ContentSpec.format strings."""
    BLOG_POST = "blog_post"
    TWEET_THREAD = "tweet_thread"
    LINKEDIN_POST = "linkedin_post"
    EMAIL = "email"
class Tone(str, Enum):
    """Writing tones accepted by the content generator."""
    PROFESSIONAL = "professional"
    CASUAL = "casual"
    TECHNICAL = "technical"
    INSPIRATIONAL = "inspirational"
class GenerateRequest(BaseModel):
    """Validated request body for content generation.

    Field constraints reject oversized or abusive payloads before they
    reach the LLM.
    """
    topic: str = Field(min_length=3, max_length=200)
    format: ContentFormat = ContentFormat.BLOG_POST
    tone: Tone = Tone.PROFESSIONAL
    audience: str = Field(default="general audience", max_length=100)
    # default_factory is the pydantic convention for mutable defaults —
    # avoids declaring one shared list object as the default.
    key_points: List[str] = Field(default_factory=list, max_length=10)
    word_count: int = Field(default=500, ge=50, le=5000)

    @field_validator("key_points")
    @classmethod
    def validate_key_points(cls, v):
        """Strip whitespace and drop blank entries."""
        return [p.strip() for p in v if p.strip()]
class APIResponse(BaseModel):
    """Standard wrapper for all API responses.

    On success, ``data`` is populated; on failure, ``error`` is. ``metadata``
    optionally carries extras (e.g. timing or model info).
    """
    success: bool
    data: Optional[dict] = None
    error: Optional[str] = None
    metadata: Optional[dict] = None

    @classmethod
    def ok(cls, data: dict, metadata: Optional[dict] = None):
        """Build a success envelope."""
        return cls(success=True, data=data, metadata=metadata)

    @classmethod
    def fail(cls, error: str):
        """Build a failure envelope."""
        return cls(success=False, error=error)
| Feature | Pattern | Key Challenge |
|---|---|---|
| Customer chatbot | Sliding window chatbot | Memory management |
| Document Q&A | Map-reduce summarizer | Chunk boundaries |
| Marketing copy | Content generator | Format consistency |
| Email routing | Classifier | Label consistency |
| Dev tools | Code explainer | Code language detection |