Skip to main content

AI Integration Guide

Overview

The framework includes an AI layer (ai/) that uses the Anthropic Claude API to augment test automation with three capabilities:

| Module | Class | Purpose |
| --- | --- | --- |
| `ai/assertion_generator.py` | `AssertionGenerator` | Generate test assertions from a page screenshot |
| `ai/selector_healer.py` | `SelectorHealer` | Repair broken CSS selectors using current page HTML |
| `ai/visual_analyzer.py` | `VisualAnalyzer` | Compare a live screenshot against a Figma design reference |

All three are currently stub implementations (safe no-ops) — they compile, return typed results, and pass tests, but do not yet call the API. This guide documents how to implement and use each one.


Prerequisites

1. Anthropic API key

# Add to your .env file
ANTHROPIC_API_KEY=sk-ant-...

2. Verify the SDK is installed

python -c "import anthropic; print(anthropic.__version__)"

Module 1: AssertionGenerator

File: ai/assertion_generator.py

What it does

Takes a PNG screenshot (as bytes) and asks Claude to describe what assertions a test engineer should make about the visible UI state.

Current interface

from ai.assertion_generator import AssertionGenerator

gen = AssertionGenerator()
screenshot_bytes: bytes = page.screenshot(full_page=True)
suggestions: list[str] = gen.generate_from_screenshot(screenshot_bytes)
# Currently returns [] — will return strings like:
# ["Assert that the heading 'Sports' is visible",
# "Assert that at least 3 event cards are displayed",
# "Assert that the bottom navigation bar is present"]

How to implement

# ai/assertion_generator.py
import base64
import anthropic

class AssertionGenerator:
    """Ask Claude to suggest Playwright assertions for a UI screenshot.

    The client reads ``ANTHROPIC_API_KEY`` from the environment; no key is
    passed in code.
    """

    # Instruction sent alongside the screenshot. Kept as a class constant so
    # the prompt is easy to review and tweak without touching request logic.
    _PROMPT = (
        "You are a QA engineer reviewing a mobile web app screenshot. "
        "List the most important assertions a Playwright test should make "
        "about the current UI state. Return one assertion per line, "
        "starting each with 'Assert that'. Be specific and concise."
    )

    def __init__(self, model: str = "claude-opus-4-6", max_tokens: int = 1024) -> None:
        self._client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from env
        self._model = model
        self._max_tokens = max_tokens

    def generate_from_screenshot(self, screenshot_bytes: bytes) -> list[str]:
        """Return one suggested assertion per non-empty line of the reply.

        :param screenshot_bytes: raw PNG bytes, e.g. from ``page.screenshot()``.
        """
        image_b64 = base64.standard_b64encode(screenshot_bytes).decode()

        message = self._client.messages.create(
            model=self._model,
            max_tokens=self._max_tokens,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/png",
                                "data": image_b64,
                            },
                        },
                        {"type": "text", "text": self._PROMPT},
                    ],
                }
            ],
        )

        return self._parse_assertions(message.content[0].text)

    @staticmethod
    def _parse_assertions(raw: str) -> list[str]:
        # One assertion per line; drop blank lines and surrounding whitespace.
        return [line.strip() for line in raw.splitlines() if line.strip()]

Use in a test

@pytest.mark.visual
@pytest.mark.ai
def test_sports_page_ai_assertions(
    page: Page,
    settings: Settings,
    assertion_generator: AssertionGenerator,
) -> None:
    """Surface AI-suggested assertions for engineer review.

    Suggestions are only printed — nothing from the model is auto-asserted.
    """
    sports_page = SportsPage(page, settings)
    sports_page.load()

    full_page_png = page.screenshot(full_page=True)
    suggestions = assertion_generator.generate_from_screenshot(full_page_png)

    # Print for the engineer to review in the test log.
    for suggestion in suggestions:
        print(f" AI: {suggestion}")

    assert isinstance(suggestions, list)

Module 2: SelectorHealer

File: ai/selector_healer.py

What it does

When a selector breaks (element not found), the healer takes the broken selector and the current page HTML, asks Claude to find the equivalent element in the new DOM, and returns a replacement selector.

Current interface

from ai.selector_healer import SelectorHealer

healer = SelectorHealer()
page_html: str = page.content()
fixed: str = healer.heal(
broken_selector="[data-testid='sports-feed']",
page_html=page_html,
)
# Currently returns the original selector — will return a working replacement

How to implement

# ai/selector_healer.py
import anthropic

class SelectorHealer:
    """Ask Claude to propose a replacement for a broken CSS selector.

    The client reads ``ANTHROPIC_API_KEY`` from the environment.
    """

    # Cap on HTML characters sent to the model, to stay within token limits.
    _HTML_LIMIT = 8_000

    def __init__(self, model: str = "claude-sonnet-4-6", max_tokens: int = 512) -> None:
        self._client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from env
        self._model = model
        self._max_tokens = max_tokens

    def heal(self, broken_selector: str, page_html: str) -> str:
        """Return a suggested replacement CSS selector.

        :param broken_selector: the selector that stopped matching.
        :param page_html: current page HTML (will be truncated to fit).
        """
        # Truncate HTML to stay within token limits — keep the leading part,
        # which usually contains the app shell and main content.
        html_snippet = page_html[: self._HTML_LIMIT]

        message = self._client.messages.create(
            model=self._model,
            max_tokens=self._max_tokens,
            messages=[
                {
                    "role": "user",
                    "content": (
                        f"A Playwright selector has stopped working: `{broken_selector}`\n\n"
                        f"Here is the current page HTML (truncated):\n```html\n{html_snippet}\n```\n\n"
                        "Suggest a single replacement CSS selector that targets the same element. "
                        "Prefer data-testid attributes. Respond with only the selector string, nothing else."
                    ),
                }
            ],
        )

        return self._extract_selector(message.content[0].text)

    @staticmethod
    def _extract_selector(raw: str) -> str:
        # The model is instructed to answer with only the selector; strip
        # whitespace, any backtick fencing, and whitespace inside the fence.
        return raw.strip().strip("`").strip()

Integration with BasePage (future pattern)

# In core/base_page.py — enhanced _retry() with healing
# NOTE: illustrative sketch, not runnable as-is — the `...` in the signature
# stands in for the remaining retry parameters (timeouts, attempt counts).
def _retry_with_healing(self, selector: str, action, ...) -> None:
    try:
        # First attempt: run the caller-supplied action unchanged.
        action()
    except PWTimeoutError:
        # On timeout, ask the AI healer for a replacement selector based on
        # the current DOM, then log the substitution for audit.
        healed = self._healer.heal(selector, self._page.content())
        logger.warning(f"Selector '{selector}' failed — healed to '{healed}'")
        # retry with healed selector

Module 3: VisualAnalyzer

File: ai/visual_analyzer.py

What it does

Takes two images — a live browser screenshot and a Figma design reference — and asks Claude to identify visual differences. Returns a structured diff report.

Current interface

from ai.visual_analyzer import VisualAnalyzer, VisualDiff

analyzer = VisualAnalyzer()
live_screenshot: bytes = page.screenshot()
figma_reference: bytes = open("references/splash_screen.png", "rb").read()

diff: VisualDiff = analyzer.compare_with_design(live_screenshot, figma_reference)
# diff = {
# "match": True,
# "differences": [],
# "confidence": 1.0,
# }

How to implement

# ai/visual_analyzer.py
import base64
from typing import TypedDict
import anthropic


class VisualDiff(TypedDict):
    """Structured result of a design-vs-live comparison."""

    match: bool  # True when the screenshot matches the design
    differences: list[str]  # human-readable description of each mismatch
    confidence: float  # model-reported confidence, expected in [0.0, 1.0]


class VisualAnalyzer:
    """Compare a live screenshot against a Figma design reference via Claude.

    The client reads ``ANTHROPIC_API_KEY`` from the environment.
    """

    def __init__(self, model: str = "claude-opus-4-6", max_tokens: int = 2048) -> None:
        self._client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY from env
        self._model = model
        self._max_tokens = max_tokens

    def compare_with_design(
        self,
        screenshot_bytes: bytes,
        figma_image_bytes: bytes,
    ) -> VisualDiff:
        """Return a structured diff of the live screenshot vs the design.

        :param screenshot_bytes: PNG bytes of the live page.
        :param figma_image_bytes: PNG bytes exported from Figma.
        """
        def to_b64(b: bytes) -> str:
            return base64.standard_b64encode(b).decode()

        message = self._client.messages.create(
            model=self._model,
            max_tokens=self._max_tokens,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {"type": "text", "text": "Image 1 is the Figma design reference:"},
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/png",
                                "data": to_b64(figma_image_bytes),
                            },
                        },
                        {"type": "text", "text": "Image 2 is the live browser screenshot:"},
                        {
                            "type": "image",
                            "source": {
                                "type": "base64",
                                "media_type": "image/png",
                                "data": to_b64(screenshot_bytes),
                            },
                        },
                        {
                            "type": "text",
                            "text": (
                                "Compare these two UI images. List every visual difference you see "
                                "(layout, colors, text, spacing, missing elements). "
                                "If they match closely, say so. "
                                "Format your response as:\n"
                                "MATCH: yes/no\n"
                                "CONFIDENCE: 0.0-1.0\n"
                                "DIFFERENCES:\n- difference 1\n- difference 2"
                            ),
                        },
                    ],
                }
            ],
        )

        return self._parse_response(message.content[0].text)

    def _parse_response(self, text: str) -> VisualDiff:
        """Parse the MATCH/CONFIDENCE/DIFFERENCES response format.

        Defaults are optimistic (match, confidence 1.0) so a reply that omits
        a field degrades to the stub behavior rather than a false failure.
        """
        match = True
        confidence = 1.0
        differences: list[str] = []

        for raw_line in text.splitlines():
            # Strip each line first — models sometimes indent list output,
            # which would defeat a column-0 startswith() check.
            line = raw_line.strip()
            if line.startswith("MATCH:"):
                match = "yes" in line.lower()
            elif line.startswith("CONFIDENCE:"):
                try:
                    # partition() tolerates extra colons after the first.
                    confidence = float(line.partition(":")[2].strip())
                except ValueError:
                    pass  # keep the default if the value is not a number
            elif line.startswith("- "):
                differences.append(line[2:].strip())

        return VisualDiff(match=match, differences=differences, confidence=confidence)

Use in a visual test

@pytest.mark.visual
@pytest.mark.ai
def test_splash_matches_figma_design(
    mobile_page: Page,
    settings: Settings,
) -> None:
    """Fail when the live splash screen drifts from the Figma reference."""
    mobile_page.goto(settings.base_url)
    mobile_page.wait_for_load_state("domcontentloaded")

    diff = VisualAnalyzer().compare_with_design(
        mobile_page.screenshot(),
        Path("references/splash_screen.png").read_bytes(),
    )

    # Surface each difference before failing so the log is actionable.
    if not diff["match"]:
        for d in diff["differences"]:
            print(f" DIFF: {d}")

    assert diff["match"], (
        f"Visual regression detected (confidence={diff['confidence']:.2f}): "
        + "; ".join(diff["differences"])
    )

Figma Reference Images

Store reference screenshots in references/ (not committed — too large for git).

references/
├── splash_screen.png ← exported from Figma node 837:22442
├── sports_feed_mobile.png
├── bottom_nav.png
└── login_form_mobile.png

To export from Figma: right-click node → Export → PNG → 2x scale.


Cost & Rate Limiting

| Model | Cost (approx) | Use case |
| --- | --- | --- |
| `claude-opus-4-6` | Higher | Visual analysis, assertion generation (complex vision tasks) |
| `claude-sonnet-4-6` | Medium | Selector healing (text-only, lower stakes) |

Run AI tests on-demand only (pytest -m ai), not in every CI run.


Environment Variables for AI Layer

# .env
ANTHROPIC_API_KEY=sk-ant-...

The anthropic SDK reads ANTHROPIC_API_KEY automatically from the environment — no code change needed.