AI Integration Guide
Overview
The framework includes an AI layer (ai/) that uses the Anthropic Claude API to augment test automation with three capabilities:
| Module | Class | Purpose |
|---|---|---|
| ai/assertion_generator.py | AssertionGenerator | Generate test assertions from a page screenshot |
| ai/selector_healer.py | SelectorHealer | Repair broken CSS selectors using current page HTML |
| ai/visual_analyzer.py | VisualAnalyzer | Compare a live screenshot against a Figma design reference |
All three are currently stub implementations (safe no-ops) — they compile, return typed results, and pass tests, but do not yet call the API. This guide documents how to implement and use each one.
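Here, "stub" means a typed method body with no API call. For illustration, the healer's no-op shape looks roughly like this (a sketch inferred from the documented interfaces below, not the literal source):

```python
# Illustrative no-op stub: typed, deterministic, no network call.
class SelectorHealer:
    def heal(self, broken_selector: str, page_html: str) -> str:
        return broken_selector  # matches the documented stub behavior below
```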
Prerequisites
1. Anthropic API key
# Add to your .env file
ANTHROPIC_API_KEY=sk-ant-...
2. Verify the SDK is installed
python -c "import anthropic; print(anthropic.__version__)"
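3. Optional: fail fast when the key is missing

A minimal sketch, assuming the project loads .env via python-dotenv (the actual loading mechanism may differ):

```python
import os

from dotenv import load_dotenv

load_dotenv()  # read .env into the process environment

if not os.environ.get("ANTHROPIC_API_KEY"):
    raise RuntimeError("ANTHROPIC_API_KEY is not set; AI features cannot run.")
```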
Module 1: AssertionGenerator
File: ai/assertion_generator.py
What it does
Takes a PNG screenshot (as bytes) and asks Claude to describe what assertions a test engineer should make about the visible UI state.
Current interface
from ai.assertion_generator import AssertionGenerator
gen = AssertionGenerator()
screenshot_bytes: bytes = page.screenshot(full_page=True)
suggestions: list[str] = gen.generate_from_screenshot(screenshot_bytes)
# Currently returns [] — will return strings like:
# ["Assert that the heading 'Sports' is visible",
# "Assert that at least 3 event cards are displayed",
# "Assert that the bottom navigation bar is present"]
How to implement
# ai/assertion_generator.py
import base64
import anthropic
class AssertionGenerator:
def __init__(self, model: str = "claude-opus-4-6", max_tokens: int = 1024) -> None:
self._client = anthropic.Anthropic() # reads ANTHROPIC_API_KEY from env
self._model = model
self._max_tokens = max_tokens
def generate_from_screenshot(self, screenshot_bytes: bytes) -> list[str]:
image_b64 = base64.standard_b64encode(screenshot_bytes).decode()
message = self._client.messages.create(
model=self._model,
max_tokens=self._max_tokens,
messages=[
{
"role": "user",
"content": [
{
"type": "image",
"source": {
"type": "base64",
"media_type": "image/png",
"data": image_b64,
},
},
{
"type": "text",
"text": (
"You are a QA engineer reviewing a mobile web app screenshot. "
"List the most important assertions a Playwright test should make "
"about the current UI state. Return one assertion per line, "
"starting each with 'Assert that'. Be specific and concise."
),
},
],
}
],
)
        raw = message.content[0].text
        # Keep only assertion lines; the model may add preamble despite the prompt.
        return [line.strip() for line in raw.splitlines() if line.strip().startswith("Assert")]
Use in a test
@pytest.mark.visual
@pytest.mark.ai
def test_sports_page_ai_assertions(
page: Page,
settings: Settings,
assertion_generator: AssertionGenerator,
) -> None:
sports = SportsPage(page, settings)
sports.load()
screenshot = page.screenshot(full_page=True)
suggestions = assertion_generator.generate_from_screenshot(screenshot)
# Log suggestions for engineer review — do not auto-assert yet
for s in suggestions:
print(f" AI: {s}")
assert isinstance(suggestions, list)
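The assertion_generator fixture used above is not defined in this guide. A minimal sketch of the wiring, assuming it lives in the suite's conftest.py:

```python
# conftest.py: hypothetical fixture wiring for the AI layer
import pytest

from ai.assertion_generator import AssertionGenerator


@pytest.fixture(scope="session")
def assertion_generator() -> AssertionGenerator:
    # One instance per session is enough; the generator holds only a client.
    return AssertionGenerator()
```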
Module 2: SelectorHealer
File: ai/selector_healer.py
What it does
When a selector breaks (element not found), the healer takes the broken selector and the current page HTML, asks Claude to find the equivalent element in the new DOM, and returns a replacement selector.
Current interface
from ai.selector_healer import SelectorHealer
healer = SelectorHealer()
page_html: str = page.content()
fixed: str = healer.heal(
broken_selector="[data-testid='sports-feed']",
page_html=page_html,
)
# Currently returns the original selector — will return a working replacement
How to implement
# ai/selector_healer.py
import anthropic
class SelectorHealer:
def __init__(self, model: str = "claude-sonnet-4-6", max_tokens: int = 512) -> None:
self._client = anthropic.Anthropic()
self._model = model
self._max_tokens = max_tokens
def heal(self, broken_selector: str, page_html: str) -> str:
        # Naive truncation to stay within token limits; a smarter version would
        # extract the region most likely to contain the target element.
        html_snippet = page_html[:8_000]
message = self._client.messages.create(
model=self._model,
max_tokens=self._max_tokens,
messages=[
{
"role": "user",
"content": (
f"A Playwright selector has stopped working: `{broken_selector}`\n\n"
f"Here is the current page HTML (truncated):\n```html\n{html_snippet}\n```\n\n"
"Suggest a single replacement CSS selector that targets the same element. "
"Prefer data-testid attributes. Respond with only the selector string, nothing else."
),
}
],
)
return message.content[0].text.strip().strip("`")
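The prefix truncation in heal() is crude: on script-heavy pages the first 8,000 characters can be mostly head content. One hedged refinement is to strip non-structural tags before truncating (a hypothetical helper, not part of the current module):

```python
import re


def strip_noise(page_html: str, limit: int = 8_000) -> str:
    # <script> and <style> blocks consume tokens but never contain the
    # target element's markup, so drop them before truncating.
    stripped = re.sub(
        r"<(script|style)\b.*?</\1>",
        "",
        page_html,
        flags=re.DOTALL | re.IGNORECASE,
    )
    return stripped[:limit]
```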
Integration with BasePage (future pattern)
# In core/base_page.py — enhanced _retry() with healing
def _retry_with_healing(self, selector: str, action: Callable[[str], None]) -> None:
    try:
        action(selector)
    except PWTimeoutError:
        healed = self._healer.heal(selector, self._page.content())
        logger.warning(f"Selector '{selector}' failed; healed to '{healed}'")
        action(healed)  # retry once with the healed selector
Module 3: VisualAnalyzer
File: ai/visual_analyzer.py
What it does
Takes two images — a live browser screenshot and a Figma design reference — and asks Claude to identify visual differences. Returns a structured diff report.
Current interface
from ai.visual_analyzer import VisualAnalyzer, VisualDiff
analyzer = VisualAnalyzer()
live_screenshot: bytes = page.screenshot()
figma_reference: bytes = Path("references/splash_screen.png").read_bytes()
diff: VisualDiff = analyzer.compare_with_design(live_screenshot, figma_reference)
# diff = {
# "match": True,
# "differences": [],
# "confidence": 1.0,
# }
How to implement
# ai/visual_analyzer.py
import base64
from typing import TypedDict
import anthropic
class VisualDiff(TypedDict):
match: bool
differences: list[str]
confidence: float
class VisualAnalyzer:
def __init__(self, model: str = "claude-opus-4-6", max_tokens: int = 2048) -> None:
self._client = anthropic.Anthropic()
self._model = model
self._max_tokens = max_tokens
def compare_with_design(
self,
screenshot_bytes: bytes,
figma_image_bytes: bytes,
) -> VisualDiff:
def to_b64(b: bytes) -> str:
return base64.standard_b64encode(b).decode()
message = self._client.messages.create(
model=self._model,
max_tokens=self._max_tokens,
messages=[
{
"role": "user",
"content": [
{"type": "text", "text": "Image 1 is the Figma design reference:"},
{
"type": "image",
"source": {
"type": "base64",
"media_type": "image/png",
"data": to_b64(figma_image_bytes),
},
},
{"type": "text", "text": "Image 2 is the live browser screenshot:"},
{
"type": "image",
"source": {
"type": "base64",
"media_type": "image/png",
"data": to_b64(screenshot_bytes),
},
},
{
"type": "text",
"text": (
"Compare these two UI images. List every visual difference you see "
"(layout, colors, text, spacing, missing elements). "
"If they match closely, say so. "
"Format your response as:\n"
"MATCH: yes/no\n"
"CONFIDENCE: 0.0-1.0\n"
"DIFFERENCES:\n- difference 1\n- difference 2"
),
},
],
}
],
)
return self._parse_response(message.content[0].text)
def _parse_response(self, text: str) -> VisualDiff:
lines = text.splitlines()
match = True
confidence = 1.0
differences: list[str] = []
for line in lines:
if line.startswith("MATCH:"):
match = "yes" in line.lower()
elif line.startswith("CONFIDENCE:"):
try:
confidence = float(line.split(":")[1].strip())
except ValueError:
pass
elif line.startswith("- "):
differences.append(line[2:].strip())
return VisualDiff(match=match, differences=differences, confidence=confidence)
Use in a visual test
@pytest.mark.visual
@pytest.mark.ai
def test_splash_matches_figma_design(
mobile_page: Page,
settings: Settings,
) -> None:
mobile_page.goto(settings.base_url)
mobile_page.wait_for_load_state("domcontentloaded")
live_screenshot = mobile_page.screenshot()
figma_reference = Path("references/splash_screen.png").read_bytes()
analyzer = VisualAnalyzer()
diff = analyzer.compare_with_design(live_screenshot, figma_reference)
if not diff["match"]:
for d in diff["differences"]:
print(f" DIFF: {d}")
assert diff["match"], (
f"Visual regression detected (confidence={diff['confidence']:.2f}): "
+ "; ".join(diff["differences"])
)
Figma Reference Images
Store reference screenshots in references/ (not committed — too large for git).
references/
├── splash_screen.png ← exported from Figma node 837:22442
├── sports_feed_mobile.png
├── bottom_nav.png
└── login_form_mobile.png
To export from Figma: right-click node → Export → PNG → 2x scale.
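Because references/ is not committed, a fresh checkout will not have these files. Rather than failing, a test can skip when its reference is missing; a sketch of the guard, placed at the top of the test body:

```python
from pathlib import Path

import pytest

figma_reference_path = Path("references/splash_screen.png")
if not figma_reference_path.exists():
    pytest.skip("Figma reference not exported locally; see the export steps above")
figma_reference = figma_reference_path.read_bytes()
```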
Cost & Rate Limiting
| Model | Cost (approx) | Use case |
|---|---|---|
| claude-opus-4-6 | Higher | Visual analysis, assertion generation (complex vision tasks) |
| claude-sonnet-4-6 | Medium | Selector healing (text-only, lower stakes) |
Run AI tests on-demand only (pytest -m ai), not in every CI run.
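One way to enforce this is to register the markers and deselect ai by default (a sketch for pytest.ini; adjust if the project configures markers in pyproject.toml):

```ini
[pytest]
markers =
    ai: tests that call the Anthropic API (opt in with pytest -m ai)
    visual: visual comparison tests
addopts = -m "not ai"
```

A -m passed on the command line overrides the one from addopts, so pytest -m ai still selects the AI tests.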
Environment Variables for AI Layer
# .env
ANTHROPIC_API_KEY=sk-ant-...
The anthropic SDK reads ANTHROPIC_API_KEY automatically from the environment — no code change needed.