The Roadmap for Mastering LLMOps in 2026

# llm_with_tracing.py

# Function: A production-ready LLM name wrapper with full observability.

# Each name is traced in Langfuse: enter, output, tokens, value, latency.

# Stipulations:

# pip set up langfuse anthropic python-dotenv

# Setup:

# 1. Create a free account at https://cloud.langfuse.com

# 2. Get your keys from Settings > API Keys

# 3. Create a .env file with the variables beneath

# Run:

# python llm_with_tracing.py

import os

import time

from dotenv import load_dotenv

import anthropic

from langfuse import Langfuse

# Load surroundings variables from .env file

load_dotenv()

# Required surroundings variables in your .env:

# LANGFUSE_PUBLIC_KEY=pk-lf-…

# LANGFUSE_SECRET_KEY=sk-lf-…

# LANGFUSE_HOST=https://cloud.langfuse.com (or your self-hosted URL)

# ANTHROPIC_API_KEY=sk-ant-…

# Initialize purchasers

langfuse_client = Langfuse() # Reads keys mechanically from surroundings

anthropic_client = anthropic.Anthropic() # Reads ANTHROPIC_API_KEY from surroundings

# ── Configuration ─────────────────────────────────────────────────────────────

# Retailer your immediate right here, not inline within the API name.

# This makes it versionable and testable independently.

SYSTEM_PROMPT = “”“You’re a useful buyer help assistant.

Reply questions clearly and concisely.

Should you have no idea one thing, say so immediately — don’t guess.”“”

MODEL = “claude-sonnet-4-20250514”

# Anthropic’s pricing as of mid-2026 (replace when pricing modifications)

# Used to calculate value per name for value monitoring

COST_PER_INPUT_TOKEN = 3.00 / 1_000_000 # $3.00 per million enter tokens

COST_PER_OUTPUT_TOKEN = 15.00 / 1_000_000 # $15.00 per million output tokens

def call_llm_with_tracing(

user_message: str,

session_id: str = “default-session”,

user_id: str = “nameless”

) -> str:

“”“

Make a traced LLM name. Each name creates a Langfuse hint with:

– Full enter and output

– Token utilization (enter, output, complete)

– Calculated value in USD

– Latency in milliseconds

– Mannequin used and session context

Parameters:

user_message : The message from the person

session_id : Teams associated calls into one dialog in Langfuse

user_id : Associates the decision with a selected person for analytics

Returns:

The LLM response as a string

““”

# Create a top-level hint for this person interplay

# The hint seems within the Langfuse dashboard as one unit of labor

hint = langfuse_client.hint(

title=“customer-support-call”,

session_id=session_id,

user_id=user_id,

enter={“user_message”: user_message, “system_prompt”: SYSTEM_PROMPT}

)

# Create a era span contained in the hint

# This captures model-specific particulars: mannequin title, tokens, value

era = hint.era(

title=“claude-completion”,

mannequin=MODEL,

enter={

“system”: SYSTEM_PROMPT,

“messages”: [{“role”: “user”, “content”: user_message}]

}

)

start_time = time.time()

attempt:

# Make the API name

response = anthropic_client.messages.create(

mannequin=MODEL,

max_tokens=1024,

system=SYSTEM_PROMPT,

messages=[{“role”: “user”, “content”: user_message}]

)

latency_ms = int((time.time() – start_time) * 1000)

# Extract the response textual content

response_text = response.content material[0].textual content

# Extract token utilization from the response

input_tokens = response.utilization.input_tokens

output_tokens = response.utilization.output_tokens

total_tokens = input_tokens + output_tokens

# Calculate value for this name

cost_usd = (

input_tokens * COST_PER_INPUT_TOKEN +

output_tokens * COST_PER_OUTPUT_TOKEN

)

# Replace the era span with outcomes

# This information populates the Langfuse value and token dashboards

era.finish(

output=response_text,

utilization={

“enter”: input_tokens,

“output”: output_tokens,

“complete”: total_tokens,

“unit”: “TOKENS”

metadata={

“latency_ms”: latency_ms,

“cost_usd”: spherical(cost_usd, 6),

“mannequin”: MODEL

}

)

# Replace the hint with the ultimate output

hint.replace(

output={“response”: response_text},

metadata={“total_cost_usd”: spherical(cost_usd, 6)}

)

# Print a abstract to stdout for native visibility

print(f“n{‘─’ * 60}”)

print(f“Consumer: {user_message}”)

print(f“Claude: {response_text}”)

print(f“Tokens: {input_tokens} in / {output_tokens} out / {total_tokens} complete”)

print(f“Price: ${cost_usd:.6f}”)

print(f“Latency: {latency_ms}ms”)

print(f“Hint: {langfuse_client.base_url}/hint/{hint.id}”)

print(f“{‘─’ * 60}n”)

return response_text

besides Exception as e:

# File the error within the hint so it exhibits up in Langfuse

era.finish(

output=None,

metadata={“error”: str(e), “latency_ms”: int((time.time() – start_time) * 1000)}

)

hint.replace(output={“error”: str(e)})

# At all times flush earlier than elevating — ensures the error hint is distributed

langfuse_client.flush()

elevate

lastly:

# Flush sends all buffered occasions to Langfuse

# In a long-running service, Langfuse flushes mechanically.

# In a script, it’s essential to flush manually earlier than the method exits.

langfuse_client.flush()

# ── Run an indication ────────────────────────────────────────────────────────

if __name__ == “__main__”:

# Simulate two turns of a buyer help dialog

test_messages = [

“What is your return policy for electronics?”,

“Can I return an item I bought 45 days ago?”

]

session = “demo-session-001”

for i, message in enumerate(test_messages):

print(f“nCall {i + 1}/{len(test_messages)}”)

attempt:

call_llm_with_tracing(

user_message=message,

session_id=session,

user_id=“test-user-42”

)

besides Exception as e:

print(f“Error on name {i + 1}: {e}”)

The Roadmap for Mastering LLMOps in 2026

Admin

Vampire Survivors Studio Suggests It Could Scrap Fortnite Crossover After Epic Video games Confirms Generative AI Utilization

Leave a Reply Cancel reply

Recommended.

Google Search Rating Volatility Round January 15-16

the DOD stated it designated Anthropic a provide chain threat over issues the AI firm might disable its tech if the Pentagon crossed its “pink traces” (Paresh Dave/Wired)

Trending.

Backrooms director Kane Parsons explains the birds, the portals, and his sensible results

100 Most Costly Key phrases for Google Advertisements in 2026

Random Forest Algorithm in Machine Studying With Instance

The Full Information to EcoGPT

Resident Evil followers have adopted a Love & Deepspace character because the son of Leon S. Kennedy and one in every of his potential spouses

AimactGrow

Categories

Recent News

Preorder Beast of Reincarnation At A Low cost And Get Some DLC For Free

The Greatest Children’ Backpacks for the 2026 Faculty Yr