OpenAI GPT-4o and o1: Deep Dive into Modern Language Models
The OpenAI Model Landscape
┌───────────────────────────────────────────────────────────┐
│ OpenAI Model Lineup (2024-2025)                           │
├─────────────────┬──────────┬────────────┬─────────────────┤
│ Model           │ Context  │ Modalities │ Specialty       │
├─────────────────┼──────────┼────────────┼─────────────────┤
│ gpt-4o          │ 128K     │ Text+Vision│ Speed + quality │
│ gpt-4o-mini     │ 128K     │ Text+Vision│ Cost efficient  │
│ o1              │ 200K     │ Text       │ Deep reasoning  │
│ o1-mini         │ 128K     │ Text       │ STEM reasoning  │
│ o3-mini         │ 200K     │ Text       │ Frontier STEM   │
│ gpt-image-1     │ -        │ Image gen  │ Image creation  │
└─────────────────┴──────────┴────────────┴─────────────────┘
GPT-4o: Multimodal at Speed
import base64
import mimetypes
from pathlib import Path

from openai import OpenAI

client = OpenAI()


# Vision: Analyze an image
def analyze_image(image_path: str, question: str) -> str:
    """Ask gpt-4o a question about a local image file.

    The file is read from disk, base64-encoded and sent inline as a data URL
    in the same user message as the text question.

    Args:
        image_path: Path of the image file to send.
        question: Text prompt sent alongside the image.

    Returns:
        The message content of the first choice in the response.
    """
    # Label the data URL with the file's actual media type. The label used to
    # be hard-coded to JPEG, which mislabels e.g. PNG input; JPEG is kept as
    # the fallback when the extension is unknown or not an image type.
    mime_type, _ = mimetypes.guess_type(image_path)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    image_data = base64.standard_b64encode(Path(image_path).read_bytes()).decode()

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{image_data}",
                        "detail": "high",  # high | low | auto
                    },
                },
                {"type": "text", "text": question},
            ],
        }],
        max_tokens=1024,
    )
    return response.choices[0].message.content


result = analyze_image(
    "architecture-diagram.png",
    "Identify all services and describe the data flow between them",
)
Structured Outputs — Guaranteed JSON Schema
Unlike JSON mode (which guarantees syntactically valid JSON but not that it matches your schema), Structured Outputs guarantees schema conformance:
from pydantic import BaseModel
from typing import Literal


class ExtractedEntity(BaseModel):
    """A single entity the model extracted from the input text."""

    name: str
    type: Literal["PERSON", "ORGANIZATION", "LOCATION", "PRODUCT"]
    confidence: float


class ExtractionResult(BaseModel):
    """Top-level schema the model's response is parsed into."""

    entities: list[ExtractedEntity]
    summary: str
    sentiment: Literal["POSITIVE", "NEGATIVE", "NEUTRAL"]


response = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[
        {"role": "system", "content": "Extract entities from news articles."},
        {"role": "user", "content": "Apple CEO Tim Cook announced a new partnership with Microsoft in Seattle."},
    ],
    response_format=ExtractionResult,  # Pass the Pydantic model directly
)

message = response.choices[0].message

# `parsed` is None when the model returned a refusal instead of output that
# conforms to the schema; fail with the refusal text rather than an
# AttributeError on `None.entities` further down.
if message.parsed is None:
    raise RuntimeError(f"Model returned no structured output: {message.refusal}")

result: ExtractionResult = message.parsed
for entity in result.entities:
    print(f"{entity.name} ({entity.type}) — confidence: {entity.confidence:.2f}")
o1 Models: Chain-of-Thought Reasoning
o1 models spend time "thinking" before responding. They excel at mathematics, coding challenges, and multi-step reasoning:
# NOTE(review): the original note here says o1 does NOT support system
# messages or streaming — confirm against the current API documentation.
# Reasoning depth is controlled with reasoning_effort instead of temperature.

design_prompt = """Design a database schema for a multi-tenant SaaS application.
    Requirements:
    - Row-level security per tenant
    - Audit trails for all mutations
    - Soft deletes
    - Efficient cross-tenant analytics for admins
    - Must handle 10M+ rows per tenant
    Explain trade-offs and provide CREATE TABLE statements."""

response = client.chat.completions.create(
    model="o1",
    messages=[{"role": "user", "content": design_prompt}],
    reasoning_effort="high",  # low | medium | high
)

# The prompt asks for trade-offs plus CREATE TABLE statements, so the printed
# answer is expected to be a full schema design.
answer = response.choices[0].message.content
print(answer)
Function Calling (Tool Use)
import json

# Tool schema advertised to the model: a single SQL-execution function.
tools = [{
    "type": "function",
    "function": {
        "name": "execute_sql",
        "description": "Execute a SQL query against the database and return results",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "SQL query to execute"},
                "database": {"type": "string", "enum": ["analytics", "production"]},
            },
            "required": ["query", "database"],
            "additionalProperties": False,
        },
        "strict": True,  # Structured Outputs for tool calls too
    },
}]


def run_sql_agent(user_question: str, db_schema: str, *, max_steps: int = 10) -> str:
    """Answer a question by letting the model issue SQL tool calls in a loop.

    Each round sends the conversation to gpt-4o. If the reply contains tool
    calls, every call is executed through ``execute_query`` and its
    JSON-encoded result is appended as a ``tool`` message; otherwise the
    reply's text is returned.

    Args:
        user_question: The question to answer.
        db_schema: Schema description embedded in the system prompt.
        max_steps: Upper bound on model round-trips before giving up.

    Returns:
        The text of the first model reply that contains no tool calls
        (an empty string if that reply has no content).

    Raises:
        RuntimeError: If the model is still requesting tool calls after
            ``max_steps`` round-trips.
    """
    messages = [
        {"role": "system", "content": f"You are a SQL assistant. Schema:\n{db_schema}"},
        {"role": "user", "content": user_question},
    ]

    # Bounded loop: the previous `while True` never terminated when
    # finish_reason was neither "stop" nor "tool_calls" (e.g. "length").
    for _ in range(max_steps):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=tools,
            tool_choice="auto",
        )

        msg = response.choices[0].message
        messages.append(msg)

        # Branch on the presence of tool calls rather than on finish_reason,
        # so every non-tool reply ends the loop.
        if not msg.tool_calls:
            return msg.content or ""

        for tool_call in msg.tool_calls:
            args = json.loads(tool_call.function.arguments)
            # NOTE(review): this runs model-written SQL, and the schema allows
            # the "production" database. `execute_query` is defined outside
            # this section — confirm it enforces read-only access.
            result = execute_query(args["query"], args["database"])
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                "content": json.dumps(result),
            })

    raise RuntimeError(f"SQL agent did not produce an answer within {max_steps} steps")
Embeddings with text-embedding-3
import numpy as np


def embed(texts: list[str], model="text-embedding-3-small") -> np.ndarray:
    """Request embeddings for *texts* from *model* and return them as an array.

    The array is built from the ``embedding`` of each item in the response's
    ``data`` list, in the order the response provides them.
    """
    reply = client.embeddings.create(input=texts, model=model)
    vectors = [entry.embedding for entry in reply.data]
    return np.array(vectors)


# Dimension reduction (Matryoshka)
def embed_compact(texts: list[str], dimensions: int = 256) -> np.ndarray:
    """Request shortened embeddings from text-embedding-3-large.

    ``dimensions`` is forwarded to the API so each returned vector has that
    many components.
    """
    request = {
        "input": texts,
        "model": "text-embedding-3-large",
        "dimensions": dimensions,  # 256 instead of 3072 — 12x smaller
    }
    reply = client.embeddings.create(**request)
    vectors = [entry.embedding for entry in reply.data]
    return np.array(vectors)
Image Generation with gpt-image-1
import base64
from PIL import Image
import io

# Prompt kept in its own variable so the generate() call stays readable.
diagram_prompt = """A photorealistic diagram of a microservices architecture.
    Services connected by arrows on a dark background.
    Tech aesthetic, blueprint style."""

response = client.images.generate(
    model="gpt-image-1",
    prompt=diagram_prompt,
    size="1536x1024",
    quality="high",
    n=1,
)

# gpt-image-1 returns base64: decode the first result, load it with Pillow,
# and write it out as a PNG file.
first_result = response.data[0]
image_bytes = base64.b64decode(first_result.b64_json)
buffer = io.BytesIO(image_bytes)
image = Image.open(buffer)
image.save("architecture.png")
Rate Limiting and Cost Management
import asyncio

from openai import AsyncOpenAI, RateLimitError, APIStatusError
from tenacity import retry, stop_after_attempt, wait_exponential

# `call_with_retry` awaits `async_client`, which was referenced but never
# defined in this section; awaiting requires the async client class.
async_client = AsyncOpenAI()

# Price per million tokens, as (input, output), keyed by model name.
_TOKEN_PRICES = {
    "gpt-4o": (2.50, 10.00),
    "gpt-4o-mini": (0.15, 0.60),
    "o1": (15.00, 60.00),
    "text-embedding-3-small": (0.02, 0),
}


# NOTE(review): no `retry=` predicate is given, so tenacity retries on ANY
# exception, not only RateLimitError — confirm that is intended.
@retry(
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    reraise=True,
)
async def call_with_retry(model: str, messages: list, **kwargs):
    """Create a chat completion, retrying with exponential backoff.

    Up to 5 attempts are made, waiting between 4 and 60 seconds between
    attempts; with ``reraise=True`` the last exception propagates to the
    caller once attempts are exhausted.

    Args:
        model: Model name passed to the API.
        messages: Chat messages passed to the API.
        **kwargs: Extra keyword arguments forwarded to ``create``.

    Returns:
        The response object returned by ``chat.completions.create``.
    """
    try:
        return await async_client.chat.completions.create(
            model=model, messages=messages, **kwargs
        )
    except RateLimitError as e:
        # Log and re-raise so the retry decorator schedules the next attempt.
        print(f"Rate limited: {e}. Retrying...")
        raise


# Token cost tracking
def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
    """Estimate the cost of a request from its token counts.

    Args:
        model: Model name used to look up per-million-token prices.
        input_tokens: Number of input tokens.
        output_tokens: Number of output tokens.

    Returns:
        The estimated cost, or 0.0 when *model* is not in the price table.
    """
    prices = _TOKEN_PRICES.get(model)
    if prices is None:
        return 0.0
    input_price, output_price = prices
    return (input_tokens * input_price + output_tokens * output_price) / 1_000_000