OpenAI GPT-4o and o1: Deep Dive into Modern Language Models

The OpenAI Model Landscape

┌────────────────────────────────────────────────────────────┐
│             OpenAI Model Lineup (2024-2025)                │
├─────────────────┬──────────┬────────────┬─────────────────┤
│  Model          │ Context  │ Modalities │ Specialty       │
├─────────────────┼──────────┼────────────┼─────────────────┤
│  gpt-4o         │  128K    │ Text+Vision│ Speed + quality │
│  gpt-4o-mini    │  128K    │ Text+Vision│ Cost efficient  │
│  o1             │  200K    │ Text       │ Deep reasoning  │
│  o1-mini        │  128K    │ Text       │ STEM reasoning  │
│  o3-mini        │  200K    │ Text       │ Frontier STEM   │
│  gpt-image-1    │   -      │ Image gen  │ Image creation  │
└─────────────────┴──────────┴────────────┴─────────────────┘

GPT-4o: Multimodal at Speed

python
import base64
from pathlib import Path

from openai import OpenAI

# Shared synchronous client used by the examples below. With no arguments the
# SDK picks up credentials from the environment (OPENAI_API_KEY).
client = OpenAI()

# Vision: analyze an image
def analyze_image(image_path: str, question: str) -> str:
    """Ask gpt-4o a question about a local image file.

    The file is read from disk, base64-encoded and sent inline as a data URL
    together with the question in a single user message.

    Args:
        image_path: Path to the image file on disk.
        question: Natural-language question about the image.

    Returns:
        The content of the first choice's message.

    Raises:
        OSError: If the image file cannot be read.
    """
    import mimetypes  # local import keeps this listing self-contained

    path = Path(image_path)

    # Label the data URL with the file's real media type (e.g. image/png)
    # rather than always claiming JPEG; fall back to the previous hard-coded
    # value when the extension is unknown or is not an image type.
    mime_type, _ = mimetypes.guess_type(path.name)
    if mime_type is None or not mime_type.startswith("image/"):
        mime_type = "image/jpeg"

    image_data = base64.b64encode(path.read_bytes()).decode("ascii")

    response = client.chat.completions.create(
        model="gpt-4o",
        messages=[{
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:{mime_type};base64,{image_data}",
                        "detail": "high",  # high | low | auto
                    },
                },
                {"type": "text", "text": question},
            ],
        }],
        max_tokens=1024,
    )
    return response.choices[0].message.content

# Example: describe the services and data flow shown in a diagram image.
result = analyze_image(
    "architecture-diagram.png",
    "Identify all services and describe the data flow between them",
)

Structured Outputs — Guaranteed JSON Schema

Unlike JSON mode (which produces syntactically valid JSON but does not guarantee that it matches any particular schema), Structured Outputs guarantees schema conformance:

python
from typing import Literal

from pydantic import BaseModel


class ExtractedEntity(BaseModel):
    """A single named entity extracted from the input text."""

    # Surface form of the entity.
    name: str
    # Entity category, restricted to this closed set of labels.
    type: Literal["PERSON", "ORGANIZATION", "LOCATION", "PRODUCT"]
    # Confidence score reported by the model; this schema does not
    # constrain its range.
    confidence: float


class ExtractionResult(BaseModel):
    """Top-level schema the model's response must conform to."""

    # Every entity found in the text.
    entities: list[ExtractedEntity]
    # Free-text summary of the input.
    summary: str
    # Overall sentiment, restricted to this closed set of labels.
    sentiment: Literal["POSITIVE", "NEGATIVE", "NEUTRAL"]

response = client.beta.chat.completions.parse(
    model="gpt-4o-2024-08-06",
    messages=[
        {"role": "system", "content": "Extract entities from news articles."},
        {"role": "user", "content": "Apple CEO Tim Cook announced a new partnership with Microsoft in Seattle."},
    ],
    response_format=ExtractionResult,  # Pass the Pydantic model directly
)

message = response.choices[0].message

# `parsed` is None when the model refuses the request; fail with a clear
# error instead of an AttributeError on `result.entities` below.
if message.parsed is None:
    raise RuntimeError(f"Model returned no parsed result: {message.refusal}")

result: ExtractionResult = message.parsed
for entity in result.entities:
    print(f"{entity.name} ({entity.type}) — confidence: {entity.confidence:.2f}")

o1 Models: Chain-of-Thought Reasoning

o1 models spend time "thinking" before responding. They excel at mathematics, coding challenges, and multi-step reasoning:

python
# o1 uses reasoning_effort instead of temperature to control how much
# "thinking" happens before the answer is produced.
# NOTE(review): this listing originally stated that o1 supports neither
# system messages nor streaming; support for both has varied between
# o1-series snapshots — confirm against the current API reference.

response = client.chat.completions.create(
    model="o1",
    messages=[{
        "role": "user",
        "content": """Design a database schema for a multi-tenant SaaS application.
        Requirements:
        - Row-level security per tenant
        - Audit trails for all mutations
        - Soft deletes
        - Efficient cross-tenant analytics for admins
        - Must handle 10M+ rows per tenant
        Explain trade-offs and provide CREATE TABLE statements."""
    }],
    reasoning_effort="high",  # low | medium | high
)

print(response.choices[0].message.content)
# o1 will produce a deeply reasoned, comprehensive schema design

Function Calling (Tool Use)

python
import json

# Tool definitions offered to the model. A single function, `execute_sql`,
# takes the SQL text and the target database name.
tools = [{
    "type": "function",
    "function": {
        "name": "execute_sql",
        "description": "Execute a SQL query against the database and return results",
        "parameters": {
            "type": "object",
            "properties": {
                "query": {"type": "string", "description": "SQL query to execute"},
                "database": {"type": "string", "enum": ["analytics", "production"]},
            },
            "required": ["query", "database"],
            "additionalProperties": False,
        },
        "strict": True,  # Structured Outputs for tool calls too
    },
}]

def run_sql_agent(user_question: str, db_schema: str, max_turns: int = 10) -> str:
    """Answer a question by letting gpt-4o call the `execute_sql` tool in a loop.

    Each turn sends the conversation to the model. If the model requests tool
    calls, every call is executed via `execute_query` and its result is
    appended as a `tool` message; the loop then continues. When the model
    finishes normally, its message content is returned.

    Args:
        user_question: The end user's question.
        db_schema: Schema description embedded in the system prompt.
        max_turns: Upper bound on model round-trips, so a model that keeps
            requesting tools cannot loop forever.

    Returns:
        The content of the model's final message.

    Raises:
        RuntimeError: If the model stops for a reason other than "stop" or
            "tool_calls", or if `max_turns` is exhausted.
    """
    messages = [
        {"role": "system", "content": f"You are a SQL assistant. Schema:\n{db_schema}"},
        {"role": "user", "content": user_question},
    ]

    for _ in range(max_turns):
        response = client.chat.completions.create(
            model="gpt-4o",
            messages=messages,
            tools=tools,
            tool_choice="auto",
        )

        choice = response.choices[0]
        msg = choice.message
        messages.append(msg)

        if choice.finish_reason == "stop":
            return msg.content

        if choice.finish_reason != "tool_calls" or not msg.tool_calls:
            # Truncated or filtered output cannot be acted on; previously this
            # case re-sent the same conversation forever.
            raise RuntimeError(
                f"Unexpected finish_reason from model: {choice.finish_reason!r}"
            )

        for tool_call in msg.tool_calls:
            args = json.loads(tool_call.function.arguments)
            # SECURITY: the SQL text is authored by the model and may target
            # "production". `execute_query` (defined outside this listing)
            # must enforce read-only access and any other guardrails.
            result = execute_query(args["query"], args["database"])
            messages.append({
                "role": "tool",
                "tool_call_id": tool_call.id,
                # default=str is deliberate: query results may contain values
                # json cannot encode natively (presumably dates/decimals —
                # depends on what execute_query returns).
                "content": json.dumps(result, default=str),
            })

    raise RuntimeError(f"No final answer after {max_turns} model turns")

Embeddings with text-embedding-3

python
import numpy as np


def embed(texts: list[str], model: str = "text-embedding-3-small") -> np.ndarray:
    """Embed a batch of texts and return one row per input text.

    Args:
        texts: Strings to embed.
        model: Embedding model name.

    Returns:
        A 2-D array with one embedding per row, in the order the API returned
        them. For empty input an empty array of shape (0, 0) is returned
        without calling the API.
    """
    if not texts:
        return np.empty((0, 0))
    response = client.embeddings.create(input=texts, model=model)
    return np.array([item.embedding for item in response.data])

# Dimension reduction (Matryoshka)
def embed_compact(texts: list[str], dimensions: int = 256) -> np.ndarray:
    """Embed texts with text-embedding-3-large at a reduced output size.

    Args:
        texts: Strings to embed.
        dimensions: Number of components requested per embedding.

    Returns:
        A 2-D array with one embedding per row. For empty input an empty
        array of shape (0, dimensions) is returned without calling the API.

    Raises:
        ValueError: If `dimensions` is not positive.
    """
    if dimensions <= 0:
        raise ValueError(f"dimensions must be positive, got {dimensions}")
    if not texts:
        return np.empty((0, dimensions))
    response = client.embeddings.create(
        input=texts,
        model="text-embedding-3-large",
        dimensions=dimensions,  # 256 instead of 3072 — 12x smaller
    )
    return np.array([item.embedding for item in response.data])

Image Generation with gpt-image-1

python
import base64
import io

from PIL import Image

response = client.images.generate(
    model="gpt-image-1",
    prompt="""A photorealistic diagram of a microservices architecture.
    Services connected by arrows on a dark background.
    Tech aesthetic, blueprint style.""",
    size="1536x1024",
    quality="high",
    n=1,
)

# gpt-image-1 returns base64
image_bytes = base64.b64decode(response.data[0].b64_json)
# Decode through Pillow and write the result to disk as a PNG file.
image = Image.open(io.BytesIO(image_bytes))
image.save("architecture.png")

Rate Limiting and Cost Management

python
import asyncio

from openai import APIStatusError, AsyncOpenAI, RateLimitError
from tenacity import (
    retry,
    retry_if_exception_type,
    stop_after_attempt,
    wait_exponential,
)

# Async client used by call_with_retry; it was referenced but never created.
async_client = AsyncOpenAI()


@retry(
    # Only rate-limit errors are worth waiting out; without this filter every
    # exception (including invalid requests) was retried with backoff.
    retry=retry_if_exception_type(RateLimitError),
    stop=stop_after_attempt(5),
    wait=wait_exponential(multiplier=1, min=4, max=60),
    reraise=True,
)
async def call_with_retry(model: str, messages: list, **kwargs):
    """Create a chat completion, retrying with exponential backoff on rate limits.

    Args:
        model: Model name passed through to the API.
        messages: Chat messages passed through to the API.
        **kwargs: Any further arguments for `chat.completions.create`.

    Returns:
        The chat completion response object.

    Raises:
        RateLimitError: Re-raised after the fifth rate-limited attempt.
    """
    try:
        return await async_client.chat.completions.create(
            model=model, messages=messages, **kwargs
        )
    except RateLimitError as e:
        print(f"Rate limited: {e}. Retrying...")
        raise  # hand the error to tenacity, which decides whether to retry

# Token cost tracking
# Price per million tokens as (input, output). These are the values quoted in
# this article — verify against the current pricing page before relying on them.
_PRICES_PER_MILLION_TOKENS = {
    "gpt-4o": (2.50, 10.00),
    "gpt-4o-mini": (0.15, 0.60),
    "o1": (15.00, 60.00),
    "text-embedding-3-small": (0.02, 0),
}


def estimate_cost(model: str, input_tokens: int, output_tokens: int) -> float:
    """Estimate the cost of a request from its token counts.

    Args:
        model: Model name; must match a key of the price table exactly.
        input_tokens: Number of input (prompt) tokens.
        output_tokens: Number of output (completion) tokens.

    Returns:
        The estimated cost in the price table's currency, or 0.0 when the
        model is not in the table.
    """
    try:
        input_price, output_price = _PRICES_PER_MILLION_TOKENS[model]
    except KeyError:
        # Unknown models are reported as free rather than raising.
        return 0.0
    return (input_tokens * input_price + output_tokens * output_price) / 1_000_000

OpenAI GPT-4o and o1: Deep Dive into Modern Language Models

OpenAI GPT-4o and o1: Deep Dive into Modern Language Models

The OpenAI Model Landscape

GPT-4o: Multimodal at Speed

Structured Outputs — Guaranteed JSON Schema

o1 Models: Chain-of-Thought Reasoning

Function Calling (Tool Use)

Embeddings with text-embedding-3

Image Generation with gpt-image-1

Rate Limiting and Cost Management

Sumit Kumar Pandey

Share this article

Discussion (0)