Four Ways to Generate JSON Output When Using the OpenAI responses.create() Method

There are (at least) four ways to generate JSON formatted output when using the OpenAI responses.create() method: 1.) supply detailed instructions and examples, 2.) craft a JSON schema by hand, 3.) craft a JSON schema using inference from an example, 4.) craft a JSON schema using the Pydantic library. In practice, all four techniques are often used together.

I set up a demo. The underlying context is information about a professional football player, and the goal is to extract his name, signing bonus, and annual salaries:

# Source passage the model extracts from. The yearly salaries are written
# with full thousands groups so they agree with the desired output.
context = (
  "John Smith is a quarterback for the Los "
  "Angeles Rams. He signed in 2025 and received a "
  "bonus of $654,000. His negotiated contract states "
  "that he will receive $800,000 in 2025, then "
  "$900,000 in 2026. If he makes all-Pro, then he "
  "will get a special award bonus of $200,000."
)

# The question asked about the passage.
query = (
  "Give me the name, and signing bonus, and the "
  "salary for each year for John Smith."
)

The desired output is:

{
  "player_name": "John Smith",
  "signing_bonus": "$654,000",
  "salaries": [
    {"year": "2025", "salary": "$800,000"},
    {"year": "2026", "salary": "$900,000"}
  ]
}

A crude approach is to supply the responses.create() method with highly detailed formatting instructions:

# Natural-language formatting instructions (no schema involved).
# The prose names the keys to use, and the trailing example is a
# well-formed JSON document with one object per year, so the example
# and the desired output shape agree.
my_instructions = (
  "Express the output using JSON "
  "key-value format. For the keys, for the player "
  "name use 'player_name' and for the signing "
  "bonus use 'signing_bonus'. For example, 'player_name' "
  ": 'Thomas Jones', 'signing_bonus' : '$123,000'. "
  "For each salary, use the key 'year' for the year "
  "and the key 'salary' for the salary. For example, "
  "'year' : '2026', 'salary' : '$2,500,000'. List each year "
  "separately. Here's an example: "
  '{ "player_name": "Thomas Jones", '
  '"signing_bonus": "$123,000", '
  '"salaries": [ '
  '{ "year": "2023", "salary": "$1,000,000" }, '
  '{ "year": "2024", "salary": "$2,500,000" } '
  "] }"
)

# The passage and the question go in as two separate user turns; the
# output format is steered only by the natural-language instructions.
user_turns = [
  {"role": "user", "content": context},
  {"role": "user", "content": query},
]
response1 = client.responses.create(
  instructions=my_instructions,
  input=user_turns,
  tools=[],
  model="gpt-4o",
)
print(response1.output_text)

For very simple formatting, this approach usually works well. But for non-simple scenarios it can become nearly impossible to describe the desired output using natural language.

A second approach is to craft a JSON schema by hand:

# Hand-written JSON schema, assembled from the inside out.

# Shape of one element of the "salaries" array.
_by_hand_salary_item = {
  "type": "object",
  "properties": {
    "year": {"type": "string"},
    "salary": {"type": "string"},
  },
  "required": ["year", "salary"],
  "additionalProperties": False,
}

# Top-level object: two string fields plus the array of salary entries.
by_hand_schema = {
  "type": "object",
  "properties": {
    "player_name": {"type": "string"},
    "signing_bonus": {"type": "string"},
    "salaries": {"type": "array", "items": _by_hand_salary_item},
  },
  "required": ["player_name", "signing_bonus", "salaries"],
  "additionalProperties": False,
}

# Same two user turns as before, but the reply is now constrained by
# the hand-written schema passed through the text format setting.
user_turns = [
  {"role": "user", "content": context},
  {"role": "user", "content": query},
]
output_format = {
  "type": "json_schema",
  "name": "demo",
  "schema": by_hand_schema,
  "strict": True,
}
response2 = client.responses.create(
  model="gpt-4o",
  tools=[],
  input=user_turns,
  text={"format": output_format},
)
print(response2.output_text)

Unless you work with JSON schema every day, this craft-schema-by-hand approach is very difficult, and often just isn’t practical.

A third technique is to use an inference tool. The idea is to craft a specific example of the format you want, pass that example to a tool, and the tool will emit a JSON schema. There are many such tools floating around the Internet.

I fed the desired output (from above) to the tool at liquid-technologies.com/online-json-to-schema-converter. The schema I got was close, but required about 30 minutes of editing:

# Schema produced by the online inference tool, after manual clean-up.
# Manual edits to the tool's output:
#   - "minItems": 0 was added to the salaries array
#   - the tool wrapped "items" in a list ('[' ... ']'); the brackets
#     were removed so "items" is a single object schema
#   - "additionalProperties": False was added at both object levels

# Shape of one element of the "salaries" array.
_inferred_salary_item = {
  "type": "object",
  "properties": {
    "year": {"type": "string"},
    "salary": {"type": "string"},
  },
  "required": ["year", "salary"],
  "additionalProperties": False,
}

inferred_schema = {
  "type": "object",
  "properties": {
    "player_name": {"type": "string"},
    "signing_bonus": {"type": "string"},
    "salaries": {
      "type": "array",
      "minItems": 0,
      "items": _inferred_salary_item,
    },
  },
  "required": ["player_name", "signing_bonus", "salaries"],
  "additionalProperties": False,
}

# Same request as before, this time constrained by the inferred schema.
user_turns = [
  {"role": "user", "content": context},
  {"role": "user", "content": query},
]
output_format = {
  "type": "json_schema",
  "name": "demo",
  "schema": inferred_schema,
  "strict": True,
}
response3 = client.responses.create(
  model="gpt-4o",
  tools=[],
  input=user_turns,
  text={"format": output_format},
)
print(response3.output_text)

This inference-from-example approach is often hit-or-miss, at least based on my experience. For non-demo scenarios, the inferred JSON schema is almost never completely correct, but it’s usually close and can be corrected manually (but with non-trivial effort).

A fourth approach is to use the Pydantic library. You set up a Python-like class using special syntax, and then use the model_json_schema() method to get the schema:

from pydantic import BaseModel, ConfigDict

class Salary(BaseModel):
  # One entry of the salaries list: a year and the salary for that
  # year, both declared as strings.
  # configure to add "additionalProperties": False
  model_config = ConfigDict(extra="forbid")
  year: str
  salary: str

class MyOutputSchema(BaseModel):
  # Top-level shape of the answer: the player's name, the signing
  # bonus, and a list of Salary entries.
  # extra="forbid" is set here as well, the same setting Salary uses.
  model_config = ConfigDict(extra="forbid")
  player_name: str
  signing_bonus: str
  salaries: list[Salary]

# Derive the schema from the model class. model_json_schema() is the
# Pydantic v2 call; the v1 spelling, MyOutputSchema.schema(), is not
# used here.
schema_pydantic = MyOutputSchema.model_json_schema()

# Same request as before, constrained by the Pydantic-derived schema.
user_turns = [
  {"role": "user", "content": context},
  {"role": "user", "content": query},
]
output_format = {
  "type": "json_schema",
  "name": "demo",
  "schema": schema_pydantic,
  "strict": True,
}
response4 = client.responses.create(
  model="gpt-4o",
  tools=[],
  input=user_turns,
  text={"format": output_format},
)
print(response4.output_text)

This technique is often effective, but 1.) it adds a significant dependency, 2.) you have to learn the special Pydantic syntax, 3.) the technique is difficult to debug in case of errors.

The bottom line is that there is no single best approach for getting JSON format output from the responses.create() method. It’s best to have all four techniques in your personal tool kit.

By the way, there is an OpenAI responses.parse() method that works directly with Pydantic class definitions. For scenarios where you only want to extract data (with no tricky logic or reasoning), the responses.parse() method is often a good choice.

There are many analogies between the current development of artificial intelligence systems and the development of space exploration.

Left: The SpaceX rocket has 33 engines. Most early rocket designs in the 1960s used only one, or a few, extremely large engines because the software technology needed to coordinate multiple engines wasn’t available.

Center: This interesting design is from the movie “First Spaceship on Venus” (1960). It is a German film that was dubbed and also released in the U.S. The movie isn’t bad — I give it a B- grade. I watched the movie for the first time when I was young, and there are some scenes that gave me nightmares (especially when a creepy sludge chases the explorers up a spiral Venusian building).

Right: This interesting rocket ship is from “Satellite in the Sky” (1956). It is a British movie. The first men into space meet various challenges including a “tritonium” bomb that attaches itself to the hull of the ship. Excellent special effects for the time. I give the movie a B grade.

Demo program:

# generate_json_schema.py
# instructions vs. raw vs. infer vs. Pydantic

import os

from openai import OpenAI

print("\nBegin generate JSON schema output demo ")

# 0. create global client
# The API key is read from the OPENAI_API_KEY environment variable
# rather than being embedded in the source, so the secret does not end
# up in version control or in a published listing.
key = os.environ.get("OPENAI_API_KEY")

client = OpenAI(api_key=key)

# Source passage the model extracts from. The yearly salaries are written
# with full thousands groups ($800,000 / $900,000).
context = (
  "John Smith is a quarterback for the Los "
  "Angeles Rams. He signed in 2025 and received a "
  "bonus of $654,000. His negotiated contract states "
  "that he will receive $800,000 in 2025, then "
  "$900,000 in 2026. If he makes all-Pro, then he "
  "will get a special award bonus of $200,000."
)

# The question asked about the passage.
query = (
  "Give me the name, and signing bonus, and the "
  "salary for each year for John Smith."
)

print("\nThe query is: ")
print(query)

# -----------------------------------------------------------

# 1. supply output formatting instructions, no schema

# The prose names the keys to use, and the trailing example is a
# well-formed JSON document with one object per year, so the example
# and the desired output shape agree.
my_instructions = (
  "Express the output using JSON "
  "key-value format. For the keys, for the player "
  "name use 'player_name' and for the signing "
  "bonus use 'signing_bonus'. For example, 'player_name' "
  ": 'Thomas Jones', 'signing_bonus' : '$123,000'. "
  "For each salary, use the key 'year' for the year "
  "and the key 'salary' for the salary. For example, "
  "'year' : '2026', 'salary' : '$2,500,000'. List each year "
  "separately. Here's an example: "
  '{ "player_name": "Thomas Jones", '
  '"signing_bonus": "$123,000", '
  '"salaries": [ '
  '{ "year": "2023", "salary": "$1,000,000" }, '
  '{ "year": "2024", "salary": "$2,500,000" } '
  "] }"
)

print("\nThe output formatting instructions are: \n ")
print(my_instructions)

# The passage and the question go in as two separate user turns; the
# output format is steered only by the natural-language instructions.
user_turns = [
  {"role": "user", "content": context},
  {"role": "user", "content": query},
]
response1 = client.responses.create(
  instructions=my_instructions,
  input=user_turns,
  tools=[],
  model="gpt-4o",
)

print("\nResponse with format instructions (no schema): ")
print(response1.output_text)

# -----------------------------------------------------------

# 2. craft schema directly by hand (assembled from the inside out)

# Shape of one element of the "salaries" array.
_by_hand_salary_item = {
  "type": "object",
  "properties": {
    "year": {"type": "string"},
    "salary": {"type": "string"},
  },
  "required": ["year", "salary"],
  "additionalProperties": False,
}

# Top-level object: two string fields plus the array of salary entries.
by_hand_schema = {
  "type": "object",
  "properties": {
    "player_name": {"type": "string"},
    "signing_bonus": {"type": "string"},
    "salaries": {"type": "array", "items": _by_hand_salary_item},
  },
  "required": ["player_name", "signing_bonus", "salaries"],
  "additionalProperties": False,
}

# Same two user turns, with the reply constrained by the hand-written
# schema passed through the text format setting.
user_turns = [
  {"role": "user", "content": context},
  {"role": "user", "content": query},
]
output_format = {
  "type": "json_schema",
  "name": "demo",
  "schema": by_hand_schema,
  "strict": True,
}
response2 = client.responses.create(
  model="gpt-4o",
  tools=[],
  input=user_turns,
  text={"format": output_format},
)

print("\nResponse using by-hand JSON schema: ")
print(response2.output_text)

# -----------------------------------------------------------

# 3. use an inference tool
# ex: liquid-technologies.com/online-json-to-schema-converter
#
# Example document that was supplied to the tool:
# {
#   "player_name" : "John Smith",
#   "signing_bonus" : "876,000",
#   "salaries" : [
#     { "year" : "2001", "salary" : "1,000,000" },
#   ],
# }
#
# Manual edits applied to the tool's output:
#   - "minItems": 0 was added to the salaries array
#   - the tool wrapped "items" in a list ('[' ... ']'); the brackets
#     were removed so "items" is a single object schema
#   - "additionalProperties": False was added at both object levels

# Shape of one element of the "salaries" array.
_inferred_salary_item = {
  "type": "object",
  "properties": {
    "year": {"type": "string"},
    "salary": {"type": "string"},
  },
  "required": ["year", "salary"],
  "additionalProperties": False,
}

# inferred schema:
inferred_schema = {
  "type": "object",
  "properties": {
    "player_name": {"type": "string"},
    "signing_bonus": {"type": "string"},
    "salaries": {
      "type": "array",
      "minItems": 0,
      "items": _inferred_salary_item,
    },
  },
  "required": ["player_name", "signing_bonus", "salaries"],
  "additionalProperties": False,
}

# Same request, this time constrained by the inferred schema.
user_turns = [
  {"role": "user", "content": context},
  {"role": "user", "content": query},
]
output_format = {
  "type": "json_schema",
  "name": "demo",
  "schema": inferred_schema,
  "strict": True,
}
response3 = client.responses.create(
  model="gpt-4o",
  tools=[],
  input=user_turns,
  text={"format": output_format},
)

print("\nResponse using inferred JSON schema: ")
print(response3.output_text)

# -----------------------------------------------------------

# 4. use Pydantic library
from pydantic import BaseModel, ConfigDict

class Salary(BaseModel):
  # One entry of the salaries list: a year and the salary for that
  # year, both declared as strings.
  # configure to add "additionalProperties": False
  model_config = ConfigDict(extra="forbid")
  year: str
  salary: str

class MyOutputSchema(BaseModel):
  # Top-level shape of the answer: the player's name, the signing
  # bonus, and a list of Salary entries.
  # extra="forbid" is set here as well, the same setting Salary uses.
  model_config = ConfigDict(extra="forbid")
  player_name: str
  signing_bonus: str
  salaries: list[Salary]

# Derive the schema from the model class. model_json_schema() is the
# Pydantic v2 call; the v1 spelling, MyOutputSchema.schema(), is not
# used here.
schema_pydantic = MyOutputSchema.model_json_schema()

# Same request, constrained by the Pydantic-derived schema.
user_turns = [
  {"role": "user", "content": context},
  {"role": "user", "content": query},
]
output_format = {
  "type": "json_schema",
  "name": "demo",
  "schema": schema_pydantic,
  "strict": True,
}
response4 = client.responses.create(
  model="gpt-4o",
  tools=[],
  input=user_turns,
  text={"format": output_format},
)

print("\nResponse using Pydantic schema: ")
print(response4.output_text)

# -----------------------------------------------------------

print("\nEnd demo ")