Structured Outputs
Transform unstructured model responses into strictly typed, schema-validated data. Structured Outputs ensures every response matches your predefined schema specifications, making integrations reliable and predictable.
Core Benefits
Schema Enforcement: Responses automatically conform to your JSON schema definitions.
Developer Experience: Avoid writing long prompts with strict guidelines on model outputs; instead, define your outputs with simple `Pydantic` objects.
Structured Data Generation with Data Designer
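When using the Gretel SDK, you can request structured data outputs by passing the `data_config` parameter when adding a generated data column to a `DataDesigner` object. With the "structured" type, its "params" accept either a Pydantic `BaseModel` (under "model") or a JSON schema (under "json_schema").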
from pydantic import BaseModel
# Nested schema: a single integer field, embedded in `Foo` in this example.
class Bar(BaseModel):
    answer: int
# Top-level schema for the structured column: a string field plus a nested `Bar`.
class Foo(BaseModel):
    baz: str  # Python's built-in string type is `str`
    bar: Bar
## Option 1: hand the Pydantic model class itself to the "structured" config
data_designer.add_generated_data_column(
    name="column_name",
    generation_prompt="Dear AI, generate some data.",
    data_config={
        "type": "structured",
        "params": {"model": Foo},
    },
)
## Option 2: supply any JSON schema (here, the one derived from the Pydantic model)
data_designer.add_generated_data_column(
    name="column_name",
    generation_prompt="Dear AI, generate some data.",
    data_config={
        "type": "structured",
        "params": {"json_schema": Foo.model_json_schema()},
    },
)
For Pydantic types, you can also use `Field` to attach extra instruction information (such as a description) to each attribute. This information is passed along to the LLM behind the scenes and can help you get optimal performance out of generations:
from pydantic import BaseModel, Field
# Schema for a single fruit; each Field description is extra guidance for the LLM.
class Fruit(BaseModel):
    name: str = Field(..., description="Name of the fruit.")
    cost: float = Field(..., description="Dollar value of the fruit.")
    weight: float = Field(..., description="Weight in lbs.")
    flavor: str = Field(..., description="Primary flavor profile of the fruit.")
# Container schema: a list of `Fruit` entries plus their combined cost.
class FruitList(BaseModel):
    fruits: list[Fruit]
    total_cost: float = Field(..., description="Total fruit cost")
## Generate a structured column that follows the FruitList schema.
## The keyword is `data_config`, matching the parameter described earlier on this page.
data_designer.add_generated_data_column(
    name="thai_fruits",
    generation_prompt="Generate a list of fruits native to Thailand.",
    data_config={"type": "structured", "params": {"model": FruitList}},
)
Code Generation
Code generation is handled in much the same way: specify the "code" type and provide the "syntax" for the desired language. The examples below also pass llm_type="code".
## Generate Python code
## The keyword is `data_config`, matching the structured-output examples above.
data_designer.add_generated_data_column(
    name="column_name",
    generation_prompt="Dear AI, generate some data.",
    llm_type="code",
    data_config={"type": "code", "params": {"syntax": "python"}},
)
## Generate Rust code
## Only the "syntax" value changes between target languages.
data_designer.add_generated_data_column(
    name="column_name",
    generation_prompt="Dear AI, generate some data.",
    llm_type="code",
    data_config={"type": "code", "params": {"syntax": "rust"}},
)
Here's a quick demo creating a fruit salad recipe!
## Instantiate Data Designer (DD)
## NOTE: assumes `DataDesigner`, `model_suite`, and `session` were set up earlier (not shown here).
data_designer = DataDesigner(
    model_suite=model_suite,
    special_system_instructions="",
    session=session,
)
## Seed column: the region each fruit salad comes from.
## Three values are given explicitly and five more are requested.
data_designer.add_categorical_seed_column(
    name="region",
    description="Regions of the world with an exciting culinary tradition.",
    values=[
        "Thailand",
        "France",
        "South Africa",
    ],
    num_new_values_to_generate=5,
)
## Now, we're making a recipe, which is pretty structured.
## So let's give data designer a recipe to follow!
from rich.pretty import Pretty
from pydantic import BaseModel, Field
# Schema for one fruit in the salad; each Field description is extra guidance for the LLM.
class Fruit(BaseModel):
    name: str = Field(..., description="Name of the fruit.")
    cost: float = Field(..., description="Dollar value of the fruit.")
    weight: float = Field(..., description="Weight in lbs.")
    flavor: str = Field(..., description="Primary flavor profile of the fruit.")
    preparation: str = Field(..., description="How to prepare the fruit for a fruit salad.")
# Schema for the whole recipe: a named salad, its fruits, and the total cost.
class FruitSalad(BaseModel):
    total_cost: float = Field(..., description="Total cost of all fruits.")
    name: str = Field(..., description="Name of this unique fruit salad.")
    fruits: list[Fruit]

    def __rich__(self):
        """Return a ``rich`` ``Pretty`` renderable wrapping this model."""
        return Pretty(self)
## Tell DD to generate some fruit salads
## {region} in the prompt refers to the "region" seed column.
## The keyword is `data_config`, matching the parameter described earlier on this page.
data_designer.add_generated_data_column(
    name="fruit_salad",
    generation_prompt=(
        "Create a description of fruits to go in a regional fruit salad from {region}!"
    ),
    data_config={"type": "structured", "params": {"model": FruitSalad}},
)
## But we also want to be able to make a beautiful
## visual display to go with our fruit salad!
## {fruit_salad} in the prompt refers to the structured column defined above.
## The keyword is `data_config`, matching the parameter described earlier on this page.
data_designer.add_generated_data_column(
    name="fruit_salad_html",
    generation_prompt=(
        "<data>\n{fruit_salad}\n</data>\n\n"
        "Given the provided <data>, write a self-contained HTML page "
        "which provides all of the provided information in a beautiful and "
        "easy-to-read format. Embed your own thorough CSS into the document.\n"
        "The page should be in Dark Mode.\n"
        "The content about the <data> should be in a card centered on the page.\n"
        "The page is intended to be displayed within a Jupyter notebook using IPython.display.HTML."
    ),
    llm_type="code",
    data_config={"type": "code", "params": {"syntax": "html"}},
)
## Now let's get cooking!!
## Ask Data Designer for a dataset preview; sample output is shown below.
preview = data_designer.generate_dataset_preview()
Categorical Seed Columns
┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Name ┃ Value ┃
┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ region │ Japan │
└────────────────────────────────────────────────────────────┴────────────────────────────────────────────────────┘
Generated Data Columns
┏━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┓
┃ Name ┃ Value ┃
┡━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┩
│ fruit_salad │ { │
│ │ 'total_cost': 12.5, │
│ │ 'name': 'Japanese Summer Fruit Salad', │
│ │ 'fruits': [ │
│ │ { │
│ │ 'name': 'Mikan', │
│ │ 'cost': 1.5, │
│ │ 'weight': 0.5, │
│ │ 'flavor': 'Sweet and tangy', │
│ │ 'preparation': 'Quartered and pith removed' │
│ │ }, │
│ │ { │
│ │ 'name': 'Hozuki', │
│ │ 'cost': 2.0, │
│ │ 'weight': 0.4, │
│ │ 'flavor': 'Slightly bitter', │
│ │ 'preparation': 'Sliced thinly' │
│ │ }, │
│ │ { │
│ │ 'name': 'Daimyo Kiwi', │
│ │ 'cost': 3.0, │
│ │ 'weight': 0.3, │
│ │ 'flavor': 'Juicy and tart', │
│ │ 'preparation': 'Sliced into rounds' │
│ │ }, │
│ │ { │
│ │ 'name': 'Yuzu', │
│ │ 'cost': 4.0, │
│ │ 'weight': 0.2, │
│ │ 'flavor': 'Citrusy and sour', │
│ │ 'preparation': 'Peel removed, segmented' │
│ │ }, │
│ │ { │
│ │ 'name': 'Elderberries', │
│ │ 'cost': 2.5, │
│ │ 'weight': 0.2, │
│ │ 'flavor': 'Sweet and tangy', │
│ │ 'preparation': 'Chopped' │
│ │ } │
│ │ ] │
│ │ } │
├──────────────────────┼──────────────────────────────────────────────────────────────────────────────────────────┤
│ fruit_salad_html │ <!DOCTYPE html> │
│ │ <html lang="en"> │
│ │ <head> │
│ │ <meta charset="UTF-8"> │
│ │ <meta name="viewport" content="width=device-width, initial-scale=1.0"> │
│ │ <title>Fruit Salad Details</title> │
│ │ <style> │
│ │ body { │
│ │ background-color: #121212; │
│ │ color: #ffffff; │
│ │ font-family: Arial, sans-serif; │
│ │ margin: 0; │
│ │ padding: 0; │
│ │ display: flex; │
│ │ justify-content: center; │
│ │ align-items: center; │
│ │ height: 100vh; │
│ │ } │
│ │ .card { │
│ │ background-color: #222222; │
│ │ padding: 20px; │
│ │ border-radius: 8px; │
│ │ box-shadow: 0 4px 8px rgba(0, 0, 0, 0.1); │
│ │ width: 300px; │
│ │ text-align: center; │
│ │ } │
│ │ .card h1 { │
│ │ margin-top: 0; │
│ │ } │
│ │ .card p { │
│ │ margin: 10px 0; │
│ │ } │
│ │ .card ul { │
│ │ list-style-type: none; │
│ │ padding: 0; │
│ │ } │
│ │ .card ul li { │
│ │ margin: 5px 0; │
│ │ } │
│ │ </style> │
│ │ </head> │
│ │ <body> │
│ │ <div class="card"> │
│ │ <h1>Japanese Summer Fruit Salad</h1> │
│ │ <p>Total Cost: $12.50</p> │
│ │ <ul> │
│ │ <li><strong>Mikan:</strong> Quartered and pith removed</li> │
│ │ <li><strong>Hozuki:</strong> Sliced thinly</li> │
│ │ <li><strong>Daimyo Kiwi:</strong> Sliced into rounds</li> │
│ │ <li><strong>Yuzu:</strong> Peel removed, segmented</li> │
│ │ <li><strong>Elderberries:</strong> Chopped</li> │
│ │ </ul> │
│ │ </div> │
│ │ </body> │
│ │ </html> │
└──────────────────────┴──────────────────────────────────────────────────────────────────────────────────────────┘
[index: 0]
Last updated
Was this helpful?