def submit_generate(model, prompt: str, params: dict, ref_data=None) -> pd.DataFrame:
    """Run a single prompt through the model and load the resulting records.

    The prompt is wrapped in a one-row DataFrame under the ``"prompt"``
    column and handed to ``model.create_record_handler_obj`` together with
    ``params`` and ``ref_data``. The returned handler is submitted with
    ``submit_cloud()``, ``poll`` is called on it with ``verbose=False``, and
    the handler's ``"data"`` artifact is then read as gzip-compressed
    JSON Lines.

    Args:
        model: Object exposing ``create_record_handler_obj``.
        prompt (str): Text prompt describing the data to generate.
        params (dict): Generation parameters, forwarded unchanged.
        ref_data: Optional existing dataset to edit or augment, forwarded
            unchanged (``None`` by default).

    Returns:
        pd.DataFrame: The records parsed from the ``"data"`` artifact.
    """
    prompt_frame = pd.DataFrame({"prompt": [prompt]})
    handler = model.create_record_handler_obj(
        data_source=prompt_frame,
        params=params,
        ref_data=ref_data,
    )

    handler.submit_cloud()
    # NOTE(review): presumably blocks until the submitted job has finished —
    # confirm against the definition of `poll`, which is not in this view.
    poll(handler, verbose=False)

    artifact_url = handler.get_artifact_link("data")
    return pd.read_json(artifact_url, lines=True, compression="gzip")
# Example:
# Build a mock dataset by describing the desired columns in plain text.
# The backslash after the opening quotes keeps the prompt from starting
# with an empty line.
prompt = """\
Generate a mock dataset for users from the Foo company based in France.
Each user should have the following columns:
* first_name: traditional French first names.
* last_name: traditional French surnames.
* email: formatted as the first letter of their first name followed by their last name @foo.io (e.g., jdupont@foo.io).
* gender: Male/Female/Non-binary.
* city: a city in France.
* country: always 'France'.
"""

# Generation settings passed through to submit_generate as `params`.
params = {
    "num_records": 10,
    "temperature": 0.8,
    "top_p": 1,
    "top_k": 50,
}

# `model` is expected to be defined earlier in the file or session.
df = submit_generate(model=model, prompt=prompt, params=params)