def submit_generate(model, prompt: str, params: dict, ref_data=None) -> pd.DataFrame:
    """Generate or augment data from the Navigator model.

    Args:
        model: The model object that will process the prompt. It must
            provide ``create_record_handler_obj``.
        prompt (str): The text prompt to generate data from.
        params (dict): Parameters for data generation.
        ref_data: Optional existing dataset to edit or augment.

    Returns:
        pd.DataFrame: The generated data.
    """
    # The prompt is passed as a one-row DataFrame with a single "prompt" column.
    data_processor = model.create_record_handler_obj(
        data_source=pd.DataFrame({"prompt": [prompt]}),
        params=params,
        ref_data=ref_data,
    )
    data_processor.submit_cloud()
    # NOTE(review): `poll` is defined outside this block; presumably it blocks
    # until the submitted job finishes -- confirm against its definition.
    poll(data_processor, verbose=False)
    # The "data" artifact is read as gzip-compressed JSON Lines.
    return pd.read_json(
        data_processor.get_artifact_link("data"),
        lines=True,
        compression="gzip",
    )
# Example:
# Generate mock dataset
# NOTE(review): `model` is not defined in this snippet; it is assumed to be
# created earlier in the file -- confirm.
# The backslash right after the opening quotes keeps the prompt from starting
# with a blank line.
prompt = """\
Generate a mock dataset for users from the Foo company based in France.
Each user should have the following columns:
* first_name: traditional French first names.
* last_name: traditional French surnames.
* email: formatted as the first letter of their first name followed by their last name @foo.io (e.g., jdupont@foo.io).
* gender: Male/Female/Non-binary.
* city: a city in France.
* country: always 'France'.
"""
params = {"num_records": 10, "temperature": 0.8, "top_p": 1, "top_k": 50}
df = submit_generate(model=model, prompt=prompt, params=params)