Object Storage
This page covers how to read data from and write data to AWS S3, Google Cloud Storage, and Azure Blob Storage using smart_open and the Gretel client.
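To get started, install smart_open with the extras for the object stores you plan to use, along with the Gretel client: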
pip install smart_open[azure] # Install Azure deps
pip install smart_open[gcs] # Install GCS deps
pip install smart_open[s3] # Install S3 deps
pip install -U gretel-client # Install the Gretel client
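The first example authenticates with Gretel Cloud, streams a CSV from S3 with smart_open, and submits it to train a synthetic model: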
import pandas as pd
from smart_open import open
from gretel_client import configure_session
from gretel_client.projects import create_or_get_unique_project
# authenticate with Gretel Cloud
configure_session(api_key="prompt", cache="yes", validate=True)
# create a project
project = create_or_get_unique_project(name="synthetic-data")
# swap out s3:// with gs:// or azure:// (see the Azure note after this example)
with open('s3://example-bucket/test_in.csv', "rb") as f:
    df = pd.read_csv(f)
# submit the dataframe to Gretel to train a synthetic model
model = project.create_model_obj(model_config="synthetics/default", data_source=df)
model.submit_cloud()
# view the synthetic data
synthetic_df = pd.read_csv(model.get_artifact_link("data_preview"), compression="gzip")
synthetic_df
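A note on the azure:// scheme: depending on your smart_open version, Azure Blob Storage paths may need an explicit client passed through transport_params rather than working with the URI alone. A minimal sketch, assuming a connection string is available in the AZURE_STORAGE_CONNECTION_STRING environment variable (the variable name and container are illustrative):

import os
import pandas as pd
from azure.storage.blob import BlobServiceClient
from smart_open import open

# assumption: a connection string for your storage account is set in the environment
connect_str = os.environ['AZURE_STORAGE_CONNECTION_STRING']
transport_params = {'client': BlobServiceClient.from_connection_string(connect_str)}

# read the input CSV from an Azure container instead of S3
with open('azure://example-container/test_in.csv', 'rb', transport_params=transport_params) as f:
    df = pd.read_csv(f)

Once the model has finished training, you can load the synthetic data back into a dataframe and write it out to object storage: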
import pandas as pd
from smart_open import open
from gretel_client import configure_session
from gretel_client.projects import create_or_get_unique_project
# authenticate with Gretel Cloud
configure_session(api_key="prompt", cache="yes", validate=True)
# retrieve the project and a previously trained model
proj = create_or_get_unique_project(name="synthetic-data")
model = proj.get_model('model_id')  # replace with your model's ID
# read synthetic data into dataframe
synthetic_df = pd.read_csv(model.get_artifact_link("data_preview"), compression="gzip")
# swap out s3:// with gs:// or azure://
bucket = 'example-bucket'  # placeholder: your destination bucket
key = 'test_out.csv'       # placeholder: your destination object key
with open(f's3://{bucket}/{key}', 'w') as fout:
    synthetic_df.to_csv(fout)
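Writing to Google Cloud Storage follows the same pattern. A minimal sketch, assuming smart_open[gcs] is installed and Application Default Credentials are configured (the bucket name is illustrative):

# write the synthetic data to a GCS bucket instead of S3
with open('gs://example-bucket/test_out.csv', 'w') as fout:
    synthetic_df.to_csv(fout)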