>>> odo([1, 2, 3], tuple)
(1, 2, 3)
That one-liner follows a single edge of odo's conversion graph: list » tuple.
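The signature is always odo(source, target): the target can be a Python type, as above, or a URI string naming a file, table, or database. Another built-in in-memory edge, assuming numpy is installed:

>>> import numpy as np
>>> odo([1, 2, 3], np.ndarray)
array([1, 2, 3])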
URIs work the same way; here odo converts a CSV-backed Hive table to a Parquet-backed one, passing backend-specific options through as keyword arguments:

>>> odo('hive://hostname/default::users_csv',
... 'hive://hostname/default::users_parquet',
... stored_as='PARQUET', external=False)
<an eternity later ... a sqlalchemy.Table repr>
Without odo, getting a DataFrame into a database means staging a CSV by hand and bulk-loading it:

df.to_csv('/path/to/file.csv')

LOAD DATA
LOCAL INFILE '/path/to/file.csv'
INTO TABLE mytable;
With odo, the same move is a single call:

>>> odo(df,
... 'hive://hostname/default::tablename')
By hand, JSON on S3 into PostgreSQL is a four-step pipeline: pull the object down with boto, parse it with pandas, re-serialize it as CSV, then bulk-load:

import boto
import pandas as pd

# 1. download the object from S3 (boto 2 API)
boto.connect_s3().get_bucket('mybucket') \
    .get_key('path/to/data.json') \
    .get_contents_to_filename('data.json')
# 2. parse the JSON; 3. write it back out as CSV
pd.read_json('data.json').to_csv('/path/to/file.csv')

-- 4. bulk-load into PostgreSQL
COPY t FROM '/path/to/file.csv'
WITH (FORMAT csv, DELIMITER ',', HEADER true);
odo collapses the whole pipeline into one call:

>>> odo('s3://mybucket/path/to/data.json',
... 'postgresql://user:passwd@localhost:port/db::data')
Each node is a type (DataFrame, list, sqlalchemy.Table, etc.). Each edge is a conversion function.
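A minimal sketch of that idea (illustrative only, not odo's actual internals): types as nodes in a networkx digraph, conversion callables stored on the edges, and the cheapest route applied hop by hop:

import networkx as nx

g = nx.DiGraph()
# nodes are types; each edge carries a converter and a cost
g.add_edge(list, tuple, cost=1.0, func=tuple)
g.add_edge(tuple, set, cost=1.0, func=set)

def sketch_odo(data, source, target):
    # find the cheapest path, then apply each edge's converter in turn
    route = nx.shortest_path(g, source, target, weight='cost')
    for a, b in zip(route, route[1:]):
        data = g[a][b]['func'](data)
    return data

print(sketch_odo([1, 2, 3], list, set))  # {1, 2, 3}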
Adding a new edge means registering a conversion function through convert.register (target type first, then source type):

import pandas as pd

from odo import convert
from pyspark.sql import DataFrame as SparkDataFrame

@convert.register(pd.DataFrame, SparkDataFrame, cost=1.0)
def frame_to_frame(spark_frame, **kwargs):
    # collect the distributed Spark frame into local pandas memory
    return spark_frame.toPandas()
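Once registered, odo can route through the new edge automatically. A sketch, assuming a live SparkSession bound to the name spark:

>>> sdf = spark.createDataFrame([(1, 'a'), (2, 'b')], ['x', 'y'])
>>> odo(sdf, pd.DataFrame)
   x  y
0  1  a
1  2  b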
Odo is packaged for both conda and pip:

conda install odo
pip install odo
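After installing, a quick smoke test (printing __version__ assumes the package exposes it; a bare import is enough to prove the install):

python -c "import odo; print(odo.__version__)"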