This post includes code from Scalable-Data-Analysis-in-Python-with-Dask and coiled-examples.

import numpy as np
import dask.array as da
import pandas as pd
import sqlalchemy as db
from sqlalchemy import create_engine
import sqlite3
import pandas as pd

engine = db.create_engine('sqlite:///fiscal_data.db')
connection = engine.connect()
metadata = db.MetaData()

engine.execute("SELECT * FROM fiscal_data LIMIT 1").fetchall()

[(1996, 'East China', 'Anhui', '2093.3', 50661, 631930, 147002)]

sql = """
SELECT year
, region
, province
, gdp
, fdi
, it
, specific
FROM fiscal_data
"""

cnxn = connection

df = pd.read_sql(sql, cnxn)

df

df.columns

Index(['year', 'region', 'province', 'gdp', 'fdi', 'it', 'specific'], dtype='object')

df.gdp.hist()

<matplotlib.axes._subplots.AxesSubplot at 0x7f41255eacc0>

from dask.distributed import Client

client = Client(processes=False, threads_per_worker=2,
                n_workers=3, memory_limit='4GB')
client

from dask import dataframe as dd

ddf = dd.from_pandas(df, npartitions=5)

print(ddf)

ddf.npartitions

ddf.npartitions

len(ddf)

from dask.distributed import Client

client = Client(processes=False, threads_per_worker=2,
                n_workers=3, memory_limit='4GB')
client

ddf.describe().compute()

ddf.columns

feat_list = ["year", "fdi"]
cat_feat_list = ["region", "province"]
target = ["gdp"]

ddf["year"] = ddf["year"].astype(float)
ddf["fdi"] = ddf["fdi"].astype(float)
#ddf["province"] = ddf["province"].astype(float)
#ddf["region"] = ddf["region"].astype(float)
ddf["gdp"] = ddf["gdp"].astype(float)
ddf["it"] = ddf["it"].astype(float)

type(target)

x=ddf[feat_list].persist()
y=ddf[target].persist()

x

y.compute()

print(x.shape,y.shape)

x.count().compute()

from dask_ml.xgboost import XGBRegressor

XGBR = XGBRegressor()

%%time
XGBR_model = XGBR.fit(x,y)

XGBR_model

client.close()

	year	region	province	gdp	fdi	it	specific
0	1996	East China	Anhui	2093.3	50661	631930	147002.0
1	1997	East China	Anhui	2347.32	43443	657860	151981.0
2	1998	East China	Anhui	2542.96	27673	889463	174930.0
3	1999	East China	Anhui	2712.34	26131	1227364	285324.0
4	2000	East China	Anhui	2902.09	31847	1499110	195580.0
...	...	...	...	...	...	...	...
355	2003	East China	Zhejiang	9705.02	498055	2261631	391292.0
356	2004	East China	Zhejiang	11648.7	668128	3162299	656175.0
357	2005	East China	Zhejiang	13417.68	772000	2370200	656175.0
358	2006	East China	Zhejiang	15718.47	888935	2553268	1017303.0
359	2007	East China	Zhejiang	18753.73	1036576	2939778	844647.0