diff --git a/dask_ml/model_selection/_search.py b/dask_ml/model_selection/_search.py index 7f1187309..479a0dacc 100644 --- a/dask_ml/model_selection/_search.py +++ b/dask_ml/model_selection/_search.py @@ -478,8 +478,8 @@ def do_fit( out_append = out.append for X, y, t, p in zip(Xs, ys, tokens, params): - if (X, y, t) in seen: - out_append(seen[X, y, t]) + if (id(X), id(y), t) in seen: + out_append(seen[id(X), id(y), t]) else: for n, fit_params in n_and_fit_params: dsk[(fit_name, m, n)] = ( @@ -492,7 +492,7 @@ def do_fit( p, fit_params, ) - seen[(X, y, t)] = (fit_name, m) + seen[(id(X), id(y), t)] = (fit_name, m) out_append((fit_name, m)) m += 1 @@ -565,8 +565,8 @@ def do_fit_transform( out_append = out.append for X, y, t, p in zip(Xs, ys, tokens, params): - if (X, y, t) in seen: - out_append(seen[X, y, t]) + if (id(X), id(y), t) in seen: + out_append(seen[id(X), id(y), t]) else: for n, fit_params in n_and_fit_params: dsk[(fit_Xt_name, m, n)] = ( @@ -581,7 +581,7 @@ def do_fit_transform( ) dsk[(fit_name, m, n)] = (getitem, (fit_Xt_name, m, n), 0) dsk[(Xt_name, m, n)] = (getitem, (fit_Xt_name, m, n), 1) - seen[X, y, t] = m + seen[id(X), id(y), t] = m out_append(m) m += 1 @@ -665,6 +665,7 @@ def _do_fit_step( # Extract the proper subset of Xs, ys sub_Xs = get(ids, Xs) sub_ys = get(ids, ys) + # Only subset the parameters/tokens if necessary if sub_fields: sub_tokens = list(pluck(sub_inds, get(ids, tokens))) diff --git a/dask_ml/model_selection/utils.py b/dask_ml/model_selection/utils.py index c54bda333..09648286f 100644 --- a/dask_ml/model_selection/utils.py +++ b/dask_ml/model_selection/utils.py @@ -75,7 +75,10 @@ def to_keys(dsk, *args): for x in args: if x is None: yield None - elif isinstance(x, (da.Array, dd.DataFrame)): + elif isinstance(x, da.Array): + dsk.update(x.dask) + yield x + elif isinstance(x, dd.DataFrame): x = delayed(x) dsk.update(x.dask) yield x.key