@@ -51,7 +51,7 @@ class PCA(sklearn.decomposition.PCA):
5151 ignored
5252
5353 whiten : bool, optional (default False)
54- When True (False by default) the `components_` vectors are multiplied
54+ When True (False by default) the ``components_`` vectors are multiplied
5555 by the square root of n_samples and then divided by the singular values
5656 to ensure uncorrelated outputs with unit component-wise variances.
5757
@@ -60,25 +60,10 @@ class PCA(sklearn.decomposition.PCA):
6060 improve the predictive accuracy of the downstream estimators by
6161 making their data respect some hard-wired assumptions.
6262
63- center : bool, optional (default True)
64- When False (True by default), the underlying data gets centered at zero
65- by subtracting the mean of the data from the data itself.
66-
67- PCA is performed on centered data due to its being a regression model,
68- without an intercept. As such, its pricipal components originate at the
69- origin of the transformed space.
70-
71- `center` set to False may be employed when performing PCA on already
72- centered data.
73-
74- Since centering is a required step as part of whitening, `center` set
75- to False and `whiten` set to True is a combination which may result in
76- unexpected behavior, if performed on not previously centered data.
77-
7863 svd_solver : string {'auto', 'full', 'tsqr', 'randomized'}
7964 auto :
80- the solver is selected by a default policy based on `X.shape` and
81- `n_components`: if the input data is larger than 500x500 and the
65+ the solver is selected by a default policy based on ``X.shape`` and
66+ ``n_components``: if the input data is larger than 500x500 and the
8267 number of components to extract is lower than 80% of the smallest
8368 dimension of the data, then the more efficient 'randomized'
8469 method is enabled. Otherwise the exact full SVD is computed and
@@ -99,7 +84,22 @@ class PCA(sklearn.decomposition.PCA):
9984 If int, random_state is the seed used by the random number generator;
10085 If RandomState instance, random_state is the random number generator;
10186 If None, the random number generator is the RandomState instance used
102- by `da.random`. Used when ``svd_solver`` == 'randomized'.
87+ by ``da.random``. Used when ``svd_solver`` == 'randomized'.
88+
89+ center : bool, optional (default True)
90+ When True (the default), the underlying data gets centered at zero
91+ by subtracting the mean of the data from the data itself.
92+
93+ PCA is performed on centered data due to its being a regression model,
94+ without an intercept. As such, its principal components originate at the
95+ origin of the transformed space.
96+
97+ ``center=False`` may be employed when performing PCA on already
98+ centered data.
99+
100+ Since centering is a required step as part of whitening, ``center`` set
101+ to False and ``whiten`` set to True is a combination which may result in
102+ unexpected behavior, if performed on not previously centered data.
103103
104104 Attributes
105105 ----------
@@ -128,7 +128,7 @@ class PCA(sklearn.decomposition.PCA):
128128 mean_ : array, shape (n_features,)
129129 Per-feature empirical mean, estimated from the training set.
130130
131- Equal to `X.mean(axis=0)`.
131+ Equal to ``X.mean(axis=0)``.
132132
133133 n_components_ : int
134134 The estimated number of components. When n_components is set
@@ -197,20 +197,22 @@ class PCA(sklearn.decomposition.PCA):
197197 ``dask.linalg.svd_compressed``.
198198 * n_components : ``n_components='mle'`` is not allowed.
199199 Fractional ``n_components`` between 0 and 1 is not allowed.
200- * center : defaults to ``True`` and enables control over whether centering
201- gets implicitly performed as part of the PCA model steps.
200+ * center : if ``True`` (the default), automatically center input data before
201+ performing PCA.
202+ Set this parameter to ``False``, if the input data have already been
203+ centered before running ``fit()``.
202204 """
203205
204206 def __init__ (
205207 self ,
206208 n_components = None ,
207209 copy = True ,
208210 whiten = False ,
209- center = True ,
210211 svd_solver = "auto" ,
211212 tol = 0.0 ,
212213 iterated_power = 0 ,
213214 random_state = None ,
215+ center = True ,
214216 ):
215217 self .n_components = n_components
216218 self .copy = copy
@@ -221,14 +223,10 @@ def __init__(
221223 self .iterated_power = iterated_power
222224 self .random_state = random_state
223225
224- def _check_params (self ):
225- pass
226-
227226 def fit (self , X , y = None ):
228227 if not dask .is_dask_collection (X ):
229228 raise TypeError (_TYPE_MSG .format (type (X )))
230229
231- self ._check_params ()
232230 self ._fit (X )
233231 self .n_features_in_ = X .shape [1 ]
234232 return self
0 commit comments