From 0d3311fc0a48e2fbd25ef7dc7a82dca458b2ef35 Mon Sep 17 00:00:00 2001 From: Ashley Xu Date: Thu, 28 Mar 2024 22:47:06 +0000 Subject: [PATCH] chore: address comments from technical writers for legal review --- bigframes/_config/bigquery_options.py | 2 +- bigframes/ml/compose.py | 2 +- bigframes/ml/imported.py | 6 +++--- .../bigframes_vendored/pandas/core/groupby/__init__.py | 10 +++++----- .../sklearn/compose/_column_transformer.py | 4 ++-- .../bigframes_vendored/sklearn/decomposition/_pca.py | 4 ++-- .../sklearn/linear_model/_logistic.py | 2 +- .../sklearn/preprocessing/_discretization.py | 2 +- 8 files changed, 16 insertions(+), 16 deletions(-) diff --git a/bigframes/_config/bigquery_options.py b/bigframes/_config/bigquery_options.py index 9da953a582..50e14eaf28 100644 --- a/bigframes/_config/bigquery_options.py +++ b/bigframes/_config/bigquery_options.py @@ -57,7 +57,7 @@ def application_name(self) -> Optional[str]: """The application name to amend to the user-agent sent to Google APIs. The application name to amend to the user agent sent to Google APIs. - The recommended format is ``"appplication-name/major.minor.patch_version"`` + The recommended format is ``"application-name/major.minor.patch_version"`` or ``"(gpn:PartnerName;)"`` for official Google partners. """ return self._application_name diff --git a/bigframes/ml/compose.py b/bigframes/ml/compose.py index cd233589d6..21cfba8e01 100644 --- a/bigframes/ml/compose.py +++ b/bigframes/ml/compose.py @@ -13,7 +13,7 @@ # limitations under the License. """Build composite transformers on heterogeneous data. 
This module is styled -after Scikit-Learn's compose module: +after scikit-learn's compose module: https://scikit-learn.org/stable/modules/classes.html#module-sklearn.compose.""" from __future__ import annotations diff --git a/bigframes/ml/imported.py b/bigframes/ml/imported.py index 7f75827083..b551150050 100644 --- a/bigframes/ml/imported.py +++ b/bigframes/ml/imported.py @@ -34,7 +34,7 @@ class TensorFlowModel(base.Predictor): model_path (str): GCS path that holds the model files. session (BigQuery Session): - BQ session to create the model + BQ session to create the model. """ def __init__( @@ -113,7 +113,7 @@ class ONNXModel(base.Predictor): model_path (str): Cloud Storage path that holds the model files. session (BigQuery Session): - BQ session to create the model + BQ session to create the model. """ def __init__( @@ -207,7 +207,7 @@ class XGBoostModel(base.Predictor): and feature_types are both specified in the model file. Supported types are "bool", "string", "int64", "float64", "array<bool>", "array<string>", "array<int64>", "array<float64>". session (BigQuery Session): - BQ session to create the model + BQ session to create the model. """ def __init__( diff --git a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py index 8730cf0007..e1cc8c5a53 100644 --- a/third_party/bigframes_vendored/pandas/core/groupby/__init__.py +++ b/third_party/bigframes_vendored/pandas/core/groupby/__init__.py @@ -78,7 +78,7 @@ def median( Include only float, int, boolean columns. exact (bool, default False): Calculate the exact median instead of an approximation. Note: - ``exact=True`` not yet supported. + ``exact=True`` is not supported. Returns: pandas.Series or pandas.DataFrame: Median of groups. @@ -178,7 +178,7 @@ def sum( Include only float, int, boolean columns. min_count (int, default 0): The required number of valid values to perform the operation. 
If fewer - than ``min_count`` non-NA values are present the result will be NA. + than ``min_count`` non-NA values are present, the result will be NA. Returns: Series or DataFrame: Computed sum of values within each group. @@ -194,7 +194,7 @@ def prod(self, numeric_only: bool = False, min_count: int = 0): Include only float, int, boolean columns. min_count (int, default 0): The required number of valid values to perform the operation. If fewer - than ``min_count`` non-NA values are present the result will be NA. + than ``min_count`` non-NA values are present, the result will be NA. Returns: Series or DataFrame: Computed prod of values within each group. @@ -214,7 +214,7 @@ def min( Include only float, int, boolean columns. min_count (int, default 0): The required number of valid values to perform the operation. If fewer - than ``min_count`` non-NA values are present the result will be NA. + than ``min_count`` non-NA values are present, the result will be NA. Returns: Series or DataFrame: Computed min of values within each group. @@ -234,7 +234,7 @@ def max( Include only float, int, boolean columns. min_count (int, default 0): The required number of valid values to perform the operation. If fewer - than ``min_count`` non-NA values are present the result will be NA. + than ``min_count`` non-NA values are present, the result will be NA. Returns: Series or DataFrame: Computed max of values within each group. diff --git a/third_party/bigframes_vendored/sklearn/compose/_column_transformer.py b/third_party/bigframes_vendored/sklearn/compose/_column_transformer.py index b08eb10492..4b0bd42706 100644 --- a/third_party/bigframes_vendored/sklearn/compose/_column_transformer.py +++ b/third_party/bigframes_vendored/sklearn/compose/_column_transformer.py @@ -19,9 +19,9 @@ class ColumnTransformer(_BaseComposition): """Applies transformers to columns of BigQuery DataFrames. 
This estimator allows different columns or column subsets of the input - to be transformed separately and the features generated by each transformer + to be transformed separately, and the features generated by each transformer will be concatenated to form a single feature space. - This is useful for heterogeneous or columnar data, to combine several + This is useful for heterogeneous or columnar data to combine several feature extraction mechanisms or transformations into a single transformer. Args: diff --git a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py index dcce75d1d9..f126e0439d 100644 --- a/third_party/bigframes_vendored/sklearn/decomposition/_pca.py +++ b/third_party/bigframes_vendored/sklearn/decomposition/_pca.py @@ -22,8 +22,8 @@ class PCA(BaseEstimator, metaclass=ABCMeta): Args: n_components (int, float or None, default None): - Number of components to keep. - If n_components is not set all components are kept. n_components = min(n_samples, n_features). + Number of components to keep. If n_components is not set, all + components are kept: n_components = min(n_samples, n_features). If 0 < n_components < 1, select the number of components such that the amount of variance that needs to be explained is greater than the percentage specified by n_components. svd_solver ("full", "randomized" or "auto", default "auto"): The solver to use to calculate the principal components. Details: https://cloud.google.com/bigquery/docs/reference/standard-sql/bigqueryml-syntax-create-pca#pca_solver. 
diff --git a/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py b/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py index 88ff32ea06..494c730a6d 100644 --- a/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py +++ b/third_party/bigframes_vendored/sklearn/linear_model/_logistic.py @@ -38,7 +38,7 @@ class LogisticRegression(LinearClassifierMixin, BaseEstimator): automatically adjust weights inversely proportional to class frequencies in the input data as ``n_samples / (n_classes * np.bincount(y))``. Dict isn't - supported now. + supported. l1_reg (float or None, default None): The amount of L1 regularization applied. Default to None. Can't be set in "normal_equation" mode. If unset, value 0 is used. l2_reg (float, default 0.0): diff --git a/third_party/bigframes_vendored/sklearn/preprocessing/_discretization.py b/third_party/bigframes_vendored/sklearn/preprocessing/_discretization.py index 5fcc481573..98b9d0371f 100644 --- a/third_party/bigframes_vendored/sklearn/preprocessing/_discretization.py +++ b/third_party/bigframes_vendored/sklearn/preprocessing/_discretization.py @@ -18,7 +18,7 @@ class KBinsDiscretizer(TransformerMixin, BaseEstimator): strategy ({'uniform', 'quantile'}, default='quantile'): Strategy used to define the widths of the bins. 'uniform': All bins in each feature have identical widths. 'quantile': All bins in each - feature have the same number of points. Only `uniform` is supported now. + feature have the same number of points. Only `uniform` is supported. """ def fit(self, X, y=None):