From c151b7d8df441a7ba12e6c26dbbf565470db1fb6 Mon Sep 17 00:00:00 2001
From: Miles Olson
Date: Fri, 16 Jan 2026 12:13:27 -0800
Subject: [PATCH 1/3] Remove TData (#4771)

Summary: TData was necessary when we had multiple different Data classes, but recent developments have made this no longer needed.

Reviewed By: esantorella, saitcakmak

Differential Revision: D90596942

Privacy Context Container: L1413903
---
 ax/core/data.py | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/ax/core/data.py b/ax/core/data.py
index 0ebd6fdc01a..9310ddd5c82 100644
--- a/ax/core/data.py
+++ b/ax/core/data.py
@@ -14,7 +14,7 @@
 from functools import cached_property
 from io import StringIO
 from logging import Logger
-from typing import Any, TypeVar
+from typing import Any
 
 import numpy as np
 import numpy.typing as npt
@@ -34,7 +34,6 @@
 logger: Logger = get_logger(__name__)
 
-TData = TypeVar("TData", bound="Data")
 DF_REPR_MAX_LENGTH = 1000
 
 MAP_KEY = "step"
@@ -119,7 +118,7 @@ class Data(Base, SerializationMixin):
     full_df: pd.DataFrame
 
     def __init__(
-        self: TData,
+        self,
         df: pd.DataFrame | None = None,
         _skip_ordering_and_validation: bool = False,
     ) -> None:
@@ -159,8 +158,9 @@ def __init__(
             df = df.dropna(axis=0, how="all", ignore_index=True)
             df = self._safecast_df(df=df)
             self.full_df = self._get_df_with_cols_in_expected_order(df=df)
-        self._memo_df = None
-        self.has_step_column = MAP_KEY in self.full_df.columns
+
+        self._memo_df: pd.DataFrame | None = None
+        self.has_step_column: bool = MAP_KEY in self.full_df.columns
 
     @classmethod
     def _get_df_with_cols_in_expected_order(cls, df: pd.DataFrame) -> pd.DataFrame:
@@ -175,7 +175,7 @@ def _get_df_with_cols_in_expected_order(cls, df: pd.DataFrame) -> pd.DataFrame:
         return df
 
     @classmethod
-    def _safecast_df(cls: type[TData], df: pd.DataFrame) -> pd.DataFrame:
+    def _safecast_df(cls, df: pd.DataFrame) -> pd.DataFrame:
         """Function for safely casting df to standard data types.
 
         Needed because numpy does not support NaNs in integer arrays.
@@ -275,7 +275,7 @@ def df(self) -> pd.DataFrame:
         return self._memo_df
 
     @classmethod
-    def from_multiple_data(cls: type[TData], data: Iterable[Data]) -> TData:
+    def from_multiple_data(cls, data: Iterable[Data]) -> Data:
         """Combines multiple objects into one (with the concatenated
         underlying dataframe).
@@ -339,7 +339,7 @@ def filter(
             _skip_ordering_and_validation=True,
         )
 
-    def clone(self: TData) -> TData:
+    def clone(self) -> Data:
         """Returns a new Data object with the same underlying dataframe."""
         return self.__class__(df=deepcopy(self.full_df))
 
@@ -347,13 +347,13 @@ def __eq__(self, o: Data) -> bool:
         return type(self) is type(o) and dataframe_equals(self.full_df, o.full_df)
 
     def relativize(
-        self: TData,
+        self,
         status_quo_name: str = "status_quo",
         as_percent: bool = False,
         include_sq: bool = False,
         bias_correction: bool = True,
         control_as_constant: bool = False,
-    ) -> TData:
+    ) -> Data:
         """Relativize a data object w.r.t. a status_quo arm.
 
         Args:
@@ -437,12 +437,12 @@ def latest(self, rows_per_group: int = 1) -> Data:
         )
 
     def subsample(
-        self: TData,
+        self,
         keep_every: int | None = None,
         limit_rows_per_group: int | None = None,
         limit_rows_per_metric: int | None = None,
         include_first_last: bool = True,
-    ) -> TData:
+    ) -> Data:
         """Return a new Data that subsamples the `MAP_KEY` column in an
         equally-spaced manner.
This function considers only the relative ordering of the `MAP_KEY` values, making it most suitable when these values are From 2ff631763d182a3d4fa2e64496b70a24c87247ba Mon Sep 17 00:00:00 2001 From: Miles Olson Date: Fri, 16 Jan 2026 12:13:27 -0800 Subject: [PATCH 2/3] Remove TestDataBase now that DataBase is gone (#4772) Summary: Moved these tests into TestData, since Data is the only data-related class in Ax. Reviewed By: saitcakmak Differential Revision: D90605845 Privacy Context Container: L1413903 --- ax/core/tests/test_data.py | 96 +++++++------------------------------- 1 file changed, 16 insertions(+), 80 deletions(-) diff --git a/ax/core/tests/test_data.py b/ax/core/tests/test_data.py index 8567135a126..f7ff10e597b 100644 --- a/ax/core/tests/test_data.py +++ b/ax/core/tests/test_data.py @@ -119,29 +119,22 @@ def get_test_dataframe() -> pd.DataFrame: ) -class TestDataBase(TestCase): - # Also run with has_step_column = True below - has_step_column: bool = False +class DataTest(TestCase): + """Tests for Data without a "step" column.""" def setUp(self) -> None: super().setUp() self.data_without_df = Data() - df = get_test_dataframe() - if not self.has_step_column: - self.df = df - self.data_with_df = Data(df=self.df) - else: - df_1 = df.copy().assign(**{MAP_KEY: 0}) - df_2 = df.copy().assign(**{MAP_KEY: 1}) - self.df = pd.concat((df_1, df_2)) - self.data_with_df = Data(df=self.df) - + self.df = get_test_dataframe() + self.data_with_df = Data(df=self.df) self.metric_name_to_signature = {"a": "a_signature", "b": "b_signature"} def test_init(self) -> None: + # Test equality self.assertEqual(self.data_without_df, self.data_without_df) self.assertEqual(self.data_with_df, self.data_with_df) + # Test accessing values df = self.data_with_df.df self.assertEqual( float(df[df["arm_name"] == "0_0"][df["metric_name"] == "a"]["mean"].item()), @@ -152,7 +145,14 @@ def test_init(self) -> None: 0.5, ) - self.assertEqual(self.data_with_df.has_step_column, self.has_step_column) + # Test has_step_column is False + self.assertFalse(self.data_with_df.has_step_column) + + # Test empty initialization + empty = Data() + self.assertTrue(empty.full_df.empty) + self.assertEqual(set(empty.full_df.columns), empty.REQUIRED_COLUMNS) + self.assertFalse(empty.has_step_column) def test_clone(self) -> None: data = self.data_with_df @@ -164,14 +164,9 @@ def test_clone(self) -> None: self.assertIsNot(data, data_clone) self.assertIsNot(data.df, data_clone.df) self.assertIsNone(data_clone._db_id) - if self.has_step_column: - self.assertIsNot(data.full_df, data_clone.full_df) - self.assertTrue(data.full_df.equals(data_clone.full_df)) def test_BadData(self) -> None: data = {"bad_field": "0_0", "bad_field_2": {"x": 0, "y": "a"}} - if self.has_step_column: - data[MAP_KEY] = "0" df = pd.DataFrame([data]) with self.assertRaisesRegex( ValueError, "Dataframe must contain required columns" @@ -184,17 +179,13 @@ def test_EmptyData(self) -> None: self.assertTrue(df.empty) self.assertTrue(Data.from_multiple_data([]).df.empty) - if data.has_step_column: - self.assertTrue(data.full_df.empty) - expected_columns = Data.REQUIRED_COLUMNS.union({MAP_KEY}) - else: - expected_columns = Data.REQUIRED_COLUMNS + expected_columns = Data.REQUIRED_COLUMNS self.assertEqual(set(df.columns), expected_columns) def test_from_multiple_with_generator(self) -> None: data = Data.from_multiple_data(self.data_with_df for _ in range(2)) self.assertEqual(len(data.full_df), 2 * len(self.data_with_df.full_df)) - self.assertEqual(data.has_step_column, 
self.has_step_column)
+        self.assertFalse(data.has_step_column)
 
     def test_extra_columns(self) -> None:
         value = 3
@@ -235,26 +226,6 @@ def test_trial_indices(self) -> None:
             set(self.data_with_df.full_df["trial_index"].unique()),
         )
 
-
-class TestMapData(TestDataBase):
-    has_step_column = True
-
-
-class DataTest(TestCase):
-    """Tests that are specific to Data without a "step" column."""
-
-    def setUp(self) -> None:
-        super().setUp()
-        self.df = get_test_dataframe()
-        self.metric_name_to_signature = {"a": "a_signature", "b": "b_signature"}
-
-    def test_init(self) -> None:
-        # Initialize empty
-        empty = Data()
-        self.assertTrue(empty.full_df.empty)
-        self.assertEqual(set(empty.full_df.columns), empty.REQUIRED_COLUMNS)
-        self.assertFalse(empty.has_step_column)
-
     def test_repr(self) -> None:
         self.assertEqual(
             str(Data(df=self.df)),
 )
         with patch(f"{Data.__module__}.DF_REPR_MAX_LENGTH", 500):
             self.assertEqual(str(Data(df=self.df)), REPR_500)
 
-    def test_OtherClassInequality(self) -> None:
-        class CustomData(Data):
-            pass
-
-        data = CustomData(df=self.df)
-        self.assertNotEqual(data, Data(self.df))
-
     def test_from_multiple(self) -> None:
         with self.subTest("Combinining non-empty Data"):
             data = Data.from_multiple_data([Data(df=self.df), Data(df=self.df)])
 
             data = Data.from_multiple_data([Data(), Data()])
             self.assertEqual(data, Data())
 
-        with self.subTest("Different types"):
-
-            class CustomData(Data):
-                pass
-
-            data = Data.from_multiple_data([CustomData(), CustomData()])
-            self.assertEqual(data, Data())
-            data = CustomData.from_multiple_data([Data(), CustomData()])
-            self.assertEqual(data, CustomData())
-
-    def test_FromMultipleDataMismatchedTypes(self) -> None:
-        # create two custom data types
-        class CustomDataA(Data):
-            pass
-
-        class CustomDataB(Data):
-            pass
-
-        # Test using `Data.from_multiple_data` to combine non-Data types
-        data = Data.from_multiple_data([CustomDataA(), CustomDataB()])
-        self.assertEqual(data, Data())
-
-        # multiple non-empty types
-        data_elt_A = CustomDataA(df=self.df)
-        data_elt_B = CustomDataB(df=self.df)
-        data = Data.from_multiple_data([data_elt_A, data_elt_B])
-        self.assertEqual(len(data.full_df), 2 * len(self.df))
-
     def test_filter(self) -> None:
         data = Data(df=self.df)
         # Test that filter throws when we provide metric names and metric signatures

From 4d8f78e23ac5120d27d556eacd4e6abe05c0bf2b Mon Sep 17 00:00:00 2001
From: Miles Olson
Date: Fri, 16 Jan 2026 12:13:27 -0800
Subject: [PATCH 3/3] Back Data with DataRow object (#4773)

Summary: NOTE: This is much slower than the implementation which is backed by a dataframe. For clarity, I've put this naive implementation up as its own diff; the next diff hunts for speedups.

Creates a new source of truth for Data: the DataRow. The df is now a cached property which is dynamically generated based on these rows. In the future, these will become a Base object in SQLAlchemy such that Data will have a SQLAlchemy relationship to a list of DataRows, which live in their own table.

RFC:
1. I'm renaming sem -> se here (but keeping sem in the df for now, since this could be an incredibly involved cleanup). Do we have alignment that this is a positive change? If so, I can either start or backlog the cleanup across the codebase. cc Balandat, with whom I've talked about this a while back.

2. This removes the ability for Data to contain arbitrary columns, which was added in D83682740 and afaik unused. Arbitrary new columns would not be compatible with the new storage setup (it was easy in the old setup, which is why we added it), and I think we should take a careful look at how to store contextual data in the future in a structured way.
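To make the new shape of the API concrete, here is a minimal sketch (illustrative only, not part of the diff itself; it exercises the `DataRow` and `Data.__init__` signatures introduced below):

    from ax.core.data import Data, DataRow

    rows = [
        DataRow(
            trial_index=0,
            arm_name="0_0",
            metric_name="a",
            metric_signature="a_signature",
            mean=2.0,
            se=0.2,  # named `se` on the row, still surfaced as "sem" in the df
        ),
    ]
    data = Data(data_rows=rows)  # the rows, not a df, are the source of truth
    df = data.full_df  # the df is derived lazily from the rows (cached property)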
Differential Revision: D90605846
---
 .../plotly/tests/test_marginal_effects.py |   2 +-
 ax/core/base_trial.py                     |   4 +-
 ax/core/data.py                           | 129 ++++++++++++++----
 ax/core/tests/test_data.py                |   8 --
 ax/core/tests/test_experiment.py          |   6 +-
 ax/plot/pareto_utils.py                   |   2 +-
 ax/plot/scatter.py                        |   2 +-
 ax/plot/tests/test_fitted_scatter.py      |   2 +-
 ax/plot/tests/test_pareto_utils.py        |   2 +-
 .../json_store/tests/test_json_store.py   |   8 +-
 ax/storage/sqa_store/utils.py             |   1 +
 ax/utils/testing/core_stubs.py            |   2 +-
 12 files changed, 120 insertions(+), 48 deletions(-)

diff --git a/ax/analysis/plotly/tests/test_marginal_effects.py b/ax/analysis/plotly/tests/test_marginal_effects.py
index 25b299d5f77..107381844cd 100644
--- a/ax/analysis/plotly/tests/test_marginal_effects.py
+++ b/ax/analysis/plotly/tests/test_marginal_effects.py
@@ -43,7 +43,7 @@ def setUp(self) -> None:
             self.experiment.trials[i].mark_running(no_runner_required=True)
             self.experiment.attach_data(
                 Data(
-                    pd.DataFrame(
+                    df=pd.DataFrame(
                         {
                             "trial_index": [i] * num_arms,
                             "arm_name": [f"0_{j}" for j in range(num_arms)],
diff --git a/ax/core/base_trial.py b/ax/core/base_trial.py
index 649c17c57e9..8e9a4d2853e 100644
--- a/ax/core/base_trial.py
+++ b/ax/core/base_trial.py
@@ -15,7 +15,7 @@
 from typing import Any, TYPE_CHECKING
 
 from ax.core.arm import Arm
-from ax.core.data import Data, sort_by_trial_index_and_arm_name
+from ax.core.data import Data
 from ax.core.evaluations_to_data import raw_evaluations_to_data
 from ax.core.generator_run import GeneratorRun, GeneratorRunType
 from ax.core.metric import Metric, MetricFetchResult
@@ -442,8 +442,6 @@ def fetch_data(self, metrics: list[Metric] | None = None, **kwargs: Any) -> Data
         data = Metric._unwrap_trial_data_multi(
             results=self.fetch_data_results(metrics=metrics, **kwargs)
         )
-        if not data.has_step_column:
-            data.full_df = sort_by_trial_index_and_arm_name(data.full_df)
         return data
diff --git a/ax/core/data.py b/ax/core/data.py
index 9310ddd5c82..e6e98d5a66b 100644
--- a/ax/core/data.py
+++ b/ax/core/data.py
@@ -38,6 +38,36 @@
 MAP_KEY = "step"
 
 
+class DataRow:
+    def __init__(
+        self,
+        trial_index: int,
+        arm_name: str,
+        metric_name: str,
+        metric_signature: str,
+        mean: float,
+        se: float,
+        step: float | None = None,
+        start_time: int | None = None,
+        end_time: int | None = None,
+        n: int | None = None,
+    ) -> None:
+        self.trial_index: int = trial_index
+        self.arm_name: str = arm_name
+
+        self.metric_name: str = metric_name
+        self.metric_signature: str = metric_signature
+
+        self.mean: float = mean
+        self.se: float = se
+
+        self.step: float | None = step
+
+        self.start_time: int | None = start_time
+        self.end_time: int | None = end_time
+        self.n: int | None = n
+
+
 class Data(Base, SerializationMixin):
     """Class storing numerical data for an experiment.
@@ -101,8 +131,6 @@ class Data(Base, SerializationMixin): "start_time": pd.Timestamp, "end_time": pd.Timestamp, "n": int, - "frac_nonnull": np.float64, - "random_split": int, MAP_KEY: float, } @@ -115,16 +143,19 @@ class Data(Base, SerializationMixin): "metric_signature", ] - full_df: pd.DataFrame + _data_rows: list[DataRow] def __init__( self, + data_rows: Iterable[DataRow] | None = None, df: pd.DataFrame | None = None, _skip_ordering_and_validation: bool = False, ) -> None: """Initialize a ``Data`` object from the given DataFrame. Args: + data_rows: Iterable of DataRows. If provided, this will be used as the + source of truth for Data, over df. df: DataFrame with underlying data, and required columns. Data must be unique at the level of ("trial_index", "arm_name", "metric_name"), plus "step" if a "step" column is present. A @@ -135,32 +166,84 @@ def __init__( Intended only for use in `Data.filter`, where the contents of the DataFrame are known to be ordered and valid. """ - if df is None: - # Initialize with barebones DF with expected dtypes - self.full_df = pd.DataFrame.from_dict( + if data_rows is not None: + if isinstance(data_rows, pd.DataFrame): + raise ValueError( + "data_rows must be an iterable of DataRows, not a DataFrame." + ) + self._data_rows = [*data_rows] + elif df is not None: + # Unroll the df into a list of DataRows + if missing_columns := self.REQUIRED_COLUMNS - {*df.columns}: + raise ValueError( + f"Dataframe must contain required columns {list(missing_columns)}." + ) + + self._data_rows = [ + DataRow( + trial_index=row["trial_index"], + arm_name=row["arm_name"], + metric_name=row["metric_name"], + metric_signature=row["metric_signature"], + mean=row["mean"], + se=row["sem"], + step=row.get(MAP_KEY), + start_time=row.get("start_time"), + end_time=row.get("end_time"), + n=row.get("n"), + ) + for _, row in df.iterrows() + ] + else: + self._data_rows = [] + + self._memo_df: pd.DataFrame | None = None + self.has_step_column: bool = any( + row.step is not None for row in self._data_rows + ) + + @cached_property + def full_df(self) -> pd.DataFrame: + """ + Convert the DataRows into a pandas DataFrame. If step, start_time, or end_time + is None for all rows the column will be elided. + """ + if len(self._data_rows) == 0: + return pd.DataFrame.from_dict( { col: pd.Series([], dtype=self.COLUMN_DATA_TYPES[col]) for col in self.REQUIRED_COLUMNS } ) - elif _skip_ordering_and_validation: - self.full_df = df - else: - columns = set(df.columns) - missing_columns = self.REQUIRED_COLUMNS - columns - if missing_columns: - raise ValueError( - f"Dataframe must contain required columns {list(missing_columns)}." - ) - # Drop rows where every input is null. Since `dropna` can be slow, first - # check trial index to see if dropping nulls might be needed. - if df["trial_index"].isnull().any(): - df = df.dropna(axis=0, how="all", ignore_index=True) - df = self._safecast_df(df=df) - self.full_df = self._get_df_with_cols_in_expected_order(df=df) - self._memo_df: pd.DataFrame | None = None - self.has_step_column: bool = MAP_KEY in self.full_df.columns + # Detect whether any of the optional attributes are present and should be + # included as columns in the full DataFrame. 
+ include_step = any(row.step is not None for row in self._data_rows) + include_start_time = any(row.start_time is not None for row in self._data_rows) + include_end_time = any(row.end_time is not None for row in self._data_rows) + include_n = any(row.n is not None for row in self._data_rows) + + records = [ + { + "trial_index": row.trial_index, + "arm_name": row.arm_name, + "metric_name": row.metric_name, + "metric_signature": row.metric_signature, + "mean": row.mean, + "sem": row.se, + **({"step": row.step} if include_step else {}), + **({"start_time": row.start_time} if include_start_time else {}), + **({"end_time": row.end_time} if include_end_time else {}), + **({"n": row.n} if include_n else {}), + } + for row in self._data_rows + ] + + return self._get_df_with_cols_in_expected_order( + df=self._safecast_df( + df=pd.DataFrame.from_records(records), + ), + ) @classmethod def _get_df_with_cols_in_expected_order(cls, df: pd.DataFrame) -> pd.DataFrame: diff --git a/ax/core/tests/test_data.py b/ax/core/tests/test_data.py index f7ff10e597b..45365e2a93f 100644 --- a/ax/core/tests/test_data.py +++ b/ax/core/tests/test_data.py @@ -187,14 +187,6 @@ def test_from_multiple_with_generator(self) -> None: self.assertEqual(len(data.full_df), 2 * len(self.data_with_df.full_df)) self.assertFalse(data.has_step_column) - def test_extra_columns(self) -> None: - value = 3 - extra_col_df = self.df.assign(foo=value) - data = Data(df=extra_col_df) - self.assertIn("foo", data.full_df.columns) - self.assertIn("foo", data.df.columns) - self.assertTrue((data.full_df["foo"] == value).all()) - def test_get_df_with_cols_in_expected_order(self) -> None: with self.subTest("Wrong order"): df = pd.DataFrame(columns=["mean", "trial_index", "hat"], data=[[0] * 3]) diff --git a/ax/core/tests/test_experiment.py b/ax/core/tests/test_experiment.py index 9488ee8bbf3..9f47be694f6 100644 --- a/ax/core/tests/test_experiment.py +++ b/ax/core/tests/test_experiment.py @@ -673,7 +673,7 @@ def test_fetch_and_store_data(self) -> None: # Verify we do get the stored data if there are an unimplemented metrics. # Remove attached data for nonexistent metric. - exp.data.full_df = exp.data.full_df.loc[lambda x: x["metric_name"] != "z"] + exp.data = Data(df=exp.data.full_df.loc[lambda x: x["metric_name"] != "z"]) # Remove implemented metric that is `available_while_running` # (and therefore not pulled from cache). 
@@ -685,7 +685,9 @@ def test_fetch_and_store_data(self) -> None: looked_up_df = looked_up_data.full_df self.assertFalse((looked_up_df["metric_name"] == "z").any()) self.assertTrue( - batch.fetch_data().full_df.equals( + batch.fetch_data() + .full_df.sort_values(["arm_name", "metric_name"], ignore_index=True) + .equals( looked_up_df.loc[lambda x: (x["trial_index"] == 0)].sort_values( ["arm_name", "metric_name"], ignore_index=True ) diff --git a/ax/plot/pareto_utils.py b/ax/plot/pareto_utils.py index 996c5c0fd99..a11af3c67dc 100644 --- a/ax/plot/pareto_utils.py +++ b/ax/plot/pareto_utils.py @@ -207,7 +207,7 @@ def get_observed_pareto_frontiers( ): # Make sure status quo is always included, for derelativization arm_names.append(experiment.status_quo.name) - data = Data(data.df[data.df["arm_name"].isin(arm_names)]) + data = Data(df=data.df[data.df["arm_name"].isin(arm_names)]) adapter = get_tensor_converter_adapter(experiment=experiment, data=data) pareto_observations = observed_pareto_frontier(adapter=adapter) # Convert to ParetoFrontierResults diff --git a/ax/plot/scatter.py b/ax/plot/scatter.py index a3ac1987146..27af4eb3c97 100644 --- a/ax/plot/scatter.py +++ b/ax/plot/scatter.py @@ -1731,7 +1731,7 @@ def tile_observations( if data is None: data = experiment.fetch_data() if arm_names is not None: - data = Data(data.df[data.df["arm_name"].isin(arm_names)]) + data = Data(df=data.df[data.df["arm_name"].isin(arm_names)]) m_ts = Generators.THOMPSON( data=data, search_space=experiment.search_space, diff --git a/ax/plot/tests/test_fitted_scatter.py b/ax/plot/tests/test_fitted_scatter.py index ef13bfd9016..9642dba9cd2 100644 --- a/ax/plot/tests/test_fitted_scatter.py +++ b/ax/plot/tests/test_fitted_scatter.py @@ -33,7 +33,7 @@ def test_fitted_scatter(self) -> None: model = Generators.BOTORCH_MODULAR( # Adapter kwargs experiment=exp, - data=Data.from_multiple_data([data, Data(df)]), + data=Data.from_multiple_data([data, Data(df=df)]), ) # Assert that each type of plot can be constructed successfully scalarized_metric_config = [ diff --git a/ax/plot/tests/test_pareto_utils.py b/ax/plot/tests/test_pareto_utils.py index bc97bbf3891..de2259a4f6d 100644 --- a/ax/plot/tests/test_pareto_utils.py +++ b/ax/plot/tests/test_pareto_utils.py @@ -107,7 +107,7 @@ def test_get_observed_pareto_frontiers(self) -> None: # For the check below, compute which arms are better than SQ df = experiment.fetch_data().df df["sem"] = np.nan - data = Data(df) + data = Data(df=df) sq_val = df[(df["arm_name"] == "status_quo") & (df["metric_name"] == "m1")][ "mean" ].values[0] diff --git a/ax/storage/json_store/tests/test_json_store.py b/ax/storage/json_store/tests/test_json_store.py index 00726651976..96d2c613007 100644 --- a/ax/storage/json_store/tests/test_json_store.py +++ b/ax/storage/json_store/tests/test_json_store.py @@ -705,10 +705,6 @@ def test_decode_map_data_backward_compatible(self) -> None: class_decoder_registry=CORE_CLASS_DECODER_REGISTRY, ) self.assertEqual(len(map_data.full_df), 2) - # Even though the "epoch" and "timestamps" columns have not been - # renamed to "step", they are present - self.assertEqual(map_data.full_df["epoch"].tolist(), [0.0, 1.0]) - self.assertEqual(map_data.full_df["timestamps"].tolist(), [3.0, 4.0]) self.assertIsInstance(map_data, Data) with self.subTest("Single map key"): @@ -729,8 +725,8 @@ def test_decode_map_data_backward_compatible(self) -> None: decoder_registry=CORE_DECODER_REGISTRY, class_decoder_registry=CORE_CLASS_DECODER_REGISTRY, ) - self.assertIn("epoch", 
map_data.full_df.columns) - self.assertEqual(map_data.full_df["epoch"].tolist(), [0.0, 1.0]) + self.assertEqual(len(map_data.full_df), 2) + self.assertIsInstance(map_data, Data) with self.subTest("No map key"): data_json = { diff --git a/ax/storage/sqa_store/utils.py b/ax/storage/sqa_store/utils.py index dfff1786dbf..3fc233ff15e 100644 --- a/ax/storage/sqa_store/utils.py +++ b/ax/storage/sqa_store/utils.py @@ -44,6 +44,7 @@ # don't need to recur into them during `copy_db_ids`. "auxiliary_experiments_by_purpose", "_metric_fetching_errors", + "_data_rows", } SKIP_ATTRS_ERROR_SUFFIX = "Consider adding to COPY_DB_IDS_ATTRS_TO_SKIP if appropriate." diff --git a/ax/utils/testing/core_stubs.py b/ax/utils/testing/core_stubs.py index 6d6dff0e638..6a33f38ceb3 100644 --- a/ax/utils/testing/core_stubs.py +++ b/ax/utils/testing/core_stubs.py @@ -2581,7 +2581,7 @@ def get_branin_data_batch( for i in range(len(means)) for metric in metrics ] - return Data(pd.DataFrame.from_records(records)) + return Data(df=pd.DataFrame.from_records(records)) def get_branin_data_multi_objective(