From f21f4e7da1080ad7cd2a765a8af8390c023488bd Mon Sep 17 00:00:00 2001 From: Florian Felten Date: Thu, 25 Sep 2025 14:26:51 +0200 Subject: [PATCH] Bump HF datasets version --- engiopt/cgan_1d/cgan_1d.py | 4 ++-- engiopt/cgan_2d/cgan_2d.py | 2 +- engiopt/cgan_bezier/cgan_bezier.py | 4 ++-- engiopt/cgan_cnn_2d/cgan_cnn_2d.py | 2 +- engiopt/cgan_cnn_3d/cgan_cnn_3d.py | 4 ++-- engiopt/cgan_vae/cgan_vae.py | 4 ++-- engiopt/diffusion_1d/diffusion_1d.py | 2 +- engiopt/diffusion_2d_cond/diffusion_2d_cond.py | 4 ++-- engiopt/gan_1d/gan_1d.py | 4 ++-- engiopt/gan_2d/gan_2d.py | 2 +- engiopt/gan_bezier/gan_bezier.py | 4 ++-- engiopt/gan_cnn_2d/gan_cnn_2d.py | 2 +- pyproject.toml | 2 +- 13 files changed, 20 insertions(+), 20 deletions(-) diff --git a/engiopt/cgan_1d/cgan_1d.py b/engiopt/cgan_1d/cgan_1d.py index 27257f4f..0dc79ed2 100644 --- a/engiopt/cgan_1d/cgan_1d.py +++ b/engiopt/cgan_1d/cgan_1d.py @@ -180,8 +180,8 @@ def prepare_data(problem: Problem, device: th.device) -> tuple[th.utils.data.Ten transform = flatten_dict_factory(problem, device) training_ds = th.utils.data.TensorDataset( - transform(training_ds["optimal_design"]), - *[training_ds[key] for key in problem.conditions_keys], + transform(training_ds["optimal_design"][:]), + *[training_ds[key][:] for key in problem.conditions_keys], ) # Create condition normalizer diff --git a/engiopt/cgan_2d/cgan_2d.py b/engiopt/cgan_2d/cgan_2d.py index afd4004e..8d8f54d2 100644 --- a/engiopt/cgan_2d/cgan_2d.py +++ b/engiopt/cgan_2d/cgan_2d.py @@ -177,7 +177,7 @@ def forward(self, design: th.Tensor, conds: th.Tensor) -> th.Tensor: training_ds = problem.dataset.with_format("torch", device=device)["train"] training_ds = th.utils.data.TensorDataset( - training_ds["optimal_design"].flatten(1), *[training_ds[key] for key in problem.conditions_keys] + training_ds["optimal_design"][:].flatten(1), *[training_ds[key][:] for key in problem.conditions_keys] ) dataloader = th.utils.data.DataLoader( training_ds, diff --git a/engiopt/cgan_bezier/cgan_bezier.py b/engiopt/cgan_bezier/cgan_bezier.py index 91742ee8..28053984 100644 --- a/engiopt/cgan_bezier/cgan_bezier.py +++ b/engiopt/cgan_bezier/cgan_bezier.py @@ -455,12 +455,12 @@ def denormalize(self, x: th.Tensor) -> th.Tensor: problem_dataset = problem.dataset.with_format("torch")["train"] design_scalar_keys = list(problem_dataset["optimal_design"][0].keys()) design_scalar_keys.remove("coords") - coords_set = [problem_dataset[i]["optimal_design"]["coords"] for i in range(len(problem_dataset))] + coords_set = [problem_dataset[i]["optimal_design"]["coords"][:] for i in range(len(problem_dataset))] design_scalars = [example["optimal_design"][key] for example in problem_dataset for key in design_scalar_keys] training_ds = th.utils.data.TensorDataset( th.stack(coords_set), th.stack(design_scalars).unsqueeze(1), - *[problem_dataset[key] for key, _ in problem.conditions], + *[problem_dataset[key][:] for key, _ in problem.conditions], ) cond_tensors = th.stack(training_ds.tensors[2:]) diff --git a/engiopt/cgan_cnn_2d/cgan_cnn_2d.py b/engiopt/cgan_cnn_2d/cgan_cnn_2d.py index a655c147..80cf1160 100644 --- a/engiopt/cgan_cnn_2d/cgan_cnn_2d.py +++ b/engiopt/cgan_cnn_2d/cgan_cnn_2d.py @@ -272,7 +272,7 @@ def forward(self, x: th.Tensor, c: th.Tensor) -> th.Tensor: # Configure data loader training_ds = problem.dataset.with_format("torch", device=device)["train"] training_ds = th.utils.data.TensorDataset( - training_ds["optimal_design"].flatten(1), *[training_ds[key] for key in problem.conditions_keys] + training_ds["optimal_design"][:].flatten(1), *[training_ds[key][:] for key in problem.conditions_keys] ) dataloader = th.utils.data.DataLoader( training_ds, diff --git a/engiopt/cgan_cnn_3d/cgan_cnn_3d.py b/engiopt/cgan_cnn_3d/cgan_cnn_3d.py index 5cdeae7e..454b692b 100644 --- a/engiopt/cgan_cnn_3d/cgan_cnn_3d.py +++ b/engiopt/cgan_cnn_3d/cgan_cnn_3d.py @@ -408,8 +408,8 @@ def compute_gradient_penalty(discriminator, real_samples, fake_samples, conds, d training_ds = problem.dataset.with_format("torch", device=device)["train"] # Extract 3D designs and conditions - designs_3d = training_ds["optimal_design"] # Should be (N, D, H, W) - condition_tensors = [training_ds[key] for key in problem.conditions_keys] + designs_3d = training_ds["optimal_design"][:] # Should be (N, D, H, W) + condition_tensors = [training_ds[key][:] for key in problem.conditions_keys] training_ds = th.utils.data.TensorDataset(designs_3d, *condition_tensors) dataloader = th.utils.data.DataLoader( diff --git a/engiopt/cgan_vae/cgan_vae.py b/engiopt/cgan_vae/cgan_vae.py index 8e8b0c00..80dfee18 100644 --- a/engiopt/cgan_vae/cgan_vae.py +++ b/engiopt/cgan_vae/cgan_vae.py @@ -509,8 +509,8 @@ def compute_gradient_penalty(discriminator, real_samples, fake_samples, conds, d training_ds = problem.dataset.with_format("torch", device=device)["train"] # Extract 3d designs and conditions - designs_3d = training_ds["optimal_design"] - condition_tensors = [training_ds[key] for key in problem.conditions_keys] + designs_3d = training_ds["optimal_design"][:] + condition_tensors = [training_ds[key][:] for key in problem.conditions_keys] training_ds = th.utils.data.TensorDataset(designs_3d, *condition_tensors) dataloader = th.utils.data.DataLoader( diff --git a/engiopt/diffusion_1d/diffusion_1d.py b/engiopt/diffusion_1d/diffusion_1d.py index de779db2..12cde369 100644 --- a/engiopt/diffusion_1d/diffusion_1d.py +++ b/engiopt/diffusion_1d/diffusion_1d.py @@ -66,7 +66,7 @@ def prepare_data(problem: Problem, padding_size: int, device: th.device) -> tupl transform = flatten_dict_factory(problem, device) # Add padding to the transformed data - transformed_data = transform(training_ds["optimal_design"]) + transformed_data = transform(training_ds["optimal_design"][:]) if padding_size > 0: padded_data = th.nn.functional.pad(transformed_data, (0, padding_size), mode="constant", value=0) else: diff --git a/engiopt/diffusion_2d_cond/diffusion_2d_cond.py b/engiopt/diffusion_2d_cond/diffusion_2d_cond.py index 9da594f9..978c5be4 100644 --- a/engiopt/diffusion_2d_cond/diffusion_2d_cond.py +++ b/engiopt/diffusion_2d_cond/diffusion_2d_cond.py @@ -287,12 +287,12 @@ def sample_timestep( training_ds = problem.dataset.with_format("torch", device=device)["train"] filtered_ds = th.zeros(len(training_ds), design_shape[0], design_shape[1], device=device) for i in range(len(training_ds)): - filtered_ds[i] = training_ds[i]["optimal_design"].reshape(1, design_shape[0], design_shape[1]) + filtered_ds[i] = training_ds[i]["optimal_design"][:].reshape(1, design_shape[0], design_shape[1]) filtered_ds_max = filtered_ds.max() filtered_ds_min = filtered_ds.min() filtered_ds_norm = (filtered_ds - filtered_ds_min) / (filtered_ds_max - filtered_ds_min) training_ds = th.utils.data.TensorDataset( - filtered_ds_norm.flatten(1), *[training_ds[key] for key in problem.conditions_keys] + filtered_ds_norm.flatten(1), *[training_ds[key][:] for key in problem.conditions_keys] ) cond_tensors = th.stack(training_ds.tensors[1 : len(problem.conditions) + 1]) conds_min = cond_tensors.amin(dim=tuple(range(1, cond_tensors.ndim))) diff --git a/engiopt/gan_1d/gan_1d.py b/engiopt/gan_1d/gan_1d.py index 2ef472a9..ae60b687 100644 --- a/engiopt/gan_1d/gan_1d.py +++ b/engiopt/gan_1d/gan_1d.py @@ -152,8 +152,8 @@ def prepare_data(problem: Problem, device: th.device) -> tuple[th.utils.data.Ten transform = flatten_dict_factory(problem, device) training_ds = th.utils.data.TensorDataset( - transform(training_ds["optimal_design"]), - *[training_ds[key] for key in problem.conditions_keys], + transform(training_ds["optimal_design"][:]), + *[training_ds[key][:] for key in problem.conditions_keys], ) # Create design normalizer diff --git a/engiopt/gan_2d/gan_2d.py b/engiopt/gan_2d/gan_2d.py index 42511257..95b7746e 100644 --- a/engiopt/gan_2d/gan_2d.py +++ b/engiopt/gan_2d/gan_2d.py @@ -151,7 +151,7 @@ def forward(self, img: th.Tensor) -> th.Tensor: # Configure data loader training_ds = problem.dataset.with_format("torch", device=device)["train"] - training_ds = th.utils.data.TensorDataset(training_ds["optimal_design"].flatten(1)) + training_ds = th.utils.data.TensorDataset(training_ds["optimal_design"][:].flatten(1)) dataloader = th.utils.data.DataLoader( training_ds, batch_size=args.batch_size, diff --git a/engiopt/gan_bezier/gan_bezier.py b/engiopt/gan_bezier/gan_bezier.py index 3a9d9fda..e72111bb 100644 --- a/engiopt/gan_bezier/gan_bezier.py +++ b/engiopt/gan_bezier/gan_bezier.py @@ -419,12 +419,12 @@ def prepare_data(problem, batch_size, device): problem_dataset = problem.dataset.with_format("torch")["train"] design_scalar_keys = list(problem_dataset["optimal_design"][0].keys()) design_scalar_keys.remove("coords") - coords_set = [problem_dataset[i]["optimal_design"]["coords"] for i in range(len(problem_dataset))] + coords_set = [problem_dataset[i]["optimal_design"]["coords"][:] for i in range(len(problem_dataset))] design_scalars = [example["optimal_design"][key] for example in problem_dataset for key in design_scalar_keys] training_ds = th.utils.data.TensorDataset( th.stack(coords_set), th.stack(design_scalars).unsqueeze(1), - *[problem_dataset[key] for key, _ in problem.conditions], + *[problem_dataset[key][:] for key, _ in problem.conditions], ) dataloader = th.utils.data.DataLoader(training_ds, batch_size=batch_size, shuffle=True) diff --git a/engiopt/gan_cnn_2d/gan_cnn_2d.py b/engiopt/gan_cnn_2d/gan_cnn_2d.py index d8202971..7a5cdbdc 100644 --- a/engiopt/gan_cnn_2d/gan_cnn_2d.py +++ b/engiopt/gan_cnn_2d/gan_cnn_2d.py @@ -205,7 +205,7 @@ def forward(self, x: th.Tensor) -> th.Tensor: # Configure data loader training_ds = problem.dataset.with_format("torch", device=device)["train"] - training_ds = th.utils.data.TensorDataset(training_ds["optimal_design"].flatten(1)) + training_ds = th.utils.data.TensorDataset(training_ds["optimal_design"][:].flatten(1)) dataloader = th.utils.data.DataLoader( training_ds, batch_size=args.batch_size, diff --git a/pyproject.toml b/pyproject.toml index d93efba1..9463ca11 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -37,7 +37,7 @@ dependencies = [ "scikit-learn >= 1.6.0", "hyppo >= 0.5.0", "kaleido >= 0.2.1", - "datasets >= 3.1.0,<4.0.0", + "datasets >=4.0.0", ] dynamic = ["version"]