Source code for qailab.torch.qmodel

  1""" Module with QModel """
  2from collections.abc import Callable
  3from typing import Literal
  4from sklearn.base import BaseEstimator
  5import torch
  6from torch import Tensor, optim, nn
  7from torch.optim import Optimizer
  8from torch.utils.data import DataLoader, TensorDataset, random_split
  9from tqdm import tqdm
 10import numpy as np
 11import pandas as pd
 12
 13try:
 14    from ptseries.optimizers import HybridOptimizer
 15except ImportError:
[docs] 16 class HybridOptimizer(): 17 """Dummy HO""" 18 # pylint: disable=too-few-public-methods 19 20 def __init__( 21 self, 22 model, 23 lr_classical=0.01, 24 lr_quantum=0.01, 25 optimizer_quantum='SGD', 26 optimizer_classical='Adam', 27 betas=(0.9, 0.999), 28 spsa_resamplings=1, 29 spsa_gamma_decay=0.101, 30 spsa_alpha_decay=0.602 31 ): 32 pass
# Maps the lower-cased class name of every supported optimizer to the class
# itself, so that an optimizer can be selected by a plain string.
AVAILABLE_OPTIMIZERS: dict[str, type[Optimizer] | type[HybridOptimizer]] = {
    optimizer_cls.__name__.lower(): optimizer_cls
    for optimizer_cls in (
        optim.Adam,
        optim.AdamW,
        optim.SGD,
        optim.Adadelta,
        optim.Adagrad,
        optim.Adamax,
        optim.RMSprop,
        optim.Rprop,
        optim.LBFGS,
        HybridOptimizer,
    )
}


class QModel(BaseEstimator):
    """ Quantum model class

    scikit-learn style wrapper that trains a pytorch module and predicts with it.

    Parameters
    ----------
    module: nn.Module
        pytorch Module representing the quantum or classical neural network.
    loss: Callable
        pytorch loss function to be used during training.
    optimizer_type: type[Optimizer] | str, default = "adamw"
        pytorch Optimizer class to be used during training.
        A string is looked up (case-insensitively) in AVAILABLE_OPTIMIZERS.
    learning_rate: float | Literal['auto'], default = "auto"
        learning rate used by the optimizer, "auto" sets it to optimizer's default one.
    quantum_learning_rate: float | Literal['auto'], default = 'auto'
        learning rate for quantum layers used by the HybridOptimizer, "auto" sets it to optimizer's default one.
    batch_size: int, default = 1
        number of training examples in batch.
    epochs: int, default = 1
        number of epochs to train the model.
    validation_fraction: float, default = 0.2
        share of the training dataset to be used for validation.
    shuffle: bool, default = True
        whether to shuffle data every epoch.
    device: {"cpu","cuda","mps"}, default="cpu"
        the device neural network will be trained on.
    metric: {"accuracy","mse"}, default="accuracy"
        metric shown in the progress bars next to the loss.
        Any other value disables metric reporting.

    Attributes
    ----------
    optimizer: Optimizer
        pytorch optimizer object used during training
    loss_history: dict[str,list]
        history of loss values from the last fit call.
        dict contains keys 'training' and 'validation'
    """

    # pylint: disable=too-many-instance-attributes
    # Reasonable amount for model training

    module: nn.Module
    loss: Callable
    optimizer_type: type[Optimizer] | type[HybridOptimizer]
    optimizer: Optimizer
    learning_rate: float | Literal['auto']
    quantum_learning_rate: float | Literal['auto']
    batch_size: int
    epochs: int
    validation_fraction: float
    shuffle: bool
    device: Literal["cpu", "cuda", "mps"] = "cpu"
    metric: Literal["accuracy", "mse"]

    # Progress-bar label used for each supported metric.
    _METRIC_LABELS = {"mse": "mse", "accuracy": "acc"}

    # Parameters whose change makes the current optimizer object stale.
    _OPTIMIZER_PARAMS = frozenset(
        {"module", "optimizer_type", "learning_rate", "quantum_learning_rate"}
    )

    def __init__(
        self,
        module: nn.Module,
        loss: Callable,
        optimizer_type: type[Optimizer] | type[HybridOptimizer] | str = 'adamw',
        learning_rate: float | Literal['auto'] = 'auto',
        quantum_learning_rate: float | Literal['auto'] = 'auto',
        batch_size: int = 1,
        epochs: int = 1,
        validation_fraction: float = 0.2,
        shuffle: bool = True,
        device: Literal["cpu", "cuda", "mps"] = "cpu",
        metric: Literal["accuracy", "mse"] = "accuracy"
    ):
        super().__init__()
        self.module = module
        self.loss = loss
        self.optimizer_type = self._resolve_optimizer_type(optimizer_type)
        self.learning_rate = learning_rate
        self.quantum_learning_rate = quantum_learning_rate
        self.batch_size = batch_size
        self.epochs = epochs
        self.validation_fraction = validation_fraction
        self.shuffle = shuffle
        self.device = device
        self.metric = metric
        # Move the module first so the optimizer is created for parameters
        # that already live on the target device.
        self.module.to(device)
        self.optimizer = self._build_optimizer()

        self.loss_history = {
            'training': [],
            'validation': []
        }

    @staticmethod
    def _resolve_optimizer_type(
        optimizer_type: type[Optimizer] | type[HybridOptimizer] | str
    ) -> type[Optimizer] | type[HybridOptimizer]:
        """ Turns an optimizer name into the optimizer class.

        Parameters
        ----------
        optimizer_type: type[Optimizer] | type[HybridOptimizer] | str
            Optimizer class, returned unchanged, or a name that is looked up
            case-insensitively in AVAILABLE_OPTIMIZERS.

        Returns
        -------
        optimizer_type: type[Optimizer] | type[HybridOptimizer]
            The optimizer class.

        Raises
        ------
        ValueError
            If the name is not a key of AVAILABLE_OPTIMIZERS.
        """
        if not isinstance(optimizer_type, str):
            return optimizer_type
        name = optimizer_type.lower()
        if name not in AVAILABLE_OPTIMIZERS:
            raise ValueError(
                f"Unknown optimizer: {optimizer_type}. Available optimizers are: {list(AVAILABLE_OPTIMIZERS.keys())}")
        return AVAILABLE_OPTIMIZERS[name]

    def _build_optimizer(self):
        """ Creates an optimizer object from the current settings.

        HybridOptimizer receives the module itself plus ``lr_classical`` /
        ``lr_quantum``; every other optimizer receives ``module.parameters()``
        plus ``lr``. A learning rate equal to "auto" is not passed at all, so
        the optimizer's own default is used.
        """
        if self.optimizer_type is HybridOptimizer:
            rates = {}
            if self.learning_rate != 'auto':
                rates["lr_classical"] = self.learning_rate
            if self.quantum_learning_rate != 'auto':
                rates["lr_quantum"] = self.quantum_learning_rate
            return self.optimizer_type(self.module, **rates)  # type: ignore
        if self.learning_rate == 'auto':
            return self.optimizer_type(self.module.parameters())  # type: ignore
        return self.optimizer_type(self.module.parameters(), lr=self.learning_rate)  # type: ignore

    def reset_parameters(self) -> None:
        """ Resets parameters of layers

        Calls ``reset_parameters`` on every submodule that defines it.
        """
        for layer in self.module.modules():
            if hasattr(layer, "reset_parameters"):
                layer.reset_parameters()  # type: ignore

    def fit(self, x: Tensor | np.ndarray | pd.DataFrame, y: Tensor | np.ndarray | pd.DataFrame | pd.Series) -> "QModel":
        """ scikit-learn like fit method
        trains the neural network based on training set (x,y).

        The data is split randomly into a training part and a validation part
        of relative size ``validation_fraction``.

        Parameters
        ----------
        x: Tensor | np.ndarray | pd.DataFrame
            The training input samples of shape (n_samples, n_features).
        y: Tensor | np.array | pd.DataFrame | pd.Series
            The training target values of shape (n_samples,) or (n_samples, n_outputs)

        Returns
        -------
        self: QModel
            trained NN model
        """
        x, y = self._x_y_to_tensor(x, y)
        tensor_dataset = TensorDataset(x, y)
        train_dataset, validation_dataset = random_split(
            tensor_dataset, [1 - self.validation_fraction, self.validation_fraction])
        train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=self.shuffle)
        validation_loader = DataLoader(validation_dataset, batch_size=self.batch_size, shuffle=self.shuffle)
        self._train_loop(train_loader, validation_loader, self.epochs)
        return self

    def _x_to_tensor(self, x: Tensor | np.ndarray | pd.DataFrame) -> Tensor:
        """ Converts input samples to a tensor placed on ``self.device``.

        Arrays and data frames become float32 tensors; any other input is
        only moved to the device.
        """
        if isinstance(x, np.ndarray):
            x = torch.tensor(x, dtype=torch.float32)
        elif isinstance(x, pd.DataFrame):
            x = torch.tensor(x.values, dtype=torch.float32)
        x = x.to(self.device)
        return x

    def _x_y_to_tensor(
        self,
        x: Tensor | np.ndarray | pd.DataFrame,
        y: Tensor | np.ndarray | pd.DataFrame | pd.Series

    ) -> tuple[Tensor, Tensor]:
        """ Converts samples and targets to tensors placed on ``self.device``.

        Arrays and series with a signed integer numpy dtype become int64
        tensors; every other array, series or data frame becomes float32.

        Raises
        ------
        ValueError
            If x and y differ in their first dimension.
        """
        x = self._x_to_tensor(x)
        if isinstance(y, pd.DataFrame):
            y = torch.tensor(y.values, dtype=torch.float32)
        elif isinstance(y, (np.ndarray, pd.Series)):
            values = y.values if isinstance(y, pd.Series) else y
            # Arrays and series follow the same rule, so an int32 series is
            # handled like an int32 array. Pandas extension dtypes are not
            # numpy dtypes and keep the float32 conversion.
            signed_int = isinstance(y.dtype, np.dtype) and y.dtype.kind == "i"
            y = torch.tensor(values, dtype=torch.int64 if signed_int else torch.float32)
        if x.shape[0] != y.shape[0]:
            raise ValueError("X and y tensors should have the same first dimension")
        y = y.to(self.device)
        return x, y

    def fit_predict(self, x: Tensor | np.ndarray | pd.DataFrame, y: Tensor | np.ndarray | pd.DataFrame | pd.Series) -> Tensor:
        """ scikit-learn like fit_predict method
        trains the neural network based on training set (x,y) and predicts values for training examples x
        combines fit and predict methods into one.

        Parameters
        ----------
        x: Tensor | np.ndarray | pd.DataFrame
            The training input samples of shape (n_samples, n_features).
        y: Tensor | np.array | pd.DataFrame | pd.Series
            The training target values of shape (n_samples,) or (n_samples, n_outputs).

        Returns
        -------
        y_pred: Tensor
            The predicted values for the training examples x.
        """
        self.fit(x, y)
        return self.predict(x)

    @staticmethod
    def _accuracy(y_pred, y_gt):
        """ Share of rows whose arg-max over dim 1 equals the target. """
        return (torch.argmax(y_pred, dim=1).eq(y_gt)).sum().item() / len(y_gt)

    @staticmethod
    def _mse(y_pred, y_gt):
        """ Sum of squared differences divided by the number of targets. """
        return ((y_pred - y_gt)**2).sum().item() / len(y_gt)

    @staticmethod
    def _mean_or_nan(values: list) -> np.floating:
        """ Mean of the values, or NaN for an empty list.

        ``np.mean`` of an empty list also gives NaN but emits a
        RuntimeWarning; this happens when no metric is tracked or a loader
        yields no batches.
        """
        if not values:
            return np.float64("nan")
        return np.mean(values)

    def _compute_metric(self, outputs, y):
        """ Value of the configured metric, or None when none is configured. """
        if self.metric == "mse":
            return self._mse(outputs, y)
        if self.metric == "accuracy":
            return self._accuracy(outputs, y)
        return None

    def _progress_stats(self, loss, metric_value, **counters) -> dict:
        """ Keyword arguments for ``tqdm.set_postfix``.

        Always contains the loss and the given counters; the metric is added
        under its short label only when a supported metric is configured.
        """
        stats = {"loss": loss}
        label = self._METRIC_LABELS.get(self.metric)
        if label is not None and metric_value is not None:
            stats[label] = metric_value
        stats.update(counters)
        return stats

    def _train_loop(self, train_loader: DataLoader, validation_loader: DataLoader, epochs: int):
        """ Runs training and validation for the given number of epochs.

        ``loss_history`` is cleared first, so it only holds the values of
        this call.
        """
        self.loss_history = {
            'training': [],
            'validation': []
        }
        pbar = tqdm(range(epochs), total=epochs, unit="epochs")
        for epoch in pbar:

            self.module.train()
            self._train_one_epoch(train_loader)

            self.module.eval()
            with torch.inference_mode():
                valid_loss, valid_metric = self._validate_one_epoch(validation_loader)
            pbar.set_postfix(**self._progress_stats(valid_loss, valid_metric, epoch=epoch + 1))

    def _run_epoch(self, loader: DataLoader, history_key: str, train: bool) -> tuple[np.floating, np.floating]:
        """ Passes once over the loader and records the mean loss.

        Parameters
        ----------
        loader: DataLoader
            Batches of (x, y) pairs.
        history_key: str
            Key of ``loss_history`` the mean loss is appended to.
        train: bool
            Whether to back-propagate the loss and step the optimizer.

        Returns
        -------
        mean_loss, mean_metric: tuple[np.floating, np.floating]
            Means over all batches; NaN where nothing was collected.
        """
        losses = []
        metrics = []
        pbar = tqdm(loader, unit="batches", leave=False)

        for batch, (x, y) in enumerate(pbar):
            if train:
                self.optimizer.zero_grad()
            outputs = self.module(x)
            loss = self.loss(outputs, y)
            metric_value = self._compute_metric(outputs, y)
            if metric_value is not None:
                metrics.append(metric_value)
            if train:
                loss.backward()
                self.optimizer.step()
            loss_value = loss.item()
            losses.append(loss_value)
            pbar.set_postfix(**self._progress_stats(loss_value, metric_value, batch=batch + 1))

        mean_loss = self._mean_or_nan(losses)
        self.loss_history[history_key].append(mean_loss)
        return mean_loss, self._mean_or_nan(metrics)

    def _train_one_epoch(self, train_loader: DataLoader) -> tuple[np.floating, np.floating]:
        """ Trains on every batch once; returns mean loss and mean metric. """
        return self._run_epoch(train_loader, 'training', train=True)

    def _validate_one_epoch(self, validation_loader: DataLoader) -> tuple[np.floating, np.floating]:
        """ Evaluates every batch once without updating the module; returns mean loss and mean metric. """
        return self._run_epoch(validation_loader, 'validation', train=False)

    def predict(self, x: Tensor | np.ndarray | pd.DataFrame) -> Tensor:
        """ scikit-learn like predict method
        predicts values for input examples x.

        Parameters
        ----------
        x: Tensor | np.ndarray | pd.DataFrame
            The input samples of shape (n_samples, n_features).

        Returns
        -------
        y_pred: Tensor
            The module output for examples x, moved to the CPU.

        """
        x = self._x_to_tensor(x)
        self.module.eval()
        with torch.inference_mode():
            result = self.module(x).cpu()
        return result

    def set_params(self, **params) -> "QModel":
        """ scikit-learn like param setting method
        allows changing parameters of the model set in constructor.

        All names are validated before anything is changed. The optimizer is
        rebuilt once, after every value has been applied, when the module,
        the optimizer type or one of the learning rates was given.

        Parameters
        ----------
        **params: dict
            Keyword arguments representing the parameters to be set.

        Returns
        -------
        self: QModel
            the model itself.

        Raises
        ------
        ValueError
            If a name is not a constructor parameter or ``optimizer_type`` is
            an unknown optimizer name.
        """
        if not params:
            return self
        valid_params = self.get_params(deep=False)
        for key in params:
            if key not in valid_params:
                raise ValueError(
                    f"Invalid parameter {key!r} for estimator {self}. "
                    f"Valid parameters are: {valid_params.keys()!r}."
                )

        for key, value in params.items():
            if key == "optimizer_type":
                value = self._resolve_optimizer_type(value)
            setattr(self, key, value)

        # A new module or a new device both require the module to be moved.
        if "module" in params or "device" in params:
            self.module.to(self.device)
        if self._OPTIMIZER_PARAMS.intersection(params):
            self.optimizer = self._build_optimizer()
        return self

    def to_torch_module(self) -> nn.Module:
        """ Returns QModel's module with torch neural network.

        Returns
        -------
        module: nn.Module
            Torch neural network.
        """
        return self.module