Source code for qailab.torch.qmodel

  1""" Module with QModel """
  2from collections.abc import Callable
  3from typing import Literal
  4from sklearn.base import BaseEstimator
  5import torch
  6from torch import Tensor, optim, nn
  7from torch.optim import Optimizer
  8from torch.utils.data import DataLoader, TensorDataset, random_split
  9from tqdm import tqdm
 10import numpy as np
 11import pandas as pd
 12
 13try:
 14    from ptseries.optimizers import HybridOptimizer
 15except ImportError:

[docs]
 16    class HybridOptimizer():
 17        """Dummy HO"""
 18        # pylint: disable=too-few-public-methods
 19
 20        def __init__(
 21            self,
 22            model,
 23            lr_classical=0.01,
 24            lr_quantum=0.01,
 25            optimizer_quantum='SGD',
 26            optimizer_classical='Adam',
 27            betas=(0.9, 0.999),
 28            spsa_resamplings=1,
 29            spsa_gamma_decay=0.101,
 30            spsa_alpha_decay=0.602
 31        ):
 32            pass

 33
 34AVAILABLE_OPTIMIZERS: dict[str, type[Optimizer] | type[HybridOptimizer]] = {opt.__name__.lower(): opt for opt in [
 35    optim.Adam, optim.AdamW, optim.SGD, optim.Adadelta, optim.Adagrad,
 36    optim.Adamax, optim.RMSprop, optim.Rprop, optim.LBFGS, HybridOptimizer]}
 37
 38

[docs]
 39class QModel(BaseEstimator):
 40    """ Quantum model class
 41
 42    Parameters
 43    ----------
 44    module: nn.Module
 45        pytorch Module representing the quantum or classical neural network.
 46    loss: Callable
 47        pytorch loss function to be used during training.
 48    optimizer_type: type[Optimizer] | str, default = "adamw"
 49        pytorch Optimizer class to be used during training.
 50    learning_rate: float | Literal['auto'], default = "auto"
 51        learning rate used by the optimizer, "auto" sets it to optimizer's default one.
 52    quantum_learning_rate: float | Literal['auto'], default = 'auto'
 53        learning rate for quantum layers used by the HybridOptimizer, "auto" sets it to optimizer's default one.
 54    batch_size: int, default = 1
 55        number of training examples in batch.
 56    epochs: int, default = 1
 57        number of epochs to train the model.
 58    validation_fraction: float, default = 0.2
 59       share of the training dataset to be used for validation.
 60    shuffle: bool, default = True
 61        whether to shuffle data every epoch.
 62    device: {"cpu","cuda","mps"}, default="cpu"
 63        the device neural network will be trained on.
 64
 65    Attributes
 66    ----------
 67    optimizer: Optimizer
 68        pytorch optimizer object used during training
 69    loss_history: dict[str,list]
 70        history of loss values from the last fit call. dict contains keys 'training' and 'validation'
 71    """
 72
 73    # pylint: disable=too-many-instance-attributes
 74    # Reasonable amount for model training
 75
 76    module: nn.Module
 77    loss: Callable
 78    optimizer_type: type[Optimizer] | type[HybridOptimizer]
 79    optimizer: Optimizer
 80    learning_rate: float | Literal['auto']
 81    quantum_learning_rate: float | Literal['auto']
 82    batch_size: int
 83    epochs: int
 84    validation_fraction: float
 85    shuffle: bool
 86    device: Literal["cpu", "cuda", "mps"]
 87    metric: Literal["accuracy", "mse"] | None
 88
 89    def __init__(
 90        self,
 91        module: nn.Module,
 92        loss: Callable,
 93        optimizer_type: type[Optimizer] | type[HybridOptimizer] | str = 'adamw',
 94        learning_rate: float | Literal['auto'] = 'auto',
 95        quantum_learning_rate: float | Literal['auto'] = 'auto',
 96        batch_size: int = 1,
 97        epochs: int = 1,
 98        validation_fraction: float = 0.2,
 99        shuffle: bool = True,
100        device: Literal["cpu", "cuda", "mps"] = "cpu",
101        metric: Literal["accuracy", "mse"] | None = None
102    ):
103        super().__init__()
104        self.module = module
105        self.loss = loss
106        if isinstance(optimizer_type, str):
107            if optimizer_type not in AVAILABLE_OPTIMIZERS:
108                raise ValueError(
109                    f"Unknown optimizer: {optimizer_type}. Available optimizers are: {list(AVAILABLE_OPTIMIZERS.keys())}")
110            optimizer_type = AVAILABLE_OPTIMIZERS[optimizer_type]
111        self.optimizer_type = optimizer_type
112        self.learning_rate = learning_rate
113        self.quantum_learning_rate = quantum_learning_rate
114        if self.optimizer_type == HybridOptimizer:
115            if self.quantum_learning_rate != 'auto' and self.learning_rate != "auto":
116                self.optimizer = self.optimizer_type(self.module, lr_classical=self.learning_rate,
117                                                     lr_quantum=self.quantum_learning_rate)  # type: ignore
118            elif self.quantum_learning_rate != 'auto' and self.learning_rate == "auto":
119                self.optimizer = self.optimizer_type(self.module, lr_quantum=self.quantum_learning_rate)  # type: ignore
120            elif self.quantum_learning_rate == 'auto' and self.learning_rate != "auto":
121                self.optimizer = self.optimizer_type(self.module, lr_classical=self.learning_rate)  # type: ignore
122            else:
123                self.optimizer = self.optimizer_type(self.module)  # type: ignore
124        elif self.learning_rate == 'auto':
125            self.optimizer = self.optimizer_type(self.module.parameters())  # type: ignore
126        else:
127            self.optimizer = self.optimizer_type(self.module.parameters(), lr=self.learning_rate)  # type: ignore
128        self.batch_size = batch_size
129        self.epochs = epochs
130        self.validation_fraction = validation_fraction
131        self.shuffle = shuffle
132        self.device = device
133        self.metric = metric
134        self.module.to(device)
135
136        self.loss_history = {
137            'training': [],
138            'validation': []
139        }
140

[docs]
141    def reset_parameters(self) -> None:
142        """ Resets parameters of layers """
143        for layer in self.module.modules():
144            if hasattr(layer, "reset_parameters"):
145                layer.reset_parameters()  # type: ignore

146

[docs]
147    def fit(self, x: Tensor | np.ndarray | pd.DataFrame, y: Tensor | np.ndarray | pd.DataFrame | pd.Series) -> "QModel":
148        """ scikit-learn like fit method
149        trains the neural network based on training set (x,y).
150
151        Parameters
152        ----------
153        x: Tensor | np.ndarray | pd.DataFrame
154            The training input samples of shape (n_samples, n_features).
155        y: Tensor | np.array | pd.DataFrame | pd.Series
156            The training target values of shape (n_samples,) or (n_samples, n_outputs)
157
158        Returns
159        -------
160        self: QModel
161            trained NN model
162        """
163        x, y = self._x_y_to_tensor(x, y)
164        tensor_dataset = TensorDataset(x, y)
165        train_dataset, validation_dataset = random_split(tensor_dataset, [1 - self.validation_fraction, self.validation_fraction])
166        train_loader = DataLoader(train_dataset, batch_size=self.batch_size, shuffle=self.shuffle)
167        validation_loader = DataLoader(validation_dataset, batch_size=self.batch_size, shuffle=self.shuffle)
168        self._train_loop(train_loader, validation_loader, self.epochs)
169        return self

170
171    def _x_to_tensor(self, x: Tensor | np.ndarray | pd.DataFrame) -> Tensor:
172        if isinstance(x, np.ndarray):
173            x = torch.tensor(x, dtype=torch.float32)
174        elif isinstance(x, pd.DataFrame):
175            x = torch.tensor(x.values, dtype=torch.float32)
176        x = x.to(self.device)
177        return x
178
179    def _x_y_to_tensor(
180        self,
181        x: Tensor | np.ndarray | pd.DataFrame,
182        y: Tensor | np.ndarray | pd.DataFrame | pd.Series
183
184    ) -> tuple[Tensor, Tensor]:
185        x = self._x_to_tensor(x)
186        if isinstance(y, np.ndarray):
187            if y.dtype.kind == "i":
188                y = torch.tensor(y, dtype=torch.int64)
189            else:
190                y = torch.tensor(y, dtype=torch.float32)
191        elif isinstance(y, pd.DataFrame):
192            y = torch.tensor(y.values, dtype=torch.float32)
193        elif isinstance(y, pd.Series):
194            if y.dtype == np.dtype('int64'):
195                y = torch.tensor(y.values, dtype=torch.int64)
196            else:
197                y = torch.tensor(y.values, dtype=torch.float32)
198        if x.shape[0] != y.shape[0]:
199            raise ValueError("X and y tensors should have the same first dimension")
200        y = y.to(self.device)
201        return x, y
202

[docs]
203    def fit_predict(self, x: Tensor | np.ndarray | pd.DataFrame, y: Tensor | np.ndarray | pd.DataFrame | pd.Series) -> Tensor:
204        """ scikit-learn like fit_predict method
205        trains the neural network based on training set (x,y) and predicts values for training examples x
206        combines fit and predict methods into one.
207
208        Parameters
209        ----------
210        x: Tensor | np.ndarray | pd.DataFrame
211            The training input samples of shape (n_samples, n_features).
212        y: Tensor | np.array | pd.DataFrame | pd.Series
213            The training target values of shape (n_samples,) or (n_samples, n_outputs).
214
215        Returns
216        -------
217        y_pred: Tensor
218            The predicted values for the training examples x.
219        """
220        self.fit(x, y)
221        return self.predict(x)

222
223    @staticmethod
224    def _accuracy(y_pred, y_gt):
225        if y_gt.dim() == 2:
226            y_gt = torch.argmax(y_gt, dim=1)
227        if y_pred.dim() == 2:
228            y_pred = torch.argmax(y_pred, dim=1)
229        else:
230            y_pred = torch.where(y_pred > 0.5, 1.0, 0.0)
231        return (y_pred.eq(y_gt)).sum().item() / len(y_gt)
232
233    @staticmethod
234    def _mse(y_pred, y_gt):
235        return ((y_pred - y_gt)**2).sum().item() / len(y_gt)
236
237    def _train_loop(self, train_loader: DataLoader, validation_loader: DataLoader, epochs: int):
238        self.loss_history = {
239            'training': [],
240            'validation': []
241        }
242        pbar = tqdm(range(epochs), total=epochs, unit="epochs")
243        for epoch in pbar:
244
245            self.module.train()
246            self._train_one_epoch(train_loader)
247
248            self.module.eval()
249            with torch.inference_mode():
250                valid_loss, valid_metric = self._validate_one_epoch(validation_loader)
251            if self.metric == "mse":
252                pbar.set_postfix(loss=valid_loss, mse=valid_metric, epoch=epoch + 1)
253            elif self.metric == "accuracy":
254                pbar.set_postfix(loss=valid_loss, acc=valid_metric, epoch=epoch + 1)
255            else:
256                pbar.set_postfix(loss=valid_loss, epoch=epoch + 1)
257
258    def _train_one_epoch(self, train_loader: DataLoader) -> tuple[np.floating, np.floating]:
259        losses = []
260        metrics = []
261        pbar = tqdm(train_loader, unit="batches", leave=False)
262
263        for batch, (x, y) in enumerate(pbar):
264            self.optimizer.zero_grad()
265            outputs = self.module(x)
266            loss = self.loss(outputs, y)
267            if self.metric == "mse":
268                metrics.append(self._mse(outputs, y))
269            elif self.metric == "accuracy":
270                metrics.append(self._accuracy(outputs, y))
271            loss.backward()
272            self.optimizer.step()
273            losses.append(loss.item())
274            if self.metric == "mse":
275                pbar.set_postfix(loss=loss.item(), mse=metrics[-1], batch=batch + 1)
276            elif self.metric == "accuracy":
277                pbar.set_postfix(loss=loss.item(), acc=metrics[-1], batch=batch + 1)
278            else:
279                pbar.set_postfix(loss=loss.item(), batch=batch + 1)
280
281        self.loss_history['training'].append(np.mean(losses))
282        return np.mean(losses), np.mean(metrics)
283
284    def _validate_one_epoch(self, validation_loader: DataLoader) -> tuple[np.floating, np.floating]:
285        losses = []
286        metrics = []
287        pbar = tqdm(validation_loader, unit="batches", leave=False)
288
289        for batch, (x, y) in enumerate(pbar):
290            outputs = self.module(x)
291            loss = self.loss(outputs, y)
292            if self.metric == "mse":
293                metrics.append(self._mse(outputs, y))
294            elif self.metric == "accuracy":
295                metrics.append(self._accuracy(outputs, y))
296            losses.append(loss.item())
297            if self.metric == "mse":
298                pbar.set_postfix(loss=loss.item(), mse=metrics[-1], batch=batch + 1)
299            elif self.metric == "accuracy":
300                pbar.set_postfix(loss=loss.item(), acc=metrics[-1], batch=batch + 1)
301            else:
302                pbar.set_postfix(loss=loss.item(), batch=batch + 1)
303
304        self.loss_history['validation'].append(np.mean(losses))
305        return np.mean(losses), np.mean(metrics)
306

[docs]
307    def predict(self, x: Tensor | np.ndarray | pd.DataFrame) -> Tensor:
308        """ scikit-learn like predict method
309        predicts values for examples input examples x.
310
311        Parameters
312        ----------
313        x: Tensor | np.ndarray | pd.DataFrame
314           The input samples of shape (n_samples, n_features).
315
316        Returns
317        -------
318        y_pred: Tensor | np.ndarray | pd.DataFrame
319            The predicted values for examples x.
320
321        """
322        x = self._x_to_tensor(x)
323        self.module.eval()
324        with torch.inference_mode():
325            result = self.module(x).cpu()
326        return result

327

[docs]
328    def set_params(self, **params):
329        """ scikit-learn like param setting method
330        allows changing parameters of the model set in constructor.
331
332        Parameters
333        ----------
334        **params: dict
335            Keyword arguments representing the parameters to be set.
336        """
337
338        def _update_optimizer():
339
340            if self.optimizer_type == HybridOptimizer:
341                if self.quantum_learning_rate != 'auto' and self.learning_rate != "auto":
342                    self.optimizer = self.optimizer_type(self.module, lr_classical=self.learning_rate,
343                                                         lr_quantum=self.quantum_learning_rate)  # type: ignore
344                elif self.quantum_learning_rate != 'auto' and self.learning_rate == "auto":
345                    self.optimizer = self.optimizer_type(self.module, lr_quantum=self.quantum_learning_rate)  # type: ignore
346                elif self.quantum_learning_rate == 'auto' and self.learning_rate != "auto":
347                    self.optimizer = self.optimizer_type(self.module, lr_classical=self.learning_rate)  # type: ignore
348                else:
349                    self.optimizer = self.optimizer_type(self.module)  # type: ignore
350            elif self.learning_rate == 'auto':
351                self.optimizer = self.optimizer_type(self.module.parameters())  # type: ignore
352            else:
353                self.optimizer = self.optimizer_type(self.module.parameters(), lr=self.learning_rate)  # type: ignore
354
355        if not params:
356            return self
357        valid_params = self.get_params(deep=False)
358        for key, value in params.items():
359            if key not in valid_params:
360                raise ValueError(
361                    f"Invalid parameter {key!r} for estimator {self}. "
362                    f"Valid parameters are: {valid_params.keys()!r}."
363                )
364            if key == "device":
365                self.device = value
366                self.module.to(self.device)
367            elif key == "optimizer_type":
368                self.optimizer_type = value
369                _update_optimizer()
370            elif key == "learning_rate":
371                self.learning_rate = value
372                _update_optimizer()
373            elif key == "quantum_learning_rate":
374                self.quantum_learning_rate = value
375                _update_optimizer()
376            elif key == "module":
377                self.module = value
378                _update_optimizer()
379        return self

380

[docs]
381    def to_torch_module(self) -> nn.Module:
382        """Returns QModel's module with torch neural network.
383
384        Returns:
385            nn.Module: Torch neural network.
386        """
387        return self.module