-from sklearn.datasets import load_wine
-from sklearn.model_selection import train_test_split
-import matplotlib.pyplot as plt
-import numpy as np
-import torch
-
-
class Model(torch.nn.Module):
    """Two-layer feed-forward classifier: Linear -> Tanh -> Linear.

    The hidden layer is four times as wide as the input; the output layer
    emits one raw logit per class (no softmax — pair with CrossEntropyLoss).
    """

    def __init__(self, n_features: int, n_classes: int) -> None:
        super().__init__()
        hidden = n_features * 4
        self.fc1 = torch.nn.Linear(n_features, hidden)
        self.fc2 = torch.nn.Linear(hidden, n_classes)
        self.act = torch.nn.Tanh()

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Return class logits for input features ``x``."""
        return self.fc2(self.act(self.fc1(x)))
-
-
def get_grad_dist(model: torch.nn.Module) -> list[float]:
    """Flatten all parameter gradients of ``model`` into a single list.

    Parameters whose ``.grad`` is still ``None`` (no backward pass has
    touched them yet) are skipped, so this returns ``[]`` for a freshly
    constructed model.

    Note: the original signature was annotated ``-> None`` even though it
    returns the collected gradients; the annotation is corrected here.
    """
    grads: list[float] = []
    for param in model.parameters():
        if param.grad is not None:
            grads.extend(param.grad.view(-1).tolist())
    return grads
-
-
def main() -> None:
    """Train a small MLP on the wine dataset with per-sample SGD and plot
    the distribution of gradient magnitudes collected during training.
    """
    data = load_wine()
    X = torch.Tensor(data.data)
    y = torch.LongTensor(data.target)
    # The held-out split is kept for parity with the original experiment,
    # even though only the training half is consumed below.
    X_train, X_test, y_train, y_test = train_test_split(
        X, y, test_size=0.2, random_state=1337
    )

    model = Model(len(X[0]), 3)
    optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)
    criterion = torch.nn.CrossEntropyLoss()

    grads: list[float] = []
    for _ in range(5):
        model.train()
        epoch_loss = 0.0
        # Per-sample SGD: each (features, label) pair is one optimizer step.
        # Loop variables renamed so they no longer shadow the dataset X / y.
        for features, label in zip(X_train, y_train):
            optimizer.zero_grad()
            logits = model(features)
            loss = criterion(logits, label)
            epoch_loss += loss.item()
            loss.backward()
            optimizer.step()
            # Snapshot gradients after every step to build the distribution.
            grads += get_grad_dist(model)

        print(f"Loss: {epoch_loss / len(X_train)}")

    # Magnitudes only; drop exact zeros so log-scale binning is well-defined.
    grads_abs = np.abs(np.array(grads))
    grads_abs = grads_abs[grads_abs > 0]
    print(f"Min magnitude: {grads_abs.min()}, Max: {grads_abs.max()}, Mean: {np.mean(grads_abs)}")

    # Logarithmically spaced bins suit the heavy-tailed magnitude distribution.
    log_min = np.log10(grads_abs.min())
    log_max = np.log10(grads_abs.max())
    bins = np.logspace(log_min, log_max, 50)
    counts, bin_edges = np.histogram(grads_abs, bins=bins)
    # Geometric mean is the natural bin center on a logarithmic axis.
    bin_centers = np.sqrt(bin_edges[:-1] * bin_edges[1:])
    bin_widths = np.diff(bin_edges)
    # Normalize counts to a probability density (should integrate to ~1).
    probability_density = counts / (np.sum(counts) * bin_widths)

    fig, ax = plt.subplots(figsize=(12, 6))
    ax.loglog(bin_centers, probability_density, linewidth=3)
    ax.set_xlabel('Gradient Magnitude', fontsize=12)
    ax.set_ylabel('Probability density', fontsize=12)
    ax.set_title('Distribution of Gradient Magnitudes', fontsize=14)
    ax.grid(True, which='both', alpha=0.3)
    plt.tight_layout()

    # Sanity check that the density normalization is correct.
    integral = np.sum(probability_density * bin_widths)
    print(f"Integral of PDF: {integral}")  # Should be close to 1
    plt.show()


if __name__ == "__main__":
    main()