The torch package brings deep learning to R by providing bindings to the popular PyTorch library. This project demonstrates how to build and train a simple neural network with torch in R.
# Installation (run once)
# install.packages("torch")
library(torch)
library(ggplot2)
# torch::install_torch()  # downloads the LibTorch backend on first use
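As a quick sanity check (a small sketch added here, not part of the original write-up), we can confirm the backend loads by creating a tensor and inspecting its shape:
# Sanity check (sketch): create a small tensor and look at its shape
check_tensor <- torch_tensor(c(1, 2, 3))
print(check_tensor)
print(check_tensor$shape)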
# Set seeds for reproducibility (torch has its own RNG, so seed it as well)
set.seed(42)
torch_manual_seed(42)
x <- torch_randn(100, 1)
y <- 3 * x + 2 + torch_randn(100, 1) * 0.3
# Display the first few simulated data points
head(data.frame(
  x = as.numeric(x$squeeze()),
  y = as.numeric(y$squeeze())
))
## x y
## 1 1.5389941 6.48198080
## 2 0.5420342 3.83502460
## 3 -0.2955945 0.52947181
## 4 -0.6785704 0.09556369
## 5 -0.5023355 0.29653335
## 6 2.5532901 10.54154968
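Because the data were simulated from y = 3x + 2 plus noise, an ordinary least squares fit makes a handy baseline. This check is an added sketch, and the exact coefficients depend on the simulated sample.
# Baseline check (sketch): OLS should recover an intercept near 2 and a slope near 3
baseline_df <- data.frame(x = as.numeric(x$squeeze()), y = as.numeric(y$squeeze()))
coef(lm(y ~ x, data = baseline_df))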
Now we define the neural network architecture using torch's module system:
nnet <- nn_module(
  initialize = function() {
    # Define layers
    self$layer1 <- nn_linear(1, 8)
    self$layer2 <- nn_linear(8, 1)
  },
  forward = function(x) {
    # Define the forward pass
    x %>%
      self$layer1() %>%   # first linear transformation
      nnf_relu() %>%      # ReLU activation function
      self$layer2()       # second linear transformation
  }
)
# Instantiate the model
model <- nnet()
# Display the model structure
print(model)
## An `nn_module` containing 25 parameters.
##
## ── Modules ─────────────────────────────────────────────────────────────────────
## • layer1: <nn_linear> #16 parameters
## • layer2: <nn_linear> #9 parameters
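The 25 parameters break down as 8 x 1 weights plus 8 biases in layer1 and 1 x 8 weights plus 1 bias in layer2. A quick count (an added sketch) confirms this:
# Count parameters per tensor (sketch): 8*1 + 8 = 16 and 1*8 + 1 = 9
sapply(model$parameters, function(p) p$numel())
sum(sapply(model$parameters, function(p) p$numel()))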
The training process requires defining how the model will learn from the data:
# Set up the Adam optimizer with a learning rate of 0.02
optimizer <- optim_adam(model$parameters, lr = 0.02)
# Define the loss function (mean squared error)
loss_fn <- nnf_mse_loss
# Store loss values for plotting
loss_history <- numeric(300)
# Training loop
for (epoch in 1:300) {
  # Set the model to training mode
  model$train()
  # Reset gradients
  optimizer$zero_grad()
  # Forward pass
  y_pred <- model(x)
  # Calculate loss
  loss <- loss_fn(y_pred, y)
  # Backward pass
  loss$backward()
  # Update parameters
  optimizer$step()
  # Store loss for plotting
  loss_history[epoch] <- loss$item()
}
# Visualize the training progress
# Create a data frame for plotting
training_df <- data.frame(
  epoch = 1:300,
  loss = loss_history
)
# Plot the training loss
ggplot(training_df, aes(x = epoch, y = loss)) +
  geom_line(color = "#2c3e65", size = 1) +
  labs(
    title = "Training Loss Over Time",
    subtitle = "Neural Network Learning Progress",
    x = "Epoch",
    y = "Mean Squared Error Loss"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12, color = "gray60")
  )
Training loss plot
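To put a number on where the curve ends up, we can also print the final loss (a small added check; the exact value depends on the simulated data):
# Final training loss after 300 epochs (value varies with the random data)
cat("Final MSE:", round(loss_history[300], 4), "\n")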
# Set the model to evaluation mode
model$eval()
# Generate predictions
with_no_grad({
  y_pred <- model(x)
})
# Convert tensors to R vectors for plotting
x_np <- as.numeric(x$squeeze())
y_np <- as.numeric(y$squeeze())
y_pred_np <- as.numeric(y_pred$squeeze())
# Create a data frame for ggplot
plot_df <- data.frame(
  x = x_np,
  y_actual = y_np,
  y_predicted = y_pred_np
)
# Create the plot
ggplot(plot_df, aes(x = x)) +
  geom_point(aes(y = y_actual, color = "Actual"), alpha = 0.7, size = 2) +
  geom_point(aes(y = y_predicted, color = "Predicted"), alpha = 0.7, size = 1) +
  geom_smooth(aes(y = y_predicted), method = "loess", se = FALSE, color = "#e88c3c", linetype = "dashed") +
  labs(
    title = "Neural Network Regression Results",
    subtitle = "Comparing actual vs predicted values",
    x = "Input (x)",
    y = "Output (y)",
    color = "Data Type"
  ) +
  scale_color_manual(values = c("Actual" = "#3658dc", "Predicted" = "#e48c3c")) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12, color = "gray25"),
    legend.position = "top"
  )
Neural network regression plot
mse <- mean((y_pred_np - y_np)^2)
rmse <- sqrt(mse)
mae <- mean(abs(y_pred_np - y_np))
r_squared <- cor(y_pred_np, y_np)^2
# Create a performance summary table
performance_summary <- data.frame(
  metric = c("Mean Squared Error", "Root Mean Squared Error", "Mean Absolute Error", "R-Squared"),
  value = c(mse, rmse, mae, r_squared)
)
print(performance_summary)
## metric value
## 1 Mean Squared Error 0.09719032
## 2 Root Mean Squared Error 0.31175362
## 3 Mean Absolute Error 0.23808641
## 4 R-Squared 0.98831562
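Note that R-squared above is the squared correlation between predictions and observations; an equivalent check (an added sketch) uses the explained-variance form, which can differ slightly when predictions are biased.
# Explained-variance form of R-squared (sketch)
ss_res <- sum((y_np - y_pred_np)^2)
ss_tot <- sum((y_np - mean(y_np))^2)
1 - ss_res / ss_tot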
# Generate predictions on a dense grid for comparison with the true function
x_grid <- torch_linspace(-3, 3, 100)$unsqueeze(2)
with_no_grad({
  y_grid_pred <- model(x_grid)
})
x_grid_np <- as.numeric(x_grid$squeeze())
y_grid_pred_np <- as.numeric(y_grid_pred$squeeze())
y_grid_true <- 3 * x_grid_np + 2
# Build a data frame comparing the true function and the network's predictions
comparison_df <- data.frame(
  x = x_grid_np,
  y_true = y_grid_true,
  y_predicted = y_grid_pred_np
)
ggplot(comparison_df, aes(x = x)) +
  geom_line(aes(y = y_true, color = "True Function"), size = 2) +
  geom_line(aes(y = y_predicted, color = "Neural Network"), size = 2, linetype = "dashed") +
  geom_point(data = plot_df, aes(y = y_actual), alpha = 0.3, color = "gray50") +
  labs(
    title = "Neural Network vs True Function",
    subtitle = "Assessing the model against the underlying pattern",
    x = "Input (x)",
    y = "Output (y)",
    color = "Function Type"
  ) +
  scale_color_manual(values = c("True Function" = "#2c3e50", "Neural Network" = "#e74c3c")) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12, color = "gray60"),
    legend.position = "top"
  )
Neural network vs true function plot
This part reveals what the network learned by analyzing its parameters:
# Extract learned parameters
layer1_weight <- as.matrix(model$layer1$weight$detach())
layer1_bias <- as.numeric(model$layer1$bias$detach())
layer2_weight <- as.matrix(model$layer2$weight$detach())
layer2_bias <- as.numeric(model$layer2$bias$detach())
cat("First layer (fc1) parameters:\n")
layers display First layer (fc1) parameters:
cat("Weight matrix shape:", dim(layer1_weight), "\n")
layers display Weight matrix shape: 8 1
cat("Bias vector length:", length(layer1_bias), "\n\n")
layers display Bias vector length: 8
cat("Second layer (fc2) parameters:\n")
layers display Second layer (fc2) parameters:
cat("Weight matrix shape:", dim(layer2_weight), "\n")
layers display Weight matrix shape: 1 8
cat("Bias value:", layer2_bias, "\n\n")
layers display Bias value: 0.6431097
cat("First layer weights:\n")
## First layer weights:
print(round(layer1_weight, 4))
## [,1]
## [1,] 1.6058
## [2,] 0.4265
## [3,] 1.2185
## [4,] -0.8082
## [5,] -1.6228
## [6,] 0.1594
## [7,] -0.0204
## [8,] 0.3282
cat("\nFirst layer biases:\n")
##
## First layer biases:
print(round(layer2_bias, 4))
## [1] 0.6431
Next, a small factory function defines networks with different architectures:
create_network <- function(hidden_sizes) {
  nn_module(
    initialize = function() {
      self$layers <- nn_module_list()
      # Hidden layers
      prev_size <- 1
      for (i in seq_along(hidden_sizes)) {
        self$layers$append(nn_linear(prev_size, hidden_sizes[i]))
        prev_size <- hidden_sizes[i]
      }
      # Output layer
      self$layers$append(nn_linear(prev_size, 1))
    },
    forward = function(x) {
      for (i in 1:(length(self$layers) - 1)) {
        x <- nnf_relu(self$layers[[i]](x))
      }
      # No activation on the output layer
      self$layers[[length(self$layers)]](x)
    }
  )
}
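As a quick usage example (an added sketch), the factory returns an nn_module generator that can be instantiated and inspected before training:
# Example (sketch): instantiate a 16-8 network and print its structure
demo_net <- create_network(c(16, 8))()
print(demo_net)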
# Train different architectures
architectures <- list(
  "Simple (8)" = c(8),
  "Deep (16-8)" = c(16, 8),
  "Wide (32)" = c(32),
  "Very Deep (16-16-8)" = c(16, 16, 8)
)
results <- list()
for (arch_name in names(architectures)) {
  # Create and train the model
  net_class <- create_network(architectures[[arch_name]])
  model_temp <- net_class()
  optimizer_temp <- optim_adam(model_temp$parameters, lr = 0.01)
  # Quick training (fewer epochs for comparison)
  for (epoch in 1:200) {
    model_temp$train()
    optimizer_temp$zero_grad()
    y_pred_temp <- model_temp(x)
    loss_temp <- loss_fn(y_pred_temp, y)
    loss_temp$backward()
    optimizer_temp$step()
  }
  # Generate predictions
  model_temp$eval()
  with_no_grad({
    y_pred_arch <- model_temp(x_grid)
  })
  results[[arch_name]] <- data.frame(
    x = x_grid_np,
    y_pred = as.numeric(y_pred_arch$squeeze()),
    architecture = arch_name
  )
}
# Combine results
all_results <- do.call(rbind, results)
# Plot comparison
ggplot(all_results, aes(x = x, y = y_pred, color = architecture)) +
  geom_line(size = 1.2) +
  geom_line(data = comparison_df, aes(y = y_true, color = "True Function"),
            size = 2, linetype = "solid") +
  geom_point(data = plot_df, aes(x = x, y = y_actual),
             color = "gray50", alpha = 0.3, inherit.aes = FALSE) +
  labs(
    title = "Comparison of Different Neural Network Architectures",
    subtitle = "Effects of network depth and width on learning performance",
    x = "Input (x)",
    y = "Output (y)",
    color = "Architecture"
  ) +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12, color = "gray60"),
    legend.position = "top"
  )
Architecture comparison plot
Key Takeaways
- Simple architecture: even a simple two-layer network can learn complex patterns effectively.
- Training process: proper training loops with gradient computation are essential.
- Visualization: plotting both training progress and predictions makes results easy to assess.
- Model evaluation: multiple metrics give a fuller picture of model performance.
- Architecture comparison: different network structures affect learning capabilities.
The torch package provides a straightforward approach to building and experimenting with neural networks in R, bringing the power of deep learning to the R ecosystem. This approach can be extended to more complex datasets and deeper architectures as needed.
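For example, one hypothetical extension (a sketch, not part of the project above) adds dropout between hidden layers so the same factory pattern can cope with noisier, more complex data:
# Hypothetical extension (sketch): the architecture factory with dropout after each hidden layer
create_network_dropout <- function(hidden_sizes, p = 0.2) {
  nn_module(
    initialize = function() {
      self$layers <- nn_module_list()
      prev_size <- 1
      for (i in seq_along(hidden_sizes)) {
        self$layers$append(nn_linear(prev_size, hidden_sizes[i]))
        prev_size <- hidden_sizes[i]
      }
      self$layers$append(nn_linear(prev_size, 1))
      self$dropout <- nn_dropout(p)
    },
    forward = function(x) {
      for (i in 1:(length(self$layers) - 1)) {
        x <- self$dropout(nnf_relu(self$layers[[i]](x)))
      }
      self$layers[[length(self$layers)]](x)
    }
  )
}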