---
title: "Frequency-based tna"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Frequency-based tna}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r, include = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE,
  fig.width = 6,
  fig.height = 4,
  cache = TRUE,
  dev = "svg",
  fig.ext="svg",
  out.width = "100%",
  dpi = 100,
  comment = "#>"
)
suppressPackageStartupMessages({
  library("tna")
  library("tibble")
  library("dplyr")
  library("ggplot2")
})

 
options(scipen = 99)
options(digits = 2)
options(max.print = 30)
options(width = 83)
```


# FTNA tutorial

```{r}
# Install 'tna' package from CRAN if needed (uncomment if required).
# install.packages("tna")

# Load packages
library("tna")

# Load example data provided within the 'tna' package, 
# representing group regulatory interactions
data(group_regulation)

# Run FTNA on 'group_regulation' data using raw counts of 
# transitions ("absolute" type) and print the result
model <- ftna(group_regulation)

# Print the output to inspect the model
print(model)
```

```{r, fig.align='center'}
# Calculate the Transition Network Analysis (TNA) on the group_regulation 
# data with scaled weights between 0 and 1
model_scaled <- ftna(group_regulation, scaling = "minmax")
print(model_scaled) # Print the FTNA model with scaled weights
```

## Plotting

```{r, fig.align='center'}
# Plotting the two weights together to see if the scaling distorts the data

# Combine weights from absolute and scaled models into a data frame for plotting
weights_data <- data.frame(
  Absolute = as.vector(model$weights), # Extract absolute weights as a vector
  Scaled = as.vector(model_scaled$weights) # Extract scaled weights as a vector
)
corr <- cor(weights_data$Absolute, weights_data$Scaled, method = c("pearson")) |>
  round(digits = 2)

# Create a scatter plot comparing absolute vs. scaled weights
plot_abs_scaled <- ggplot(weights_data, aes(x = Absolute, y = Scaled)) +
  geom_point(color = "steelblue", alpha = 0.6, size = 2) +  # Add points with specified aesthetics
  geom_smooth(formula = y ~ x, method = "lm", color = "red", linetype = "dashed") + # Add a linear trend line
  # geom_smooth(method = lm, formula = y ~ x, se = FALSE) +
  geom_text(x = 0.1, y = 0.9, label = paste0('r = ', corr), color = 'red')
  # stat_cor(aes(label = after_stat(r.label)), label.x = 0.1, label.y = 0.9, 
  #          size = 4, color = "black", method = "spearman") + # Display Spearman correlation
  labs(x = "Absolute Weights", y = "Scaled Weights") + # Label axes
  theme_minimal() # Apply a minimal theme for the plot

# Display the scatter plot
plot_abs_scaled
```


```{r, fig.align='center', results = FALSE, message = FALSE}
# Calculate the Transition Network Analysis (TNA) on the `group_regulation` 
# data with ranked weights
model_ranked <- ftna(group_regulation, scaling = "rank")
print(model_ranked) # Print the FTNA model with ranked weights
```


```{r, fig.align='center'}
# Combine weights from absolute and ranked models into a data frame for plotting
weights_data <- data.frame(
  Absolute = as.vector(model$weights), # Extract absolute weights as a vector
  Ranked = as.vector(model_ranked$weights) # Extract ranked weights as a vector
)

# Create a scatter plot comparing Absolute vs. Ranked weights with correlation annotations
plot_abs_ranked <- ggplot(weights_data, aes(x = Absolute, y = Ranked)) +
  geom_point(color = "steelblue", alpha = 0.6, size = 2) +  # Add points with specified aesthetics
  geom_smooth(formula = y ~ x, method = "lm", color = "red", linetype = "dashed") +  # Add a linear trend line
  # stat_cor(aes(label = paste("Spearman: ", round(after_stat(r), 2))), 
  #          method = "spearman", label.x = 0.1, label.y = 0.9, size = 4, color = "black") + # Spearman correlation annotation
  # stat_cor(aes(label = paste("Pearson: ", round(after_stat(r), 2))), 
  #          method = "pearson", label.x = 0.1, label.y = 0.8, size = 4, color = "darkgreen") + # Pearson correlation annotation
  labs(x = "Absolute Weights", y = "Ranked Weights") + # Label axes
  theme_minimal() # Apply a minimal theme for the plot

# Display the scatter plot
plot_abs_ranked
```


## Pruning

```{r, fig.show='hold', fig.width=9, fig.height=9}
layout(matrix(1:4, ncol = 2))
# Pruning with different methods
pruned_threshold <- prune(model_scaled, method = "threshold", threshold = 0.1)
pruned_lowest <- prune(model_scaled, method = "lowest", lowest = 0.15)
pruned_disparity <- prune(model_scaled, method = "disparity", alpha = 0.5)

# Plotting for comparison
plot(pruned_threshold)
plot(pruned_lowest)
plot(pruned_disparity)
plot(model_scaled, minimum = 0.05, cut = 0.1)
```

## Patterns

```{r, fig.align='center', fig.width=3, fig.height=2}
# Identify 2-cliques (dyads) from the FTNA model with a weight threshold, 
# excluding loops in visualization.
# A 2-clique represents a pair of nodes that are strongly connected based on 
# the specified weight threshold.
layout(matrix(1:6, ncol = 3))
cliques_of_two <- cliques(
  model_scaled,      # The FTNA model with scaled edge weights
  size = 2,          # Looking for pairs of connected nodes (dyads)
  threshold = 0.1    # Only include edges with weights greater than 0.1
)

# Print and visualize the identified 2-cliques (dyads)
print(cliques_of_two)  # Display details of 2-cliques
plot(cliques_of_two, ask = F, vsize = 20)   # Visualize 2-cliques in the network
```
```{r, fig.align='center'}
layout(matrix(1:6, ncol = 3))
# Identify 3-cliques (triads) from the FTNA model.
# A 3-clique is a fully connected set of three nodes, indicating a strong 
# triplet structure.
cliques_of_three <- cliques(
  model_scaled,      # The FTNA model with scaled edge weights
  size = 3,          # Looking for triplets of fully connected nodes (triads)
  threshold = 0.05   # Only include edges with weights greater than 0.05
)

# Print and visualize the identified 3-cliques (triads)
# Uncomment the code below to view the results
print(cliques_of_three) # Display details of 3-cliques
plot(cliques_of_three, ask = F)  # Visualize 3-cliques in the network
```
```{r, fig.align='center', fig.width=6, fig.height=4}
layout(matrix(1:6, ncol = 3))
# Identify 4-cliques (quadruples) from the FTNA model.
# A 4-clique includes four nodes where each node is connected to every other 
# node in the group.
# Uncomment the code below to view the results
cliques_of_four <- cliques(
  model_scaled,      # The FTNA model with scaled edge weights
  size = 4,          # Looking for quadruples of fully connected nodes (4-cliques)
  threshold = 0.03   # Only include edges with weights greater than 0.03
)

# Print and visualize the identified 4-cliques (quadruples) 
# Uncomment the code below to view the results
print(cliques_of_four)  # Display details of 4-cliques
plot(cliques_of_four, ask = F)   # Visualize 4-cliques in the network
```
```{r, fig.align='center'}
# Identify 5-cliques (quintuples) from the FTNA model, summing edge weights.
# Here, the sum of edge weights in both directions must meet the specified 
# threshold for inclusion.
# Uncomment the code below to view the results
cliques_of_five <- cliques(
  model_scaled,      # The FTNA model with scaled edge weights
  size = 5,          # Looking for quintuples of fully connected nodes (5-cliques)
  threshold = 0.1,   # Only edges with total bidirectional weights greater than 0.1
  sum_weights = TRUE # Sum edge weight in both directions when computing  threshold
)

# Print and visualize the identified 5-cliques (quintuples)
print(cliques_of_five)  # Display details of 5-cliques
plot(cliques_of_five, ask = F)   # Visualize 5-cliques in the network
```

## Graph level measures

```{r}
summary(model_scaled)
```

```{r}
summary(pruned_disparity)
```


## Node level measures

```{r, fig.width=7, fig.height=7}
# Compute centrality measures for the FTNA model
centrality_measures <- centralities(model_scaled)

# Print the calculated centrality measures in the FTNA model
print(centrality_measures)
plot(centrality_measures)
```

```{r}
# Convert the FTNA model to an igraph object and 
# calculate HITS (Hub and Authority) scores
hits_results <- igraph::hits_scores(as.igraph(model_scaled))

# Extract the hub and authority scores from the HITS results for further analysis
hub_scores <- hits_results$hub
authority_scores <- hits_results$authority
```

```{r}
# Print the hub and authority scores to view influential nodes
print(hub_scores)
print(authority_scores)
```


## Edge level measures

```{r, fig.align='center', fig.width=5, fig.height=5}
edge_between <- betweenness_network(model_scaled)
plot(edge_between)
```

## Community detection

```{r, fig.align='center', fig.width=5, fig.height=5}
detected_communities <- communities(model_scaled)
plot(detected_communities, minimum = 0.05)
print(detected_communities)
```

## Bootstrapping

```{r}
# Perform bootstrapping on the FTNA model with a fixed seed for reproducibility
set.seed(265)
boot <- bootstrap(model_scaled, threshold = 0.05)

# Print the combined results data frame containing
print(summary(boot))

# View non-significant edges  which are less likely to be stable across bootstrap samples
print(boot, type = "nonsig")
```

## Comparing Models

```{r, fig.align='center', fig.height=5, fig.width=5, layout = c(1,1)}
# Create FTNA for the high-achievers subset (rows 1 to 1000)
Hi <- ftna(group_regulation[1:1000, ], scaling = "minmax")

# Create FTNA for the low-achievers subset (rows 1001 to 2000)
Lo <- ftna(group_regulation[1001:2000, ], scaling = "minmax")

# Plot a comparison of the "Hi" and "Lo" models
# The 'minimum' parameter is set to 0.001, so edges with weights >= 0.001 are shown
plot_compare(Hi, Lo, minimum = 0.0001)

# Run a permutation test to determine statistical significance of 
# differences between "Hi" and "Lo"
# The 'it' parameter is set to 1000, meaning 1000 permutations are performed
Permutation <- permutation_test(Hi, Lo, it = 1000)

# Plot the significant differences identified in the permutation test
plot(Permutation, minimum = 0.01)
```

## Centrality stability

```{r, fig.width=5, fig.height=3}
Centrality_stability <- estimate_centrality_stability(model_scaled, detailed = FALSE, iter = 100)
plot(Centrality_stability)
```