-
Notifications
You must be signed in to change notification settings - Fork 15
/
graph.R
221 lines (173 loc) · 7.4 KB
/
graph.R
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
#' Build tree graph
#'
#' Build a tree graph from a set of clusterings, metadata and associated
#' aesthetics
#'
#' @param clusterings numeric matrix containing clustering information, each
#' column contains clustering at a separate resolution
#' @param metadata data.frame containing metadata on each sample that can be
#' used as node aesthetics
#' @param prefix string indicating columns containing clustering information
#' @param count_filter count threshold for filtering edges in the clustering
#' graph
#' @param prop_filter in proportion threshold for filtering edges in the
#' clustering graph
#' @param node_aes_list nested list containing node aesthetics
#'
#' @return [tidygraph::tbl_graph] object containing the tree graph
#'
#' @importFrom dplyr %>%
#' @importFrom rlang .data
build_tree_graph <- function(clusterings, prefix, count_filter, prop_filter,
metadata, node_aes_list) {
nodes <- get_tree_nodes(clusterings, prefix, metadata, node_aes_list)
edges <- get_tree_edges(clusterings, prefix) %>%
dplyr::filter(.data$count > count_filter) %>%
dplyr::filter(.data$in_prop > prop_filter)
graph <- tidygraph::tbl_graph(nodes = nodes, edges = edges)
# Convert resolution to factor
igraph::vertex_attr(graph)[[prefix]] <-
factor(as.numeric(igraph::vertex_attr(graph)[[prefix]]))
# Convert cluster to factor, check numeric so order is correct
numeric_clusters <- suppressWarnings(
all(!is.na(as.numeric(igraph::vertex_attr(graph)[["cluster"]])))
)
if (numeric_clusters) {
igraph::vertex_attr(graph)[["cluster"]] <-
as.numeric(igraph::vertex_attr(graph)[["cluster"]])
}
igraph::vertex_attr(graph)[["cluster"]] <-
factor(igraph::vertex_attr(graph)[["cluster"]])
graph <- store_node_aes(graph, node_aes_list, metadata)
return(graph)
}
#' Get tree nodes
#'
#' Extract the nodes from a set of clusterings and add relevant attributes
#'
#' @param clusterings numeric matrix containing clustering information, each
#' column contains clustering at a separate resolution
#' @param metadata data.frame containing metadata on each sample that can be
#' used as node aesthetics
#' @param prefix string indicating columns containing clustering information
#' @param node_aes_list nested list containing node aesthetics
#'
#' @return data.frame containing node information
get_tree_nodes <- function(clusterings, prefix, metadata, node_aes_list) {
nodes <- lapply(colnames(clusterings), function(res) {
clustering <- clusterings[, res]
clusters <- sort(unique(clustering))
node <- lapply(clusters, function(cluster) {
is_cluster <- clustering == cluster
size <- sum(is_cluster)
res_clean <- as.numeric(gsub(prefix, "", res))
node_name <- paste0(prefix, res_clean, "C", cluster)
node_data <- list(node_name, res_clean, cluster, size)
names(node_data) <- c("node", prefix, "cluster", "size")
for (aes in node_aes_list) {
node_data <- aggr_metadata(node_data, aes[[1]], aes[[2]],
metadata, is_cluster)
}
node_data <- data.frame(node_data, stringsAsFactors = FALSE)
return(node_data)
})
node <- do.call("rbind", node)
})
nodes <- do.call("rbind", nodes)
stabilities <- calc_sc3_stability(clusterings)
nodes$sc3_stability <- as.numeric(stabilities[, 3])
return(nodes)
}
#' Get tree edges
#'
#' Extract the edges from a set of clusterings
#'
#' @param clusterings numeric matrix containing clustering information, each
#' column contains clustering at a separate resolution
#' @param prefix string indicating columns containing clustering information
#'
#' @return data.frame containing edge information
#'
#' @importFrom dplyr %>%
#' @importFrom rlang .data :=
get_tree_edges <- function(clusterings, prefix) {
res_values <- colnames(clusterings)
edges <- lapply(seq_len(ncol(clusterings) - 1), function(idx) {
from_res <- res_values[idx]
to_res <- res_values[idx + 1]
from_clusters <- sort(unique(clusterings[, from_res]))
to_clusters <- sort(unique(clusterings[, to_res]))
from_tos <- expand.grid(from_clust = from_clusters,
to_clust = to_clusters,
stringsAsFactors = FALSE)
transitions <- apply(from_tos, 1, function(from_to) {
from_clust <- from_to[1]
to_clust <- from_to[2]
is_from <- clusterings[, from_res] == from_clust
is_to <- clusterings[, to_res] == to_clust
trans_count <- sum(is_from & is_to)
to_size <- sum(is_to)
in_prop <- trans_count / to_size
return(c(trans_count, in_prop))
})
from_tos$from_res <- as.numeric(gsub(prefix, "", from_res))
from_tos$to_res <- as.numeric(gsub(prefix, "", to_res))
from_tos$count <- transitions[1, ]
from_tos$in_prop <- transitions[2, ]
return(from_tos)
})
edges <- dplyr::bind_rows(edges) %>%
dplyr::mutate(from_node = paste0(prefix, .data$from_res,
"C", .data$from_clust)) %>%
dplyr::mutate(to_node = paste0(prefix, .data$to_res,
"C", .data$to_clust)) %>%
dplyr::select(.data$from_node, .data$to_node, dplyr::everything()) %>%
dplyr::rename(!!as.name(paste0("from_", prefix)) := .data$from_res,
!!as.name(paste0("to_", prefix)) := .data$to_res)
return(edges)
}
#' Aggregate metadata
#'
#' Aggregate a metadata column to get a summarized value for a cluster node
#'
#' @param node_data data.frame containing information about a set of cluster
#' nodes
#' @param col_name the name of the metadata column to aggregate
#' @param col_aggr string naming a function used to aggregate the column
#' @param metadata data.frame providing metadata on samples
#' @param is_cluster logical vector indicating which rows of metadata are in the
#' node to be summarized
#'
#' @return data.frame with aggregated data
aggr_metadata <- function(node_data, col_name, col_aggr, metadata,
is_cluster) {
if (col_name %in% colnames(metadata)) {
clust_meta <- metadata[is_cluster, col_name]
col_aggr_fun <- match.fun(col_aggr)
aggr_col_name <- paste0(col_aggr, "_", col_name)
node_data[aggr_col_name] <- col_aggr_fun(clust_meta)
}
return(node_data)
}
#' Store node aesthetics
#'
#' Store the names of node attributes to use as aesthetics as graph attributes
#'
#' @param graph graph to store attributes in
#' @param node_aes_list nested list containing node aesthetics
#' @param metadata data.frame containing metadata that can be used as aesthetics
#'
#' @return graph with additional attributes
store_node_aes <- function(graph, node_aes_list, metadata) {
for (node_aes_name in names(node_aes_list)) {
node_aes <- node_aes_list[[node_aes_name]]$value
node_aes_value <- node_aes
node_aes_aggr <- node_aes_list[[node_aes_name]]$aggr
if (node_aes %in% colnames(metadata)) {
node_aes_value <- paste0(node_aes_aggr, "_", node_aes)
}
graph <- igraph::set_graph_attr(graph, paste0("node_", node_aes_name),
node_aes_value)
}
return(graph)
}