CRAN Package Check Results for Package seqtrie

Last updated on 2025-03-01 06:51:23 CET.

Flavor	Version	T_install	T_check	T_total	Status
r-devel-linux-x86_64-debian-clang	0.2.8	45.39	407.37	452.76	ERROR
r-devel-linux-x86_64-debian-gcc	0.2.8	36.29	465.70	501.99	ERROR
r-devel-linux-x86_64-fedora-clang	0.2.8			761.06	ERROR
r-devel-linux-x86_64-fedora-gcc	0.2.8			1078.52	ERROR
r-devel-macos-arm64	0.2.8			457.00	NOTE
r-devel-macos-x86_64	0.2.8			677.00	NOTE
r-devel-windows-x86_64	0.2.8	62.00	531.00	593.00	NOTE
r-patched-linux-x86_64	0.2.8	51.06	1189.44	1240.50	NOTE
r-release-linux-x86_64	0.2.8	47.99	1132.98	1180.97	NOTE
r-release-macos-arm64	0.2.8			275.00	NOTE
r-release-macos-x86_64	0.2.8			363.00	NOTE
r-release-windows-x86_64	0.2.8	59.00	540.00	599.00	NOTE
r-oldrel-macos-arm64	0.2.8				NOTE
r-oldrel-macos-x86_64	0.2.8			840.00	NOTE
r-oldrel-windows-x86_64	0.2.8	73.00	758.00	831.00	NOTE

Check Details

Version: 0.2.8
Check: compiled code
Result: NOTE File ‘seqtrie/libs/seqtrie.so’: Found non-API call to R: ‘STRING_PTR’ Compiled code should not call non-API entry points in R. See ‘Writing portable packages’ in the ‘Writing R Extensions’ manual, and section ‘Moving into C API compliance’ for issues with the use of non-API entry points. Flavors: r-devel-linux-x86_64-debian-clang, r-devel-linux-x86_64-debian-gcc, r-devel-linux-x86_64-fedora-clang, r-devel-linux-x86_64-fedora-gcc, r-devel-macos-arm64, r-devel-macos-x86_64

Version: 0.2.8
Check: tests
Result: ERROR Running ‘test_RadixForest.R’ [16s/14s] Running ‘test_RadixTree.R’ [268s/179s] Running ‘test_pairwise.R’ [79s/55s] Running the tests in ‘tests/test_pairwise.R’ failed. Complete output: > # This test file tests the `dist_matrix` and `dist_pairwise` functions > # These two functions are simple dynamic programming algorithms for computing pairwise distances and are themselves used to validate > # the RadixTree imeplementation (see test_radix_tree.R) > > if(requireNamespace("seqtrie", quietly=TRUE) && + requireNamespace("stringi", quietly=TRUE) && + requireNamespace("stringdist", quietly=TRUE) && + requireNamespace("Biostrings", quietly=TRUE) && + requireNamespace("dplyr", quietly=TRUE) && + # pwalign is only required for Biostrings >= 2.72.0 in R 4.4+ + ( (packageVersion("Biostrings") < "2.72.0") || requireNamespace("pwalign", quietly=TRUE) ) + ) { + + library(seqtrie) + library(stringi) + library(stringdist) + library(Biostrings) + library(dplyr) + + # Use 2 threads on github actions and CRAN, 4 threads locally + IS_LOCAL <- Sys.getenv("IS_LOCAL") != "" + NTHREADS <- ifelse(IS_LOCAL, 4, 2) + NITER <- ifelse(IS_LOCAL, 4, 1) + NSEQS <- 10000 + MAXSEQLEN <- 200 + CHARSET <- "ACGT" + + random_strings <- function(N, charset = "abcdefghijklmnopqrstuvwxyz") { + charset_stri <- paste0("[", charset, "]") + len <- sample(0:MAXSEQLEN, N, replace=TRUE) + result <- lapply(0:MAXSEQLEN, function(x) { + nx <- sum(len == x) + if(nx == 0) return(character()) + stringi::stri_rand_strings(nx, x, pattern = charset_stri) + }) + sample(unlist(result)) + } + + mutate_strings <- function(x, prob = 0.025, indel_prob = 0.025, charset = "abcdefghijklmnopqrstuvwxyz") { + charset <- unlist(strsplit(charset, "")) + xsplit <- strsplit(x, "") + sapply(xsplit, function(a) { + r <- runif(length(a)) < prob + a[r] <- sample(charset, sum(r), replace=TRUE) + ins <- runif(length(a)) < indel_prob + a[ins] <- paste0(sample(charset, sum(ins), replace=TRUE), sample(charset, sum(ins), replace=TRUE)) + del <- runif(length(a)) < indel_prob + a[del] <- "" + paste0(a, collapse = "") + }) + } + + # Biostrings notes: + # subject (target) must be of length 1 or equal to pattern (query) + # To get a distance matrix, iterate over target and perform a column bind + # special_zero_case -- if both query and target are empty, Biostrings fails with an error + pairwiseAlignmentFix <- function(pattern, subject, ...) { + results <- rep(0, length(subject)) + special_zero_case <- nchar(pattern) == 0 & nchar(subject) == 0 + if(all(special_zero_case)) { + results + } else { + results[!special_zero_case] <- Biostrings::pairwiseAlignment(pattern=pattern[!special_zero_case], subject=subject[!special_zero_case], ...) + results + } + } + + biostrings_matrix_global <- function(query, target, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + lapply(query, function(x) { + query2 <- rep(x, length(target)) + -pairwiseAlignmentFix(pattern=query2, subject=target, substitutionMatrix = substitutionMatrix, gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + }) %>% do.call(rbind, .) + } + + biostrings_pairwise_global <- function(query, target, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + -pairwiseAlignment(pattern=query, subject=target, substitutionMatrix = substitutionMatrix,gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + } + + biostrings_matrix_anchored <- function(query, target, query_size, target_size, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + lapply(seq_along(query), function(i) { + query2 <- substring(query[i], 1, query_size[i,,drop=TRUE]) + target2 <- substring(target, 1, target_size[i,,drop=TRUE]) + -pairwiseAlignmentFix(pattern=query2, subject=target2, substitutionMatrix = substitutionMatrix, gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + }) %>% do.call(rbind, .) + } + + biostrings_pairwise_anchored <- function(query, target, query_size, target_size, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + query2 <- substring(query, 1, query_size) + target2 <- substring(target, 1, target_size) + -pairwiseAlignmentFix(pattern=query2, subject=target2, substitutionMatrix = substitutionMatrix, gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + } + + for(. in 1:NITER) { + + print("Checking hamming search correctness") + local({ + # Note: seqtrie returns `NA_integer_` for hamming distance when the lengths are different + # whereas stringdist returns `Inf` + # This is why we need to replace `NA_integer_` with `Inf` when comparing results + + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + results_seqtrie <- dist_matrix(query, target, mode = "hamming", nthreads=NTHREADS) + results_seqtrie[is.na(results_seqtrie)] <- Inf + results_stringdist <- stringdist::stringdistmatrix(query, target, method = "hamming", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "hamming", nthreads=NTHREADS) + results_seqtrie[is.na(results_seqtrie)] <- Inf + results_stringdist <- stringdist::stringdist(query_pairwise, target, method = "hamming", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + }) + + print("Checking levenshtein search correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + results_seqtrie <- dist_matrix(query, target, mode = "levenshtein", nthreads=NTHREADS) + results_stringdist <- stringdist::stringdistmatrix(query, target, method = "lv", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "levenshtein", nthreads=NTHREADS) + results_stringdist <- stringdist::stringdist(query_pairwise, target, method = "lv", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + }) + + print("Checking anchored search correctness") + local({ + # There is no anchored search in stringdist (or elsewhere). To get the same results, we substring the query and target sequences + # By the results of the seqtrie anchored search and then compare the results + + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + results_seqtrie <- dist_matrix(query, target, mode = "anchored", nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_stringdist <- lapply(seq_along(query), function(i) { + query_size2 <- query_size[i,,drop=TRUE] + target_size2 <- target_size[i,,drop=TRUE] + query2 <- substring(query[i], 1, query_size2) # query[i] is recycled + target2 <- substring(target, 1, target_size2) + stringdist::stringdist(query2, target2, method = "lv", nthread=NTHREADS) + }) %>% do.call(rbind, .) + stopifnot(all(results_seqtrie == results_stringdist)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "anchored", nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + query2 <- substring(query_pairwise, 1, query_size) + target2 <- substring(target, 1, target_size) + results_stringdist <- stringdist::stringdist(query2, target2, method = "lv", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + }) + + print("Checking global search with linear gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_matrix_global(query, target, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_pairwise_global(query_pairwise, target, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + + print("Checking anchored search with linear gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_matrix_anchored(query, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_pairwise_anchored(query_pairwise, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + + + print("Checking global search with affine gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + gap_open_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cos=gap_open_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_matrix_global(query, target, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_pairwise_global(query_pairwise, target, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + + print("Checking anchored search with affine gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + gap_open_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_matrix_anchored(query, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_pairwise_anchored(query_pairwise, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + } + + } Loading required package: BiocGenerics Loading required package: generics Attaching package: 'generics' The following objects are masked from 'package:base': as.difftime, as.factor, as.ordered, intersect, is.element, setdiff, setequal, union Attaching package: 'BiocGenerics' The following objects are masked from 'package:stats': IQR, mad, sd, var, xtabs The following objects are masked from 'package:base': Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, get, grep, grepl, is.unsorted, lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank, rbind, rownames, sapply, saveRDS, table, tapply, unique, unsplit, which.max, which.min Loading required package: S4Vectors Loading required package: stats4 Attaching package: 'S4Vectors' The following object is masked from 'package:utils': findMatches The following objects are masked from 'package:base': I, expand.grid, unname Loading required package: IRanges Loading required package: XVector Loading required package: GenomeInfoDb Attaching package: 'Biostrings' The following object is masked from 'package:base': strsplit Attaching package: 'dplyr' The following objects are masked from 'package:Biostrings': collapse, intersect, setdiff, setequal, union The following object is masked from 'package:GenomeInfoDb': intersect The following object is masked from 'package:XVector': slice The following objects are masked from 'package:IRanges': collapse, desc, intersect, setdiff, slice, union The following objects are masked from 'package:S4Vectors': first, intersect, rename, setdiff, setequal, union The following objects are masked from 'package:BiocGenerics': combine, intersect, setdiff, setequal, union The following object is masked from 'package:generics': explain The following objects are masked from 'package:stats': filter, lag The following objects are masked from 'package:base': intersect, setdiff, setequal, union [1] "Checking hamming search correctness" [1] "Checking levenshtein search correctness" [1] "Checking anchored search correctness" [1] "Checking global search with linear gap for correctness" Error in h(simpleError(msg, call)) : error in evaluating the argument 'args' in selecting a method for function 'do.call': 'recursive' must be a length-1 vector Calls: <Anonymous> ... mpi.XStringSet.pairwiseAlignment -> XStringSet.pairwiseAlignment -> array -> unlist In addition: Warning message: In .call_fun_in_pwalign("pairwiseAlignment", ...) : pairwiseAlignment() has moved from Biostrings to the pwalign package, and is formally deprecated in Biostrings >= 2.75.1. Please call pwalign::pairwiseAlignment() to get rid of this warning. Execution halted Flavor: r-devel-linux-x86_64-debian-clang

Version: 0.2.8
Check: tests
Result: ERROR Running ‘test_RadixForest.R’ [20s/18s] Running ‘test_RadixTree.R’ [335s/207s] Running ‘test_pairwise.R’ [77s/59s] Running the tests in ‘tests/test_pairwise.R’ failed. Complete output: > # This test file tests the `dist_matrix` and `dist_pairwise` functions > # These two functions are simple dynamic programming algorithms for computing pairwise distances and are themselves used to validate > # the RadixTree imeplementation (see test_radix_tree.R) > > if(requireNamespace("seqtrie", quietly=TRUE) && + requireNamespace("stringi", quietly=TRUE) && + requireNamespace("stringdist", quietly=TRUE) && + requireNamespace("Biostrings", quietly=TRUE) && + requireNamespace("dplyr", quietly=TRUE) && + # pwalign is only required for Biostrings >= 2.72.0 in R 4.4+ + ( (packageVersion("Biostrings") < "2.72.0") || requireNamespace("pwalign", quietly=TRUE) ) + ) { + + library(seqtrie) + library(stringi) + library(stringdist) + library(Biostrings) + library(dplyr) + + # Use 2 threads on github actions and CRAN, 4 threads locally + IS_LOCAL <- Sys.getenv("IS_LOCAL") != "" + NTHREADS <- ifelse(IS_LOCAL, 4, 2) + NITER <- ifelse(IS_LOCAL, 4, 1) + NSEQS <- 10000 + MAXSEQLEN <- 200 + CHARSET <- "ACGT" + + random_strings <- function(N, charset = "abcdefghijklmnopqrstuvwxyz") { + charset_stri <- paste0("[", charset, "]") + len <- sample(0:MAXSEQLEN, N, replace=TRUE) + result <- lapply(0:MAXSEQLEN, function(x) { + nx <- sum(len == x) + if(nx == 0) return(character()) + stringi::stri_rand_strings(nx, x, pattern = charset_stri) + }) + sample(unlist(result)) + } + + mutate_strings <- function(x, prob = 0.025, indel_prob = 0.025, charset = "abcdefghijklmnopqrstuvwxyz") { + charset <- unlist(strsplit(charset, "")) + xsplit <- strsplit(x, "") + sapply(xsplit, function(a) { + r <- runif(length(a)) < prob + a[r] <- sample(charset, sum(r), replace=TRUE) + ins <- runif(length(a)) < indel_prob + a[ins] <- paste0(sample(charset, sum(ins), replace=TRUE), sample(charset, sum(ins), replace=TRUE)) + del <- runif(length(a)) < indel_prob + a[del] <- "" + paste0(a, collapse = "") + }) + } + + # Biostrings notes: + # subject (target) must be of length 1 or equal to pattern (query) + # To get a distance matrix, iterate over target and perform a column bind + # special_zero_case -- if both query and target are empty, Biostrings fails with an error + pairwiseAlignmentFix <- function(pattern, subject, ...) { + results <- rep(0, length(subject)) + special_zero_case <- nchar(pattern) == 0 & nchar(subject) == 0 + if(all(special_zero_case)) { + results + } else { + results[!special_zero_case] <- Biostrings::pairwiseAlignment(pattern=pattern[!special_zero_case], subject=subject[!special_zero_case], ...) + results + } + } + + biostrings_matrix_global <- function(query, target, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + lapply(query, function(x) { + query2 <- rep(x, length(target)) + -pairwiseAlignmentFix(pattern=query2, subject=target, substitutionMatrix = substitutionMatrix, gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + }) %>% do.call(rbind, .) + } + + biostrings_pairwise_global <- function(query, target, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + -pairwiseAlignment(pattern=query, subject=target, substitutionMatrix = substitutionMatrix,gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + } + + biostrings_matrix_anchored <- function(query, target, query_size, target_size, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + lapply(seq_along(query), function(i) { + query2 <- substring(query[i], 1, query_size[i,,drop=TRUE]) + target2 <- substring(target, 1, target_size[i,,drop=TRUE]) + -pairwiseAlignmentFix(pattern=query2, subject=target2, substitutionMatrix = substitutionMatrix, gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + }) %>% do.call(rbind, .) + } + + biostrings_pairwise_anchored <- function(query, target, query_size, target_size, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + query2 <- substring(query, 1, query_size) + target2 <- substring(target, 1, target_size) + -pairwiseAlignmentFix(pattern=query2, subject=target2, substitutionMatrix = substitutionMatrix, gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + } + + for(. in 1:NITER) { + + print("Checking hamming search correctness") + local({ + # Note: seqtrie returns `NA_integer_` for hamming distance when the lengths are different + # whereas stringdist returns `Inf` + # This is why we need to replace `NA_integer_` with `Inf` when comparing results + + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + results_seqtrie <- dist_matrix(query, target, mode = "hamming", nthreads=NTHREADS) + results_seqtrie[is.na(results_seqtrie)] <- Inf + results_stringdist <- stringdist::stringdistmatrix(query, target, method = "hamming", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "hamming", nthreads=NTHREADS) + results_seqtrie[is.na(results_seqtrie)] <- Inf + results_stringdist <- stringdist::stringdist(query_pairwise, target, method = "hamming", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + }) + + print("Checking levenshtein search correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + results_seqtrie <- dist_matrix(query, target, mode = "levenshtein", nthreads=NTHREADS) + results_stringdist <- stringdist::stringdistmatrix(query, target, method = "lv", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "levenshtein", nthreads=NTHREADS) + results_stringdist <- stringdist::stringdist(query_pairwise, target, method = "lv", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + }) + + print("Checking anchored search correctness") + local({ + # There is no anchored search in stringdist (or elsewhere). To get the same results, we substring the query and target sequences + # By the results of the seqtrie anchored search and then compare the results + + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + results_seqtrie <- dist_matrix(query, target, mode = "anchored", nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_stringdist <- lapply(seq_along(query), function(i) { + query_size2 <- query_size[i,,drop=TRUE] + target_size2 <- target_size[i,,drop=TRUE] + query2 <- substring(query[i], 1, query_size2) # query[i] is recycled + target2 <- substring(target, 1, target_size2) + stringdist::stringdist(query2, target2, method = "lv", nthread=NTHREADS) + }) %>% do.call(rbind, .) + stopifnot(all(results_seqtrie == results_stringdist)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "anchored", nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + query2 <- substring(query_pairwise, 1, query_size) + target2 <- substring(target, 1, target_size) + results_stringdist <- stringdist::stringdist(query2, target2, method = "lv", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + }) + + print("Checking global search with linear gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_matrix_global(query, target, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_pairwise_global(query_pairwise, target, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + + print("Checking anchored search with linear gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_matrix_anchored(query, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_pairwise_anchored(query_pairwise, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + + + print("Checking global search with affine gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + gap_open_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cos=gap_open_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_matrix_global(query, target, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_pairwise_global(query_pairwise, target, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + + print("Checking anchored search with affine gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + gap_open_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_matrix_anchored(query, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_pairwise_anchored(query_pairwise, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + } + + } Loading required package: BiocGenerics Loading required package: generics Attaching package: 'generics' The following objects are masked from 'package:base': as.difftime, as.factor, as.ordered, intersect, is.element, setdiff, setequal, union Attaching package: 'BiocGenerics' The following objects are masked from 'package:stats': IQR, mad, sd, var, xtabs The following objects are masked from 'package:base': Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, get, grep, grepl, is.unsorted, lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank, rbind, rownames, sapply, saveRDS, table, tapply, unique, unsplit, which.max, which.min Loading required package: S4Vectors Loading required package: stats4 Attaching package: 'S4Vectors' The following object is masked from 'package:utils': findMatches The following objects are masked from 'package:base': I, expand.grid, unname Loading required package: IRanges Loading required package: XVector Loading required package: GenomeInfoDb Attaching package: 'Biostrings' The following object is masked from 'package:base': strsplit Attaching package: 'dplyr' The following objects are masked from 'package:Biostrings': collapse, intersect, setdiff, setequal, union The following object is masked from 'package:GenomeInfoDb': intersect The following object is masked from 'package:XVector': slice The following objects are masked from 'package:IRanges': collapse, desc, intersect, setdiff, slice, union The following objects are masked from 'package:S4Vectors': first, intersect, rename, setdiff, setequal, union The following objects are masked from 'package:BiocGenerics': combine, intersect, setdiff, setequal, union The following object is masked from 'package:generics': explain The following objects are masked from 'package:stats': filter, lag The following objects are masked from 'package:base': intersect, setdiff, setequal, union [1] "Checking hamming search correctness" [1] "Checking levenshtein search correctness" [1] "Checking anchored search correctness" [1] "Checking global search with linear gap for correctness" Error in h(simpleError(msg, call)) : error in evaluating the argument 'args' in selecting a method for function 'do.call': 'recursive' must be a length-1 vector Calls: <Anonymous> ... mpi.XStringSet.pairwiseAlignment -> XStringSet.pairwiseAlignment -> array -> unlist In addition: Warning message: In .call_fun_in_pwalign("pairwiseAlignment", ...) : pairwiseAlignment() has moved from Biostrings to the pwalign package, and is formally deprecated in Biostrings >= 2.75.1. Please call pwalign::pairwiseAlignment() to get rid of this warning. Execution halted Flavor: r-devel-linux-x86_64-debian-gcc

Version: 0.2.8
Check: tests
Result: ERROR Running ‘test_RadixForest.R’ [30s/25s] Running ‘test_RadixTree.R’ [449s/301s] Running ‘test_pairwise.R’ [124s/86s] Running the tests in ‘tests/test_pairwise.R’ failed. Complete output: > # This test file tests the `dist_matrix` and `dist_pairwise` functions > # These two functions are simple dynamic programming algorithms for computing pairwise distances and are themselves used to validate > # the RadixTree imeplementation (see test_radix_tree.R) > > if(requireNamespace("seqtrie", quietly=TRUE) && + requireNamespace("stringi", quietly=TRUE) && + requireNamespace("stringdist", quietly=TRUE) && + requireNamespace("Biostrings", quietly=TRUE) && + requireNamespace("dplyr", quietly=TRUE) && + # pwalign is only required for Biostrings >= 2.72.0 in R 4.4+ + ( (packageVersion("Biostrings") < "2.72.0") || requireNamespace("pwalign", quietly=TRUE) ) + ) { + + library(seqtrie) + library(stringi) + library(stringdist) + library(Biostrings) + library(dplyr) + + # Use 2 threads on github actions and CRAN, 4 threads locally + IS_LOCAL <- Sys.getenv("IS_LOCAL") != "" + NTHREADS <- ifelse(IS_LOCAL, 4, 2) + NITER <- ifelse(IS_LOCAL, 4, 1) + NSEQS <- 10000 + MAXSEQLEN <- 200 + CHARSET <- "ACGT" + + random_strings <- function(N, charset = "abcdefghijklmnopqrstuvwxyz") { + charset_stri <- paste0("[", charset, "]") + len <- sample(0:MAXSEQLEN, N, replace=TRUE) + result <- lapply(0:MAXSEQLEN, function(x) { + nx <- sum(len == x) + if(nx == 0) return(character()) + stringi::stri_rand_strings(nx, x, pattern = charset_stri) + }) + sample(unlist(result)) + } + + mutate_strings <- function(x, prob = 0.025, indel_prob = 0.025, charset = "abcdefghijklmnopqrstuvwxyz") { + charset <- unlist(strsplit(charset, "")) + xsplit <- strsplit(x, "") + sapply(xsplit, function(a) { + r <- runif(length(a)) < prob + a[r] <- sample(charset, sum(r), replace=TRUE) + ins <- runif(length(a)) < indel_prob + a[ins] <- paste0(sample(charset, sum(ins), replace=TRUE), sample(charset, sum(ins), replace=TRUE)) + del <- runif(length(a)) < indel_prob + a[del] <- "" + paste0(a, collapse = "") + }) + } + + # Biostrings notes: + # subject (target) must be of length 1 or equal to pattern (query) + # To get a distance matrix, iterate over target and perform a column bind + # special_zero_case -- if both query and target are empty, Biostrings fails with an error + pairwiseAlignmentFix <- function(pattern, subject, ...) { + results <- rep(0, length(subject)) + special_zero_case <- nchar(pattern) == 0 & nchar(subject) == 0 + if(all(special_zero_case)) { + results + } else { + results[!special_zero_case] <- Biostrings::pairwiseAlignment(pattern=pattern[!special_zero_case], subject=subject[!special_zero_case], ...) + results + } + } + + biostrings_matrix_global <- function(query, target, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + lapply(query, function(x) { + query2 <- rep(x, length(target)) + -pairwiseAlignmentFix(pattern=query2, subject=target, substitutionMatrix = substitutionMatrix, gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + }) %>% do.call(rbind, .) + } + + biostrings_pairwise_global <- function(query, target, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + -pairwiseAlignment(pattern=query, subject=target, substitutionMatrix = substitutionMatrix,gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + } + + biostrings_matrix_anchored <- function(query, target, query_size, target_size, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + lapply(seq_along(query), function(i) { + query2 <- substring(query[i], 1, query_size[i,,drop=TRUE]) + target2 <- substring(target, 1, target_size[i,,drop=TRUE]) + -pairwiseAlignmentFix(pattern=query2, subject=target2, substitutionMatrix = substitutionMatrix, gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + }) %>% do.call(rbind, .) + } + + biostrings_pairwise_anchored <- function(query, target, query_size, target_size, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + query2 <- substring(query, 1, query_size) + target2 <- substring(target, 1, target_size) + -pairwiseAlignmentFix(pattern=query2, subject=target2, substitutionMatrix = substitutionMatrix, gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + } + + for(. in 1:NITER) { + + print("Checking hamming search correctness") + local({ + # Note: seqtrie returns `NA_integer_` for hamming distance when the lengths are different + # whereas stringdist returns `Inf` + # This is why we need to replace `NA_integer_` with `Inf` when comparing results + + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + results_seqtrie <- dist_matrix(query, target, mode = "hamming", nthreads=NTHREADS) + results_seqtrie[is.na(results_seqtrie)] <- Inf + results_stringdist <- stringdist::stringdistmatrix(query, target, method = "hamming", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "hamming", nthreads=NTHREADS) + results_seqtrie[is.na(results_seqtrie)] <- Inf + results_stringdist <- stringdist::stringdist(query_pairwise, target, method = "hamming", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + }) + + print("Checking levenshtein search correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + results_seqtrie <- dist_matrix(query, target, mode = "levenshtein", nthreads=NTHREADS) + results_stringdist <- stringdist::stringdistmatrix(query, target, method = "lv", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "levenshtein", nthreads=NTHREADS) + results_stringdist <- stringdist::stringdist(query_pairwise, target, method = "lv", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + }) + + print("Checking anchored search correctness") + local({ + # There is no anchored search in stringdist (or elsewhere). To get the same results, we substring the query and target sequences + # By the results of the seqtrie anchored search and then compare the results + + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + results_seqtrie <- dist_matrix(query, target, mode = "anchored", nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_stringdist <- lapply(seq_along(query), function(i) { + query_size2 <- query_size[i,,drop=TRUE] + target_size2 <- target_size[i,,drop=TRUE] + query2 <- substring(query[i], 1, query_size2) # query[i] is recycled + target2 <- substring(target, 1, target_size2) + stringdist::stringdist(query2, target2, method = "lv", nthread=NTHREADS) + }) %>% do.call(rbind, .) + stopifnot(all(results_seqtrie == results_stringdist)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "anchored", nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + query2 <- substring(query_pairwise, 1, query_size) + target2 <- substring(target, 1, target_size) + results_stringdist <- stringdist::stringdist(query2, target2, method = "lv", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + }) + + print("Checking global search with linear gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_matrix_global(query, target, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_pairwise_global(query_pairwise, target, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + + print("Checking anchored search with linear gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_matrix_anchored(query, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_pairwise_anchored(query_pairwise, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + + + print("Checking global search with affine gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + gap_open_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cos=gap_open_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_matrix_global(query, target, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_pairwise_global(query_pairwise, target, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + + print("Checking anchored search with affine gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + gap_open_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_matrix_anchored(query, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_pairwise_anchored(query_pairwise, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + } + + } Loading required package: BiocGenerics Loading required package: generics Attaching package: 'generics' The following objects are masked from 'package:base': as.difftime, as.factor, as.ordered, intersect, is.element, setdiff, setequal, union Attaching package: 'BiocGenerics' The following objects are masked from 'package:stats': IQR, mad, sd, var, xtabs The following objects are masked from 'package:base': Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, get, grep, grepl, is.unsorted, lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank, rbind, rownames, sapply, saveRDS, table, tapply, unique, unsplit, which.max, which.min Loading required package: S4Vectors Loading required package: stats4 Attaching package: 'S4Vectors' The following object is masked from 'package:utils': findMatches The following objects are masked from 'package:base': I, expand.grid, unname Loading required package: IRanges Loading required package: XVector Loading required package: GenomeInfoDb Attaching package: 'Biostrings' The following object is masked from 'package:base': strsplit Attaching package: 'dplyr' The following objects are masked from 'package:Biostrings': collapse, intersect, setdiff, setequal, union The following object is masked from 'package:GenomeInfoDb': intersect The following object is masked from 'package:XVector': slice The following objects are masked from 'package:IRanges': collapse, desc, intersect, setdiff, slice, union The following objects are masked from 'package:S4Vectors': first, intersect, rename, setdiff, setequal, union The following objects are masked from 'package:BiocGenerics': combine, intersect, setdiff, setequal, union The following object is masked from 'package:generics': explain The following objects are masked from 'package:stats': filter, lag The following objects are masked from 'package:base': intersect, setdiff, setequal, union [1] "Checking hamming search correctness" [1] "Checking levenshtein search correctness" [1] "Checking anchored search correctness" [1] "Checking global search with linear gap for correctness" Error in h(simpleError(msg, call)) : error in evaluating the argument 'args' in selecting a method for function 'do.call': 'recursive' must be a length-1 vector Calls: <Anonymous> ... mpi.XStringSet.pairwiseAlignment -> XStringSet.pairwiseAlignment -> array -> unlist In addition: Warning message: In .call_fun_in_pwalign("pairwiseAlignment", ...) : pairwiseAlignment() has moved from Biostrings to the pwalign package, and is formally deprecated in Biostrings >= 2.75.1. Please call pwalign::pairwiseAlignment() to get rid of this warning. Execution halted Flavor: r-devel-linux-x86_64-fedora-clang

Version: 0.2.8
Check: tests
Result: ERROR Running ‘test_RadixForest.R’ [24s/19s] Running ‘test_RadixTree.R’ [694s/361s] Running ‘test_pairwise.R’ [177s/99s] Running the tests in ‘tests/test_pairwise.R’ failed. Complete output: > # This test file tests the `dist_matrix` and `dist_pairwise` functions > # These two functions are simple dynamic programming algorithms for computing pairwise distances and are themselves used to validate > # the RadixTree imeplementation (see test_radix_tree.R) > > if(requireNamespace("seqtrie", quietly=TRUE) && + requireNamespace("stringi", quietly=TRUE) && + requireNamespace("stringdist", quietly=TRUE) && + requireNamespace("Biostrings", quietly=TRUE) && + requireNamespace("dplyr", quietly=TRUE) && + # pwalign is only required for Biostrings >= 2.72.0 in R 4.4+ + ( (packageVersion("Biostrings") < "2.72.0") || requireNamespace("pwalign", quietly=TRUE) ) + ) { + + library(seqtrie) + library(stringi) + library(stringdist) + library(Biostrings) + library(dplyr) + + # Use 2 threads on github actions and CRAN, 4 threads locally + IS_LOCAL <- Sys.getenv("IS_LOCAL") != "" + NTHREADS <- ifelse(IS_LOCAL, 4, 2) + NITER <- ifelse(IS_LOCAL, 4, 1) + NSEQS <- 10000 + MAXSEQLEN <- 200 + CHARSET <- "ACGT" + + random_strings <- function(N, charset = "abcdefghijklmnopqrstuvwxyz") { + charset_stri <- paste0("[", charset, "]") + len <- sample(0:MAXSEQLEN, N, replace=TRUE) + result <- lapply(0:MAXSEQLEN, function(x) { + nx <- sum(len == x) + if(nx == 0) return(character()) + stringi::stri_rand_strings(nx, x, pattern = charset_stri) + }) + sample(unlist(result)) + } + + mutate_strings <- function(x, prob = 0.025, indel_prob = 0.025, charset = "abcdefghijklmnopqrstuvwxyz") { + charset <- unlist(strsplit(charset, "")) + xsplit <- strsplit(x, "") + sapply(xsplit, function(a) { + r <- runif(length(a)) < prob + a[r] <- sample(charset, sum(r), replace=TRUE) + ins <- runif(length(a)) < indel_prob + a[ins] <- paste0(sample(charset, sum(ins), replace=TRUE), sample(charset, sum(ins), replace=TRUE)) + del <- runif(length(a)) < indel_prob + a[del] <- "" + paste0(a, collapse = "") + }) + } + + # Biostrings notes: + # subject (target) must be of length 1 or equal to pattern (query) + # To get a distance matrix, iterate over target and perform a column bind + # special_zero_case -- if both query and target are empty, Biostrings fails with an error + pairwiseAlignmentFix <- function(pattern, subject, ...) { + results <- rep(0, length(subject)) + special_zero_case <- nchar(pattern) == 0 & nchar(subject) == 0 + if(all(special_zero_case)) { + results + } else { + results[!special_zero_case] <- Biostrings::pairwiseAlignment(pattern=pattern[!special_zero_case], subject=subject[!special_zero_case], ...) + results + } + } + + biostrings_matrix_global <- function(query, target, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + lapply(query, function(x) { + query2 <- rep(x, length(target)) + -pairwiseAlignmentFix(pattern=query2, subject=target, substitutionMatrix = substitutionMatrix, gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + }) %>% do.call(rbind, .) + } + + biostrings_pairwise_global <- function(query, target, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + -pairwiseAlignment(pattern=query, subject=target, substitutionMatrix = substitutionMatrix,gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + } + + biostrings_matrix_anchored <- function(query, target, query_size, target_size, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + lapply(seq_along(query), function(i) { + query2 <- substring(query[i], 1, query_size[i,,drop=TRUE]) + target2 <- substring(target, 1, target_size[i,,drop=TRUE]) + -pairwiseAlignmentFix(pattern=query2, subject=target2, substitutionMatrix = substitutionMatrix, gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + }) %>% do.call(rbind, .) + } + + biostrings_pairwise_anchored <- function(query, target, query_size, target_size, cost_matrix, gap_cost, gap_open_cost = 0) { + substitutionMatrix <- -cost_matrix + query2 <- substring(query, 1, query_size) + target2 <- substring(target, 1, target_size) + -pairwiseAlignmentFix(pattern=query2, subject=target2, substitutionMatrix = substitutionMatrix, gapOpening=gap_open_cost, gapExtension=gap_cost, scoreOnly=TRUE, type="global") + } + + for(. in 1:NITER) { + + print("Checking hamming search correctness") + local({ + # Note: seqtrie returns `NA_integer_` for hamming distance when the lengths are different + # whereas stringdist returns `Inf` + # This is why we need to replace `NA_integer_` with `Inf` when comparing results + + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + results_seqtrie <- dist_matrix(query, target, mode = "hamming", nthreads=NTHREADS) + results_seqtrie[is.na(results_seqtrie)] <- Inf + results_stringdist <- stringdist::stringdistmatrix(query, target, method = "hamming", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "hamming", nthreads=NTHREADS) + results_seqtrie[is.na(results_seqtrie)] <- Inf + results_stringdist <- stringdist::stringdist(query_pairwise, target, method = "hamming", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + }) + + print("Checking levenshtein search correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + results_seqtrie <- dist_matrix(query, target, mode = "levenshtein", nthreads=NTHREADS) + results_stringdist <- stringdist::stringdistmatrix(query, target, method = "lv", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "levenshtein", nthreads=NTHREADS) + results_stringdist <- stringdist::stringdist(query_pairwise, target, method = "lv", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + }) + + print("Checking anchored search correctness") + local({ + # There is no anchored search in stringdist (or elsewhere). To get the same results, we substring the query and target sequences + # By the results of the seqtrie anchored search and then compare the results + + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + results_seqtrie <- dist_matrix(query, target, mode = "anchored", nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_stringdist <- lapply(seq_along(query), function(i) { + query_size2 <- query_size[i,,drop=TRUE] + target_size2 <- target_size[i,,drop=TRUE] + query2 <- substring(query[i], 1, query_size2) # query[i] is recycled + target2 <- substring(target, 1, target_size2) + stringdist::stringdist(query2, target2, method = "lv", nthread=NTHREADS) + }) %>% do.call(rbind, .) + stopifnot(all(results_seqtrie == results_stringdist)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "anchored", nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + query2 <- substring(query_pairwise, 1, query_size) + target2 <- substring(target, 1, target_size) + results_stringdist <- stringdist::stringdist(query2, target2, method = "lv", nthread=NTHREADS) + stopifnot(all(results_seqtrie == results_stringdist)) + }) + + print("Checking global search with linear gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_matrix_global(query, target, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_pairwise_global(query_pairwise, target, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + + print("Checking anchored search with linear gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_matrix_anchored(query, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_pairwise_anchored(query_pairwise, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + + + print("Checking global search with affine gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + gap_open_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cos=gap_open_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_matrix_global(query, target, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "levenshtein", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost, nthreads=NTHREADS) + results_biostrings <- biostrings_pairwise_global(query_pairwise, target, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + + print("Checking anchored search with affine gap for correctness") + local({ + target <- c(random_strings(NSEQS, CHARSET),"") %>% unique + query <- sample(c(sample(target, NSEQS/1000), random_strings(NSEQS/1000, CHARSET))) + query <- c(mutate_strings(query, indel_prob=0, charset = CHARSET), "") %>% unique + + # Check matrix results + cost_matrix <- matrix(sample(1:3, size = nchar(CHARSET)^2, replace=TRUE), nrow=nchar(CHARSET)) + diag(cost_matrix) <- 0 + colnames(cost_matrix) <- rownames(cost_matrix) <- strsplit(CHARSET, "")[[1]] + gap_cost <- sample(1:3, size = 1) + gap_open_cost <- sample(1:3, size = 1) + results_seqtrie <- dist_matrix(query, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_matrix_anchored(query, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + + # Check pairwise results + query_pairwise <- mutate_strings(target, prob=0.025, indel_prob=0.05, charset = CHARSET) + results_seqtrie <- dist_pairwise(query_pairwise, target, mode = "anchored", cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost, nthreads=NTHREADS) + query_size <- attr(results_seqtrie, "query_size") + target_size <- attr(results_seqtrie, "target_size") + results_biostrings <- biostrings_pairwise_anchored(query_pairwise, target, query_size, target_size, cost_matrix = cost_matrix, gap_cost = gap_cost, gap_open_cost=gap_open_cost) + stopifnot(all(results_seqtrie == results_biostrings)) + }) + } + + } Loading required package: BiocGenerics Loading required package: generics Attaching package: 'generics' The following objects are masked from 'package:base': as.difftime, as.factor, as.ordered, intersect, is.element, setdiff, setequal, union Attaching package: 'BiocGenerics' The following objects are masked from 'package:stats': IQR, mad, sd, var, xtabs The following objects are masked from 'package:base': Filter, Find, Map, Position, Reduce, anyDuplicated, aperm, append, as.data.frame, basename, cbind, colnames, dirname, do.call, duplicated, eval, evalq, get, grep, grepl, is.unsorted, lapply, mapply, match, mget, order, paste, pmax, pmax.int, pmin, pmin.int, rank, rbind, rownames, sapply, saveRDS, table, tapply, unique, unsplit, which.max, which.min Loading required package: S4Vectors Loading required package: stats4 Attaching package: 'S4Vectors' The following object is masked from 'package:utils': findMatches The following objects are masked from 'package:base': I, expand.grid, unname Loading required package: IRanges Loading required package: XVector Loading required package: GenomeInfoDb Attaching package: 'Biostrings' The following object is masked from 'package:base': strsplit Attaching package: 'dplyr' The following objects are masked from 'package:Biostrings': collapse, intersect, setdiff, setequal, union The following object is masked from 'package:GenomeInfoDb': intersect The following object is masked from 'package:XVector': slice The following objects are masked from 'package:IRanges': collapse, desc, intersect, setdiff, slice, union The following objects are masked from 'package:S4Vectors': first, intersect, rename, setdiff, setequal, union The following objects are masked from 'package:BiocGenerics': combine, intersect, setdiff, setequal, union The following object is masked from 'package:generics': explain The following objects are masked from 'package:stats': filter, lag The following objects are masked from 'package:base': intersect, setdiff, setequal, union [1] "Checking hamming search correctness" [1] "Checking levenshtein search correctness" [1] "Checking anchored search correctness" [1] "Checking global search with linear gap for correctness" Error in h(simpleError(msg, call)) : error in evaluating the argument 'args' in selecting a method for function 'do.call': 'recursive' must be a length-1 vector Calls: <Anonymous> ... mpi.XStringSet.pairwiseAlignment -> XStringSet.pairwiseAlignment -> array -> unlist In addition: Warning message: In .call_fun_in_pwalign("pairwiseAlignment", ...) : pairwiseAlignment() has moved from Biostrings to the pwalign package, and is formally deprecated in Biostrings >= 2.75.1. Please call pwalign::pairwiseAlignment() to get rid of this warning. Execution halted Flavor: r-devel-linux-x86_64-fedora-gcc

Version: 0.2.8
Check: compiled code
Result: NOTE File 'seqtrie/libs/x64/seqtrie.dll': Found non-API call to R: 'STRING_PTR' Compiled code should not call non-API entry points in R. See 'Writing portable packages' in the 'Writing R Extensions' manual, and section 'Moving into C API compliance' for issues with the use of non-API entry points. Flavor: r-devel-windows-x86_64

Version: 0.2.8
Check: for GNU extensions in Makefiles
Result: NOTE GNU make is a SystemRequirements. Flavors: r-patched-linux-x86_64, r-release-linux-x86_64, r-release-macos-arm64, r-release-macos-x86_64, r-release-windows-x86_64, r-oldrel-macos-arm64, r-oldrel-macos-x86_64, r-oldrel-windows-x86_64

Version: 0.2.8
Check: installed package size
Result: NOTE installed size is 5.9Mb sub-directories of 1Mb or more: data 1.1Mb libs 4.3Mb Flavors: r-release-macos-arm64, r-release-macos-x86_64, r-oldrel-macos-arm64, r-oldrel-macos-x86_64

Version: 0.2.8
Check: package dependencies
Result: NOTE Package suggested but not available for checking: ‘pwalign’ Flavors: r-release-macos-x86_64, r-oldrel-macos-arm64, r-oldrel-macos-x86_64, r-oldrel-windows-x86_64