# Steps to remove primers with cutadapt on Windows ----
#
# 1/ Install Linux Bash on Windows 10, for example by following the
#    description here: https://itsfoss.com/install-bash-on-windows/
#
# 2/ Install cutadapt from Bash, for example as described here (you will also
#    need Python installed for this step):
#    https://cutadapt.readthedocs.io/en/stable/installation.html
#    (Quick installation)
#
# 3/ Run with R:
#    You can run cutadapt directly in Linux. But if you run it from outside
#    Linux, the system may not know where the program is located (I struggled
#    with this). So, in R, instead of `cutadapt <- "cutadapt"` you need to
#    prepend "wsl " (indicating that the program runs in Linux) followed by
#    your path to the program (for me, for example:
#    'wsl /home/fanito/.local/bin/cutadapt').
#    If your data is on an external drive/USB stick, you have to mount it
#    every time you run the program, with the following command (run it
#    directly in Bash because of sudo):
#    'sudo mount -t drvfs f: /mnt/f'
#    (replacing the last part with your USB path).
#
# 4/ R code up to the beginning of the DADA2 pipeline:

path <- "C:/tchonova/DADA2_course_TC/DADA2_2103/practical_work/data_workshop" # put your path here
path_results <- file.path(path, "results")
if (!dir.exists(path_results)) dir.create(path_results)

# Set patterns to discriminate your forward and reverse read files
file_pattern <- c("F" = "_L001_R1_001.fastq", "R" = "_L001_R2_001.fastq")

# Translate a Windows path ("C:/dir/file") into the path under which WSL
# exposes the same file ("/mnt/c/dir/file"). Paths without a drive letter are
# returned unchanged. Assumes the usual WSL layout where Windows drives are
# mounted under /mnt/<lower-case drive letter> (as in the mount example above).
to_wsl_path <- function(win_path) {
  win_path <- gsub("\\", "/", win_path, fixed = TRUE)
  has_drive <- grepl("^[A-Za-z]:", win_path)
  drive <- tolower(substr(win_path, 1L, 1L))
  rest <- substr(win_path, 3L, nchar(win_path))
  ifelse(has_drive, paste0("/mnt/", drive, rest), win_path)
}

#### REMOVAL OF PRIMERS ####

fas_Fs_raw <- sort(list.files(path, pattern = file_pattern["F"], full.names = TRUE))
fas_Rs_raw <- sort(list.files(path, pattern = file_pattern["R"], full.names = TRUE))

# Fail early with a clear message instead of passing NA/empty paths on to
# cutadapt and the downstream steps.
if (length(fas_Fs_raw) == 0L) {
  stop("No forward read files matching '", file_pattern["F"],
       "' found in: ", path, call. = FALSE)
}
if (length(fas_Fs_raw) != length(fas_Rs_raw)) {
  stop("Found ", length(fas_Fs_raw), " forward but ", length(fas_Rs_raw),
       " reverse read files in: ", path, call. = FALSE)
}

# This is our set of primers (from Vasselon et al. 2017)
FWD <- c("AGGTGAAGTAAAAGGTTCWTACTTAAA",
         "AGGTGAAGTTAAAGGTTCWTAYTTAAA",
         "AGGTGAAACTAAAGGTTCWTACTTAAA")
REV <- c("CCTTCTAATTTACCWACWACTG",
         "CCTTCTAATTTACCWACAACAG")
FWD_RC <- dada2:::rc(FWD)
REV_RC <- dada2:::rc(REV)

path_cut <- file.path(path, "cutadapt")
if (!dir.exists(path_cut)) dir.create(path_cut)

fas_Fs_cut <- file.path(path_cut, basename(fas_Fs_raw))
fas_Rs_cut <- file.path(path_cut, basename(fas_Rs_raw))

# One flag per primer: -g/-a are cutadapt's adapter options for the first
# read, -G/-A the corresponding options for the second (paired) read.
R1_flags <- paste(paste("-g", FWD, collapse = " "),
                  paste("-a", REV_RC, collapse = " "))
R2_flags <- paste(paste("-G", REV, collapse = " "),
                  paste("-A", FWD_RC, collapse = " "))

# "wsl " + path to the cutadapt executable inside Linux; put your path here
cutadapt <- "wsl /usr/bin/cutadapt"

# cutadapt runs inside WSL, so every file has to be addressed by its Linux
# path. These are derived from `path`, so only `path` needs to be adapted.
# NOTE: the paths are pasted into the command line unquoted, so they must not
# contain spaces.
wsl_Fs_raw <- to_wsl_path(fas_Fs_raw)
wsl_Rs_raw <- to_wsl_path(fas_Rs_raw)
wsl_Fs_cut <- to_wsl_path(fas_Fs_cut)
wsl_Rs_cut <- to_wsl_path(fas_Rs_cut)

for (i in seq_along(fas_Fs_raw)) {
  cat("Processing", "-----------", i, "/", length(fas_Fs_raw), "-----------\n")
  status <- system(paste(
    cutadapt,
    R1_flags,
    R2_flags,
    "--discard-untrimmed",
    "--max-n 0",
    # Optional - strong constraint on expected length
    # paste0("-m ", 250-nchar(FWD)[1], ":", 250-nchar(REV)[1]),
    # paste0("-M ", 250-nchar(FWD)[1], ":", 250-nchar(REV)[1]),
    "-o", wsl_Fs_cut[i],
    "-p", wsl_Rs_cut[i],
    wsl_Fs_raw[i],
    wsl_Rs_raw[i]
  ))
  # system() returns the exit status of the command; report failures instead
  # of silently continuing with missing or incomplete output files.
  if (!identical(as.integer(status), 0L)) {
    warning("cutadapt returned exit status ", status, " for ",
            basename(fas_Fs_raw[i]), call. = FALSE)
  }
}

# Filter empty sequences from the fastq files (added later, because we did not
# remove sequences according to length with cutadapt - see the optional -m/-M
# flags above). filterAndTrim() is called with its default settings.
path_postcut <- file.path(path, "post_cutadapt_lengthmin")
if (!dir.exists(path_postcut)) dir.create(path_postcut)

fas_Fs_postcut <- file.path(path_postcut, basename(fas_Fs_raw))
fas_Rs_postcut <- file.path(path_postcut, basename(fas_Rs_raw))

dada2::filterAndTrim(fas_Fs_cut, fas_Fs_postcut, fas_Rs_cut, fas_Rs_postcut)

# Number of forward reads per file before primer removal and after filtering
out_1 <- cbind(
  ShortRead::qa(fas_Fs_raw)[["readCounts"]][, "read", drop = FALSE],
  ShortRead::qa(fas_Fs_postcut)[["readCounts"]][, "read", drop = FALSE]
)

head(out_1)