# Reading a CSV file with tolerance for empty or unreadable files
readCSV = function(target, header = TRUE) {
output=try(read.csv(target, header = header, stringsAsFactors = FALSE, skipNul = TRUE, encoding = "UTF-8"), silent = TRUE)
if (inherits(output, "try-error")) return(NA) else return(output)
}
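# Example usage (the file path is hypothetical); a non-data-frame result means the read failed:
df = readCSV("logs/session01.csv")
if (!is.data.frame(df)) message("empty or unreadable file")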
# All pairwise combinations of elements from two vectors (outer returns a matrix)
outer(c("a", "b", "c"), c(1, 2), paste0)
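# The call above returns a 3 x 2 character matrix; flatten it to list every combination:
as.vector(outer(c("a", "b", "c"), c(1, 2), paste0)) # "a1" "b1" "c1" "a2" "b2" "c2"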
# Take the first element from each vector item after strsplit
sapply(strsplit(vector, "_"), "[[", 1)
sapply(strsplit(vector, "_"), head, 1)
# Take the second-to-last element from each vector item after strsplit
sapply(strsplit(vector, "_"), function(x) tail(x, 2)[1])
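# Quick check with a toy vector (values are only illustrative):
v = c("a_b_c", "x_y")
sapply(strsplit(v, "_"), "[[", 1) # "a" "x"
sapply(strsplit(v, "_"), function(x) tail(x, 2)[1]) # "b" "x"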
# Creating empty data frame with column names
data.frame(matrix(NA, nrow = 10, ncol = 3, dimnames = list(c(), c("V1", "V2", "V3"))))
setNames(data.frame(matrix(NA, nrow = 10, ncol = 3)), c("V1", "V2", "V3"))
# Converting string time to Unix time (seconds since the epoch)
time = strptime(stringTime, format = "%Y-%m-%d %H:%M:%S", tz = "America/Los_Angeles")
unixTime = as.numeric(as.POSIXct(time))
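# Quick check with an illustrative timestamp (value chosen only as an example):
as.numeric(as.POSIXct(strptime("2020-01-01 00:00:00", format = "%Y-%m-%d %H:%M:%S", tz = "UTC"))) # 1577836800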
# Pad single-digit hour/minute fields of an "H:M" string with a leading zero and append ":00" seconds
digit = function(timepoint) {
inputtime = strsplit(timepoint, ":")[[1]]
outputtime = paste0(paste(ifelse(nchar(inputtime) == 1, paste0("0", inputtime), inputtime), collapse = ":"), ":00")
return(outputtime)
}
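# Example call (input is illustrative): a single-digit hour/minute gets padded
digit("9:5") # "09:05:00"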
# convert seconds since midnight to HH:MM:SS time
convSec = function(x) {
add0 = function(y) {paste0(rep("0", 2 - nchar(y)), y)} # left-pad a single-digit number with "0"
x = as.numeric(x)
hr = x %/% 3600
min = (x - hr * 3600) %/% 60
sec = x - hr * 3600 - min * 60
return(paste0(add0(hr), ":", add0(min), ":", add0(sec)))
}
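# Example call: 3661 seconds past midnight
convSec(3661) # "01:01:01"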
# roughly calculate age in completed years
as.numeric(difftime(currentDate, birthday, units = "days")) %/% 365.25
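# Illustrative check with made-up dates:
as.numeric(difftime(as.Date("2024-07-01"), as.Date("2000-01-01"), units = "days")) %/% 365.25 # 24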
# Parallel computing
library(foreach)
library(doParallel)
# Function to split a vector of jobs into one chunk per worker (round-robin)
triage = function(jobs, nCL) {
splits = rep(1:nCL, length.out = length(jobs))
return(split(jobs, splits))
}
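# Example: splitting 10 job IDs across 4 workers round-robin (IDs are illustrative)
triage(1:10, 4) # list of 4 chunks: 1 5 9 / 2 6 10 / 3 7 / 4 8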
nCL=4
cl=makeCluster(nCL)
registerDoParallel(cl)
# export objects from the global environment to the workers if necessary
clusterExport(cl, c("data"))
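# Alternatively, foreach can ship objects/packages to the workers itself via its
# .export and .packages arguments (sketch only; "data" is a placeholder object):
# foreach(m = 1:nCL, .combine = rbind, .export = c("data"), .packages = "stats") %dopar% { ... }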
# each worker returns a data frame; .combine = rbind stacks the results
jobList = triage(MATCHs, nCL)
checklist = foreach(m = 1:nCL, .combine = rbind) %dopar% {
job = jobList[[m]]
checklistX = data.frame(job = job, survey = NA, stringsAsFactors = FALSE)
for (j in seq_along(job)) {checklistX[j, "survey"] = any(grepl("survey", list.files(paste0(job[j], "/.match"))))}
checklistX # the last expression is the value returned from each worker
}
stopCluster(cl)
# grep any .csv filename that does not contain "DataTable" (negative lookahead requires perl = TRUE)
grep("^((?!DataTable).)*\\.csv$", ListX, perl = TRUE, value = TRUE)
# extract the content between two patterns by substituting the whole match with the captured group
sub("^.*survey/(.*?)/Bed_Wake.*", "\\1", vectors)
# maximum number of columns in a file (fields counted from the 11th line onward)
col.num = max(count.fields(fileX, skip = 10, sep = ","))
# read the file from the 11th line, keeping only the first 4 columns (colClasses "NULL" drops a column)
read.csv(fileX, skip = 10, header = FALSE, colClasses = c(rep("integer", 4), rep("NULL", col.num - 4)))
# Count consecutive runs of elements in a vector (data$y); runs of values strictly between
# 0 and 100 lasting at most 120 samples are treated as 0s
vert = ifelse(data$y > 0 & data$y < 100, 1, data$y) # recode values strictly between 0 and 100 as 1
rleV = rle(vert) # run-length encoding of consecutive 0s, 1s and other values
chunk = data.frame(values = rleV$values, lengths = rleV$lengths)
chunk$filtered = ifelse(chunk$values == 1 & chunk$lengths <= 120, 0, chunk$values) # runs of 1s no longer than 120 become 0
rleX = rle(chunk$filtered) # re-encode after the smoothing above
chunk$indicator = rep(seq_along(rleX$lengths), rleX$lengths) # assign a run ID to each row of chunk
compress = data.frame(values = rleX$values, aggregate(chunk$lengths, by = list(c(chunk$indicator)), FUN = sum, na.rm = TRUE)) # total original length of each smoothed run
# aggregate: two equivalent ways to sum run lengths within each indicator group
aggregate(chunk$lengths, by=list(c(chunk$indicator)), FUN=sum, na.rm=TRUE)
aggregate(lengths~indicator, data=chunk, FUN=sum, na.rm=TRUE)
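# A base-R alternative (not from the original notes): tapply returns a named vector of the same sums
tapply(chunk$lengths, chunk$indicator, sum, na.rm = TRUE)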