diff --git a/.Rbuildignore b/.Rbuildignore index 9d51083..f1a940c 100644 --- a/.Rbuildignore +++ b/.Rbuildignore @@ -3,3 +3,4 @@ ^\.Rproj\.user$ ^revdep$ ^cran-comments\.md$ +^\.httr-oauth$ diff --git a/.gitignore b/.gitignore index 807ea25..f427a7f 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,4 @@ .Rproj.user .Rhistory .RData +.httr-oauth diff --git a/DESCRIPTION b/DESCRIPTION index 96315a9..bf3f026 100644 --- a/DESCRIPTION +++ b/DESCRIPTION @@ -1,18 +1,27 @@ Encoding: UTF-8 Package: memoise Title: Memoisation of Functions -Version: 1.0.0.9000 +Description: Memoisation allows the results of functions to be cached based on input parameters + xmemoise offers both local and remote caches, enabling memoisation across computers. + Additional cache types include google datastore and amazon aws. +Version: 1.0.1 Authors@R: c( person("Hadley", "Wickham", , "hadley@rstudio.com", role = "aut"), person("Jim", "Hester", , "jim.hester@rstudio.com", role = c("aut", "cre")), - person("Kirill", "Müller", , "krlmlr+r@mailbox.org", role = "aut")) + person("Kirill", "Müller", , "krlmlr+r@mailbox.org", role = "aut"), + person("Daniel", "Cook", , "danielecook@gmail.com", role = "aut")) Description: Cache the results of a function so that when you call it again with the same arguments it returns the pre-computed value. 
-URL: https://github.com/hadley/memoise +URL: https://github.com/danielecook/xmemoise BugReports: https://github.com/hadley/memoise/issues Imports: - digest (>= 0.6.3) + digest (>= 0.6.3), + base64enc Suggests: - testthat + testthat, + googleAuthR, + aws.s3 +Remotes: + cloudyr/aws.s3 License: MIT + file LICENSE RoxygenNote: 5.0.1 diff --git a/NAMESPACE b/NAMESPACE index 0bf84c1..a9cfc23 100644 --- a/NAMESPACE +++ b/NAMESPACE @@ -1,6 +1,10 @@ # Generated by roxygen2: do not edit by hand S3method(print,memoised) +export(cache_aws_s3) +export(cache_datastore) +export(cache_filesystem) +export(cache_local) export(forget) export(has_cache) export(is.memoised) diff --git a/R/cache_aws_s3.r b/R/cache_aws_s3.r new file mode 100644 index 0000000..1f010f0 --- /dev/null +++ b/R/cache_aws_s3.r @@ -0,0 +1,72 @@ +#' @name cache_aws_s3 +#' @title Amazon Web Services S3 Cache +#' @description Initiate an Amazon Web Services Cache +#' +#' @examples +#' +#' \dontrun{ +#' # Set AWS credentials. +#' Sys.setenv("AWS_ACCESS_KEY_ID" = "", +#' "AWS_SECRET_ACCESS_KEY" = "") +#' +#' # Set up a unique bucket name. +#' s3 <- cache_aws_s3("unique-bucket-name") +#' mem_runif <- memoise(runif, cache = s3) +#' } +#' +#' +#' @param cache_name Bucket name for storing cache files. +#' @export + +cache_aws_s3 <- function(cache_name) { + + # Can't get this check to pass... 
+ # if (!("aws.s3" %in% installed.packages()[,"Package"])) { stop("aws.s3 required for datastore cache.") } + + if (!(aws.s3::bucket_exists(cache_name))) { + aws.s3::put_bucket(cache_name) + if (!(aws.s3::bucket_exists(cache_name))) { + stop("Cache name must use unique bucket name") + } + } + + cache <- NULL + cache_reset <- function() { + aws.s3::delete_bucket(cache_name) + aws.s3::put_bucket(cache_name) + } + + cache_set <- function(key, value) { + tfile = tempfile() + save(value, file = tfile) + aws.s3::put_object(tfile, object = key, bucket = cache_name) + } + + cache_get <- function(key) { + suppressWarnings(aws.s3::s3load(object = key, bucket = cache_name)) + base::get(ls()[ls() != "key"][[1]]) + } + + cache_has_key <- function(key) { + aws.s3::head_object(object = key, bucket = cache_name) + } + + cache_keys <- function() { + items <- lapply(aws.s3::get_bucket(bucket = cache_name), function(x) { + if ("Key" %in% names(x)) { + return(x$Key) + } else { + return(NULL) + } + }) + unlist(Filter(Negate(is.null), items)) + } + + list( + reset = cache_reset, + set = cache_set, + get = cache_get, + has_key = cache_has_key, + keys = cache_keys + ) +} diff --git a/R/cache_datastore.r b/R/cache_datastore.r new file mode 100644 index 0000000..1f44cb1 --- /dev/null +++ b/R/cache_datastore.r @@ -0,0 +1,136 @@ +#' @name cache_datastore +#' @title Google Datastore Cache +#' @description Initiate a Google Datastore cache. +#' @param project Google Cloud project +#' @param cache_name datastore kind to use for storing cache entities. 
+#' +#' @examples +#' \dontrun{ +#' ds <- cache_datastore(project = "", cache_name = "rcache") +#' mem_memoise(runif, cache = ds) +#' } +#' +#' @seealso \url{https://cloud.google.com/} +#' @seealso \url{https://cloud.google.com/datastore/docs/concepts/overview} +#' +#' @export + +cache_datastore <- function(project, cache_name = "rcache") { + + if (!("googleAuthR" %in% installed.packages()[,"Package"])) { stop("googleAuthR required for datastore cache.") } + + options("googleAuthR.scopes.selected" = c("https://www.googleapis.com/auth/datastore", + "https://www.googleapis.com/auth/userinfo.email")) + + googleAuthR::gar_auth() + + base_url <- paste0("https://datastore.googleapis.com/v1beta3/projects/", project) + + transaction <- googleAuthR::gar_api_generator(paste0(base_url, ":beginTransaction"), + "POST", + data_parse_function = function(x) x$transaction) + + commit_ds <- googleAuthR::gar_api_generator(paste0(base_url, ":commit"), + "POST", + data_parse_function = function(x) x) + + load_ds <- googleAuthR::gar_api_generator(paste0(base_url, ":lookup"), + "POST", + data_parse_function = function(resp) { + # Unserialize and return + if ("found" %in% names(resp)) { + resp <- resp$found + value <- resp$entity$properties$object$blobValue + response <- unserialize(memDecompress(base64enc::base64decode(value), type = "gzip")) + } else if ("missing" %in% names(resp)) { + "!cache-not-found" + } else { + stop("Error") + } + }) + + query_ds <- googleAuthR::gar_api_generator(paste0(base_url, ":runQuery"), + "POST", + data_parse_function = function(resp) resp) + + + cache_reset <- function() { + query_results <- query_ds(the_body = list(gqlQuery = list(queryString = paste0("SELECT * FROM ", cache_name)))) + while((query_results$batch$moreResults != "NO_MORE_RESULTS") | is.null(query_results$batch$entityResults) == FALSE) { + + + ids <- (sapply(query_results$batch$entityResults$entity$key$path, function(x) x$name)) + + item_groups <- split(ids, (1:length(ids)) %/% 25) + 
sapply(item_groups, function(idset) { + mutations <- lapply(idset, function(x) { + c(list("delete" = list(path = list(kind = cache_name, name = x)))) + }) + body <- list(mutations = mutations, transaction = transaction()) + resp <- try(commit_ds(the_body = body), silent = T) + message("Clearing Cache") + }) + query_results <- query_ds(the_body = list(gqlQuery = list(queryString = paste0("SELECT * FROM ", cache_name)))) + } + } + + + cache_set <- function(key, value) { + # Serialize value + svalue <- base64enc::base64encode(memCompress(serialize(value, NULL, ascii=T), type = "gzip")) + path_item <- list( + kind = cache_name, + name = key + ) + prop = list( + object = list(blobValue = svalue, excludeFromIndexes = T) + ) + + transaction_id <- transaction() + + key_obj <- c(list(key = list(path = path_item), + properties = prop)) + mutation = list() + mutation[["upsert"]] = key_obj + body <- list(mutations = mutation, + transaction = transaction_id + ) + + resp <- try(commit_ds(the_body = body), silent = T) + if (class(resp) == "try-error") { + warning(attr(resp, "condition")) + } + } + + cache_get <- function(key) { + path_item <- list( + kind = cache_name, + name = key + ) + + resp <- load_ds(the_body = list(keys = list(path = path_item))) + suppressWarnings( if(resp == "!cache-not-found") { + stop("Cache Not Found") + }) + resp + } + + cache_has_key <- function(key) { + res <- try(suppressWarnings(cache_get(key)), silent = TRUE) + if (class(res) != "try-error") { + message("Using Cached Version") + } + class(res) != "try-error" + } + + list( + reset = cache_reset, + set = cache_set, + get = cache_get, + has_key = cache_has_key, + keys = function() message("Keys can't be listed with the google datastore cache.") + ) +} + + + diff --git a/R/cache_filesystem.r b/R/cache_filesystem.r new file mode 100644 index 0000000..d0f5aa6 --- /dev/null +++ b/R/cache_filesystem.r @@ -0,0 +1,65 @@ +#' @name cache_filesystem +#' @title Filesystem Cache +#' @description +#' Initiate a 
filesystem cache. +#' +#' @param path Directory in which to store cached items. +#' +#' @examples +#' +#' \dontrun{ +#' # Use with Dropbox +#' +#' db <- cache_filesystem("~/Dropbox/.rcache") +#' +#' mem_runif <- memoise(runif, cache = db) +#' +#' # Use with Google Drive +#' +#' gd <- cache_filesystem("~/Google Drive/.rcache") +#' +#' mem_runif <- memoise(runif, cache = gd) +#' +#' } +#' +#' @export + +cache_filesystem <- function(path) { + + dir.create(file.path(path), showWarnings = FALSE) + + cache_reset <- function() { + cache_files <- list.files(path, full.names = TRUE) + # Use an environment for loaded items. + cache <- new.env(TRUE, emptyenv()) + if (length(cache_files) > 0) { + rm_status <- file.remove(list.files(path, full.names = TRUE)) + if (rm_status) { + message("Cached files removed.") + } + } else { + message("No files in Cache.") + } + } + + cache_set <- function(key, value) { + save(value, file = paste(path, key, sep="/")) + } + + cache_get <- function(key) { + load(file = paste(path, key, sep="/")) + value + } + + cache_has_key <- function(key) { + file.exists(paste(path, key, sep="/")) + } + + list( + reset = cache_reset, + set = cache_set, + get = cache_get, + has_key = cache_has_key, + keys = function() list.files(path) + ) +} diff --git a/R/cache.r b/R/cache_local.r similarity index 70% rename from R/cache.r rename to R/cache_local.r index 23d542f..bbe1208 100644 --- a/R/cache.r +++ b/R/cache_local.r @@ -1,4 +1,12 @@ -new_cache <- function() { +#' @name cache_local +#' @title In Memory Cache +#' @description Initiate an in memory cache. +#' +#' cache_local() stores cached items in memory. It is the default cache. +#' +#' @export + +cache_local <- function() { cache <- NULL cache_reset <- function() { diff --git a/R/memoise.r b/R/memoise.r index ac0ae73..e73388c 100644 --- a/R/memoise.r +++ b/R/memoise.r @@ -38,6 +38,7 @@ #' @param ... optional variables specified as formulas with no RHS to use as #' additional restrictions on caching. 
See Examples for usage. #' @param envir Environment of the returned function. +#' @param cache Cache function. #' @seealso \code{\link{forget}}, \code{\link{is.memoised}}, #' \code{\link{timeout}}, \url{http://en.wikipedia.org/wiki/Memoization} #' @aliases memoise memoize @@ -97,7 +98,7 @@ #' memA4 <- memoise(a, ~timeout(10)) #' memA4(2) #' @importFrom stats setNames -memoise <- memoize <- function(f, ..., envir = environment(f)) { +memoise <- memoize <- function(f, ..., envir = environment(f), cache = cache_local()) { f_formals <- formals(args(f)) if(is.memoised(f)) { stop("`f` must not be memoised.", call. = FALSE) @@ -113,8 +114,6 @@ memoise <- memoize <- function(f, ..., envir = environment(f)) { init_call_args <- setNames(f_formal_name_list, f_formal_names) init_call <- make_call(quote(`_f`), init_call_args) - cache <- new_cache() - validate_formulas(...) additional <- list(...) diff --git a/README.md b/README.md index f2b4566..0219c36 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,19 @@ -# memoise [![Travis-CI Build Status](https://travis-ci.org/hadley/memoise.svg?branch=master)](https://travis-ci.org/hadley/memoise) [![Coverage Status](https://img.shields.io/codecov/c/github/hadley/memoise/master.svg)](https://codecov.io/github/hadley/memoise?branch=master) +# memoise +Forked from [hadley/memoise](https://github.com/hadley/memoise) + +# Installation + +``` +devtools::install_github("danielecook/memoise") +``` + +# Memoization If a function is called multiple times with the same input, you can often speed things up by keeping a cache of known answers that it can retrieve. This is called memoisation . -The `memoise` package provides a simple syntax +This is a fork of the `memoise` package built by Hadley Wickham: [hadley/memoise](https://github.com/hadley/memoise), which provides a simple syntax mf <- memoise(f) @@ -18,4 +27,59 @@ cache with is.memoised(mf) # TRUE is.memoised(f) # FALSE -. 
+ +`memoise` extends upon `memoise` by adding in additional types of caches. Items can be cached using the original cache implemented in `memoise` in addition to other options: + +* [x] Google Datastore +* [x] cache_filesystem allows caching using dropbox/google drive. +* [X] AWS + +# Caches + +## Google Datastore + +Use `cache_datastore` to set up a cache on google datastore. Requires you to set a `project` and `cache_name`. The `cache_name` +is used to set the kind for each entity stored on google datastore. + +```r +library(xmemoise) + +# Generate a memoised function. +mrunif <- memoise(runif, cache = cache_datastore("", "rcache")) + +mrunif(10) # First run, saves cache +mrunif(10) # Loads cache, results should be identical +``` + +## AWS S3 + +Use `cache_s3` to cache objects using s3 storage. Requires you to specify a bucket using `cache_name`. When creating buckets, they must be unique among all s3 users when created. + +```r +Sys.setenv("AWS_ACCESS_KEY_ID" = "", + "AWS_SECRET_ACCESS_KEY" = "") + +mrunif <- memoise(runif, cache = cache_s3("")) + +mrunif(10) # First run, saves cache +mrunif(10) # Loads cache, results should be identical + +``` + +## Filesystem + +`cache_filesystem` can be used to cache between computers using Google Drive or Dropbox. + +``` +dbc <- cache_filesystem("~/Dropbox/.rcache") +mrunif <- memoise(runif, cache = dbc) +mrunif(20) # Results stored in Dropbox .rcache folder will be synced between computers. +``` + +``` +gdc <- cache_filesystem("~/Google Drive/.rcache") +mrunif <- memoise(runif, cache = dbc) +mrunif(20) # Results stored in Google Drive .rcache folder will be synced between computers. 
+``` + + diff --git a/man/cache_aws_s3.Rd b/man/cache_aws_s3.Rd new file mode 100644 index 0000000..be064af --- /dev/null +++ b/man/cache_aws_s3.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cache_aws_s3.r +\name{cache_aws_s3} +\alias{cache_aws_s3} +\title{Amazon Web Services S3 Cache} +\usage{ +cache_aws_s3(cache_name) +} +\arguments{ +\item{cache_name}{Bucket name for storing cache files.} +} +\description{ +Initiate an Amazon Web Services Cache +} +\examples{ + +\dontrun{ +# Set AWS credentials. +Sys.setenv("AWS_ACCESS_KEY_ID" = "", + "AWS_SECRET_ACCESS_KEY" = "") + +# Set up a unique bucket name. +s3 <- cache_aws_s3("unique-bucket-name") +mem_runif <- memoise(runif, cache = s3) +} + + +} + diff --git a/man/cache_datastore.Rd b/man/cache_datastore.Rd new file mode 100644 index 0000000..bee1637 --- /dev/null +++ b/man/cache_datastore.Rd @@ -0,0 +1,29 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cache_datastore.r +\name{cache_datastore} +\alias{cache_datastore} +\title{Google Datastore Cache} +\usage{ +cache_datastore(project, cache_name = "rcache") +} +\arguments{ +\item{project}{Google Cloud project} + +\item{cache_name}{datastore kind to use for storing cache entities.} +} +\description{ +Initiate a Google Datastore cache. 
+} +\examples{ +\dontrun{ +ds <- cache_datastore(project = "", cache_name = "rcache") +mem_memoise(runif, cache = ds) +} + +} +\seealso{ +\url{https://cloud.google.com/} + +\url{https://cloud.google.com/datastore/docs/concepts/overview} +} + diff --git a/man/cache_filesystem.Rd b/man/cache_filesystem.Rd new file mode 100644 index 0000000..4193258 --- /dev/null +++ b/man/cache_filesystem.Rd @@ -0,0 +1,33 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cache_filesystem.r +\name{cache_filesystem} +\alias{cache_filesystem} +\title{Filesystem Cache} +\usage{ +cache_filesystem(path) +} +\arguments{ +\item{path}{Directory in which to store cached items.} +} +\description{ +Initiate a filesystem cache. +} +\examples{ + +\dontrun{ +# Use with Dropbox + +db <- cache_filesystem("~/Dropbox/.rcache") + +mem_runif <- memoise(runif, cache = db) + +# Use with Google Drive + +gd <- cache_filesystem("~/Google Drive/.rcache") + +mem_runif <- memoise(runif, cache = gd) + +} + +} + diff --git a/man/cache_local.Rd b/man/cache_local.Rd new file mode 100644 index 0000000..ff0da3a --- /dev/null +++ b/man/cache_local.Rd @@ -0,0 +1,14 @@ +% Generated by roxygen2: do not edit by hand +% Please edit documentation in R/cache_local.r +\name{cache_local} +\alias{cache_local} +\title{In Memory Cache} +\usage{ +cache_local() +} +\description{ +Initiate an in memory cache. + +cache_local() stores cached items in memory. It is the default cache. +} + diff --git a/man/memoise.Rd b/man/memoise.Rd index c3ba39a..97d2c4e 100644 --- a/man/memoise.Rd +++ b/man/memoise.Rd @@ -5,7 +5,7 @@ \alias{memoize} \title{Memoise a function.} \usage{ -memoise(f, ..., envir = environment(f)) +memoise(f, ..., envir = environment(f), cache = cache_local()) } \arguments{ \item{f}{Function of which to create a memoised copy.} @@ -14,6 +14,8 @@ memoise(f, ..., envir = environment(f)) additional restrictions on caching. 
See Examples for usage.} \item{envir}{Environment of the returned function.} + +\item{cache}{Cache object used to store memoised results, as returned by one of the \code{cache_*} functions; defaults to \code{cache_local()}.} } \description{ \code{mf <- memoise(f)} creates \code{mf}, a memoised copy of