Skip to content

Commit b3b9ae6

Browse files
committed
Support for caching on local filesystem and external data stores
Squashed commit of the following: commit baed71c Author: Daniel E Cook <[email protected]> Date: Sun Jul 31 13:36:28 2016 -0500 Update README.md commit d86494d Author: Daniel E Cook <[email protected]> Date: Sun Jul 31 13:31:42 2016 -0500 Update README.md commit f6ddf99 Author: Daniel E Cook <[email protected]> Date: Sun Jul 31 13:29:05 2016 -0500 Update README.md commit f60558d Author: Daniel E Cook <[email protected]> Date: Thu Jul 28 19:26:18 2016 -0500 Update README.md commit 5b3e9ee Author: Daniel Cook <[email protected]> Date: Thu Jul 28 19:25:47 2016 -0500 improve documentation with examples. commit 9f4a914 Author: Dan Cook <[email protected]> Date: Thu Jul 28 13:56:48 2016 -0500 Fixed final warnings. commit 324e5a5 Author: Dan Cook <[email protected]> Date: Thu Jul 28 13:42:23 2016 -0500 remove documentation items not allowed... commit c32fd35 Author: Dan Cook <[email protected]> Date: Thu Jul 28 13:04:05 2016 -0500 Improve documentation. commit a952d5d Merge: f435e95 68e8636 Author: Dan Cook <[email protected]> Date: Thu Jul 28 12:51:14 2016 -0500 Merge branch 'master' of https://github.com/danielecook/xmemoise commit f435e95 Author: Dan Cook <[email protected]> Date: Thu Jul 28 12:51:04 2016 -0500 Restructure for fork commit 68e8636 Author: Daniel E Cook <[email protected]> Date: Wed Jul 27 16:31:16 2016 -0500 Update README.md commit 25cbe38 Author: Daniel Cook <[email protected]> Date: Tue Jul 26 08:56:52 2016 -0500 Remotes were fine... commit 4c6df0d Author: Daniel Cook <[email protected]> Date: Tue Jul 26 08:51:49 2016 -0500 use git repo instead commit df3b92f Author: Daniel Cook <[email protected]> Date: Tue Jul 26 08:51:04 2016 -0500 fix remotes. commit cd16c33 Author: Daniel Cook <[email protected]> Date: Tue Jul 26 08:50:38 2016 -0500 fix remotes. commit 73c11b4 Author: Daniel Cook <[email protected]> Date: Tue Jul 26 08:49:49 2016 -0500 fix remotes commit 6fad8e3 Author: Daniel Cook <[email protected]> Date: Tue Jul 26 08:49:21 2016 -0500 fix description commit c2f0bce Author: Daniel Cook <[email protected]> Date: Tue Jul 26 08:45:27 2016 -0500 Update description. commit 27fe9e8 Author: Daniel Cook <[email protected]> Date: Tue Jul 26 08:39:02 2016 -0500 Rename, update commit 9e1ac4c Author: Daniel E Cook <[email protected]> Date: Mon Jul 25 16:34:20 2016 -0500 Update README.md commit 0448ff8 Author: Dan Cook <[email protected]> Date: Mon Jul 25 16:33:04 2016 -0500 Cleanup and readme. commit fc44834 Author: Dan Cook <[email protected]> Date: Mon Jul 25 16:23:16 2016 -0500 Add AWS Service commit 2e690c6 Author: Dan Cook <[email protected]> Date: Mon Jul 25 14:00:27 2016 -0500 cleanup namespace commit 20ce739 Author: Dan Cook <[email protected]> Date: Mon Jul 25 13:19:12 2016 -0500 Finish datastore cache. commit 514514a Author: Dan Cook <[email protected]> Date: Mon Jul 25 12:39:27 2016 -0500 Fix cache return value and don't automatically reset cache. commit 1e8e809 Author: Dan Cook <[email protected]> Date: Mon Jul 25 12:32:36 2016 -0500 Rewrite to use googleauthR Add cache_reset commit 56a6c8f Author: Daniel Cook <[email protected]> Date: Mon Jul 25 09:17:15 2016 -0500 update readme. commit 51a4544 Author: Daniel Cook <[email protected]> Date: Mon Jul 25 09:16:55 2016 -0500 Add to do and additional cache types. commit e3569a6 Author: Daniel Cook <[email protected]> Date: Sun Jul 24 23:32:53 2016 -0500 add error messages and compression. commit fb271f1 Author: Daniel Cook <[email protected]> Date: Sun Jul 24 19:44:08 2016 -0500 Add google datastore memoization option.
1 parent a411634 commit b3b9ae6

15 files changed

+479
-13
lines changed

.Rbuildignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,3 +3,4 @@
33
^\.Rproj\.user$
44
^revdep$
55
^cran-comments\.md$
6+
^\.httr-oauth$

.gitignore

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
11
.Rproj.user
22
.Rhistory
33
.RData
4+
.httr-oauth

DESCRIPTION

Lines changed: 14 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -1,18 +1,27 @@
11
Encoding: UTF-8
22
Package: memoise
33
Title: Memoisation of Functions
4-
Version: 1.0.0.9000
4+
Description: Memoisation allows the results of functions to be cached based on input parameters
5+
xmemoise offers both local and remote caches, enabling memoisation across computers.
6+
Additional cache types include google datastore and amazon aws.
7+
Version: 1.0.1
58
Authors@R: c(
69
person("Hadley", "Wickham", , "[email protected]", role = "aut"),
710
person("Jim", "Hester", , "[email protected]", role = c("aut", "cre")),
8-
person("Kirill", "Müller", , "[email protected]", role = "aut"))
11+
person("Kirill", "Müller", , "[email protected]", role = "aut"),
12+
person("Daniel", "Cook", , "[email protected]", role = "aut"))
913
Description: Cache the results of a function so that when you call it
1014
again with the same arguments it returns the pre-computed value.
11-
URL: https://github.com/hadley/memoise
15+
URL: https://github.com/danielecook/xmemoise
1216
BugReports: https://github.com/hadley/memoise/issues
1317
Imports:
14-
digest (>= 0.6.3)
18+
digest (>= 0.6.3),
19+
base64enc
1520
Suggests:
16-
testthat
21+
testthat,
22+
googleAuthR,
23+
aws.s3
24+
Remotes:
25+
cloudyr/aws.s3
1726
License: MIT + file LICENSE
1827
RoxygenNote: 5.0.1

NAMESPACE

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,10 @@
11
# Generated by roxygen2: do not edit by hand
22

33
S3method(print,memoised)
4+
export(cache_aws_s3)
5+
export(cache_datastore)
6+
export(cache_filesystem)
7+
export(cache_local)
48
export(forget)
59
export(has_cache)
610
export(is.memoised)

R/cache_aws_s3.r

Lines changed: 72 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,72 @@
1+
#' @name cache_aws_s3
2+
#' @title Amazon Web Services S3 Cache
3+
#' @description Initiate an Amazon Web Services Cache
4+
#'
5+
#' @examples
6+
#'
7+
#' \dontrun{
8+
#' # Set AWS credentials.
9+
#' Sys.setenv("AWS_ACCESS_KEY_ID" = "<access key>",
10+
#' "AWS_SECRET_ACCESS_KEY" = "<access secret>")
11+
#'
12+
#' # Set up a unique bucket name.
13+
#' s3 <- cache_aws_s3("unique-bucket-name")
14+
#' mem_runif <- memoise(runif, cache = s3)
15+
#' }
16+
#'
17+
#'
18+
#' @param cache_name Bucket name for storing cache files.
19+
#' @export
20+
21+
cache_aws_s3 <- function(cache_name) {
22+
23+
# Can't get this check to pass...
24+
# if (!("aws.s3" %in% installed.packages()[,"Package"])) { stop("aws.s3 required for datastore cache.") }
25+
26+
if (!(aws.s3::bucket_exists(cache_name))) {
27+
aws.s3::put_bucket(cache_name)
28+
if (!(aws.s3::bucket_exists(cache_name))) {
29+
stop("Cache name must use unique bucket name")
30+
}
31+
}
32+
33+
cache <- NULL
34+
cache_reset <- function() {
35+
aws.s3::delete_bucket(cache_name)
36+
aws.s3::put_bucket(cache_name)
37+
}
38+
39+
cache_set <- function(key, value) {
40+
tfile = tempfile()
41+
save(value, file = tfile)
42+
aws.s3::put_object(tfile, object = key, bucket = cache_name)
43+
}
44+
45+
cache_get <- function(key) {
46+
suppressWarnings(aws.s3::s3load(object = key, bucket = cache_name))
47+
base::get(ls()[ls() != "key"][[1]])
48+
}
49+
50+
cache_has_key <- function(key) {
51+
aws.s3::head_object(object = key, bucket = cache_name)
52+
}
53+
54+
cache_keys <- function() {
55+
items <- lapply(aws.s3::get_bucket(bucket = cache_name), function(x) {
56+
if ("Key" %in% names(x)) {
57+
return(x$Key)
58+
} else {
59+
return(NULL)
60+
}
61+
})
62+
unlist(Filter(Negate(is.null), items))
63+
}
64+
65+
list(
66+
reset = cache_reset,
67+
set = cache_set,
68+
get = cache_get,
69+
has_key = cache_has_key,
70+
keys = cache_keys
71+
)
72+
}

R/cache_datastore.r

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
#' @name cache_datastore
2+
#' @title Google Datastore Cache
3+
#' @description Initiate a Google Datastore cache.
4+
#' @param project Google Cloud project
5+
#' @param cache_name datastore kind to use for storing cache entities.
6+
#'
7+
#' @examples
8+
#' \dontrun{
9+
#' ds <- cache_datastore(project = "<project-id>", cache_name = "rcache")
10+
#' mem_memoise(runif, cache = ds)
11+
#' }
12+
#'
13+
#' @seealso \url{https://cloud.google.com/}
14+
#' @seealso \url{https://cloud.google.com/datastore/docs/concepts/overview}
15+
#'
16+
#' @export
17+
18+
cache_datastore <- function(project, cache_name = "rcache") {
19+
20+
if (!("googleAuthR" %in% installed.packages()[,"Package"])) { stop("googleAuthR required for datastore cache.") }
21+
22+
options("googleAuthR.scopes.selected" = c("https://www.googleapis.com/auth/datastore",
23+
"https://www.googleapis.com/auth/userinfo.email"))
24+
25+
googleAuthR::gar_auth()
26+
27+
base_url <- paste0("https://datastore.googleapis.com/v1beta3/projects/", project)
28+
29+
transaction <- googleAuthR::gar_api_generator(paste0(base_url, ":beginTransaction"),
30+
"POST",
31+
data_parse_function = function(x) x$transaction)
32+
33+
commit_ds <- googleAuthR::gar_api_generator(paste0(base_url, ":commit"),
34+
"POST",
35+
data_parse_function = function(x) x)
36+
37+
load_ds <- googleAuthR::gar_api_generator(paste0(base_url, ":lookup"),
38+
"POST",
39+
data_parse_function = function(resp) {
40+
# Unserialize and return
41+
if ("found" %in% names(resp)) {
42+
resp <- resp$found
43+
value <- resp$entity$properties$object$blobValue
44+
response <- unserialize(memDecompress(base64enc::base64decode(value), type = "gzip"))
45+
} else if ("missing" %in% names(resp)) {
46+
"!cache-not-found"
47+
} else {
48+
stop("Error")
49+
}
50+
})
51+
52+
query_ds <- googleAuthR::gar_api_generator(paste0(base_url, ":runQuery"),
53+
"POST",
54+
data_parse_function = function(resp) resp)
55+
56+
57+
cache_reset <- function() {
58+
query_results <- query_ds(the_body = list(gqlQuery = list(queryString = paste0("SELECT * FROM ", cache_name))))
59+
while((query_results$batch$moreResults != "NO_MORE_RESULTS") | is.null(query_results$batch$entityResults) == FALSE) {
60+
61+
62+
ids <- (sapply(query_results$batch$entityResults$entity$key$path, function(x) x$name))
63+
64+
item_groups <- split(ids, (1:length(ids)) %/% 25)
65+
sapply(item_groups, function(idset) {
66+
mutations <- lapply(idset, function(x) {
67+
c(list("delete" = list(path = list(kind = cache_name, name = x))))
68+
})
69+
body <- list(mutations = mutations, transaction = transaction())
70+
resp <- try(commit_ds(the_body = body), silent = T)
71+
message("Clearing Cache")
72+
})
73+
query_results <- query_ds(the_body = list(gqlQuery = list(queryString = paste0("SELECT * FROM ", cache_name))))
74+
}
75+
}
76+
77+
78+
cache_set <- function(key, value) {
79+
# Serialize value
80+
svalue <- base64enc::base64encode(memCompress(serialize(value, NULL, ascii=T), type = "gzip"))
81+
path_item <- list(
82+
kind = cache_name,
83+
name = key
84+
)
85+
prop = list(
86+
object = list(blobValue = svalue, excludeFromIndexes = T)
87+
)
88+
89+
transaction_id <- transaction()
90+
91+
key_obj <- c(list(key = list(path = path_item),
92+
properties = prop))
93+
mutation = list()
94+
mutation[["upsert"]] = key_obj
95+
body <- list(mutations = mutation,
96+
transaction = transaction_id
97+
)
98+
99+
resp <- try(commit_ds(the_body = body), silent = T)
100+
if (class(resp) == "try-error") {
101+
warning(attr(resp, "condition"))
102+
}
103+
}
104+
105+
cache_get <- function(key) {
106+
path_item <- list(
107+
kind = cache_name,
108+
name = key
109+
)
110+
111+
resp <- load_ds(the_body = list(keys = list(path = path_item)))
112+
suppressWarnings( if(resp == "!cache-not-found") {
113+
stop("Cache Not Found")
114+
})
115+
resp
116+
}
117+
118+
cache_has_key <- function(key) {
119+
res <- try(suppressWarnings(cache_get(key)), silent = TRUE)
120+
if (class(res) != "try-error") {
121+
message("Using Cached Version")
122+
}
123+
class(res) != "try-error"
124+
}
125+
126+
list(
127+
reset = cache_reset,
128+
set = cache_set,
129+
get = cache_get,
130+
has_key = cache_has_key,
131+
keys = function() message("Keys can't be listed with the google datastore cache.")
132+
)
133+
}
134+
135+
136+

R/cache_filesystem.r

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,65 @@
1+
#' @name cache_filesystem
2+
#' @title Filesystem Cache
3+
#' @description
4+
#' Initiate a filesystem cache.
5+
#'
6+
#' @param path Directory in which to store cached items.
7+
#'
8+
#' @examples
9+
#'
10+
#' \dontrun{
11+
#' # Use with Dropbox
12+
#'
13+
#' db <- cache_filesystem("~/Dropbox/.rcache")
14+
#'
15+
#' mem_runif <- memoise(runif, cache = db)
16+
#'
17+
#' # Use with Google Drive
18+
#'
19+
#' gd <- cache_filesystem("~/Google Drive/.rcache")
20+
#'
21+
#' mem_runif <- memoise(runif, cache = gd)
22+
#'
23+
#' }
24+
#'
25+
#' @export
26+
27+
cache_filesystem <- function(path) {
28+
29+
dir.create(file.path(path), showWarnings = FALSE)
30+
31+
cache_reset <- function() {
32+
cache_files <- list.files(path, full.names = TRUE)
33+
# Use an environment for loaded items.
34+
cache <- new.env(TRUE, emptyenv())
35+
if (length(cache_files) > 0) {
36+
rm_status <- file.remove(list.files(path, full.names = TRUE))
37+
if (rm_status) {
38+
message("Cached files removed.")
39+
}
40+
} else {
41+
message("No files in Cache.")
42+
}
43+
}
44+
45+
cache_set <- function(key, value) {
46+
save(value, file = paste(path, key, sep="/"))
47+
}
48+
49+
cache_get <- function(key) {
50+
load(file = paste(path, key, sep="/"))
51+
value
52+
}
53+
54+
cache_has_key <- function(key) {
55+
file.exists(paste(path, key, sep="/"))
56+
}
57+
58+
list(
59+
reset = cache_reset,
60+
set = cache_set,
61+
get = cache_get,
62+
has_key = cache_has_key,
63+
keys = function() list.files(path)
64+
)
65+
}

R/cache.r renamed to R/cache_local.r

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,12 @@
1-
new_cache <- function() {
1+
#' @name cache_local
2+
#' @title In Memory Cache
3+
#' @description Initiate an in memory cache.
4+
#'
5+
#' cache_local() stores cached items in memory. It is the default cache.
6+
#'
7+
#' @export
8+
9+
cache_local <- function() {
210

311
cache <- NULL
412
cache_reset <- function() {

R/memoise.r

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,7 @@
3838
#' @param ... optional variables specified as formulas with no RHS to use as
3939
#' additional restrictions on caching. See Examples for usage.
4040
#' @param envir Environment of the returned function.
41+
#' @param cache Cache function.
4142
#' @seealso \code{\link{forget}}, \code{\link{is.memoised}},
4243
#' \code{\link{timeout}}, \url{http://en.wikipedia.org/wiki/Memoization}
4344
#' @aliases memoise memoize
@@ -97,7 +98,7 @@
9798
#' memA4 <- memoise(a, ~timeout(10))
9899
#' memA4(2)
99100
#' @importFrom stats setNames
100-
memoise <- memoize <- function(f, ..., envir = environment(f)) {
101+
memoise <- memoize <- function(f, ..., envir = environment(f), cache = cache_local()) {
101102
f_formals <- formals(args(f))
102103
if(is.memoised(f)) {
103104
stop("`f` must not be memoised.", call. = FALSE)
@@ -113,8 +114,6 @@ memoise <- memoize <- function(f, ..., envir = environment(f)) {
113114
init_call_args <- setNames(f_formal_name_list, f_formal_names)
114115
init_call <- make_call(quote(`_f`), init_call_args)
115116

116-
cache <- new_cache()
117-
118117
validate_formulas(...)
119118
additional <- list(...)
120119

0 commit comments

Comments
 (0)