Skip to content

Commit 26e7021

Browse files
committed
objstorage: add readahead configuration
This commit adds a way to configure the read-ahead method, for both cases of "informed readahead" (currently just compactions, but in the future possibly backup scans as well) and "speculative readahead".
1 parent 4a5d7e3 commit 26e7021

File tree

7 files changed

+246
-26
lines changed

7 files changed

+246
-26
lines changed

objstorage/objstorageprovider/provider.go

Lines changed: 48 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,17 @@ type Settings struct {
9292
// out a large chunk of dirty filesystem buffers.
9393
BytesPerSync int
9494

95+
// Local contains fields that are only relevant for files stored on the local
96+
// filesystem.
97+
Local struct {
98+
// TODO(radu): move FSCleaner, NoSyncOnClose, BytesPerSync here.
99+
100+
// ReadaheadConfigFn is a function used to retrieve the current readahead
101+
// mode. This function is run whenever a local object is open for reading.
102+
// If it is nil, DefaultReadaheadConfig is used.
103+
ReadaheadConfigFn func() ReadaheadConfig
104+
}
105+
95106
// Fields here are set only if the provider is to support remote objects
96107
// (experimental).
97108
Remote struct {
@@ -129,6 +140,43 @@ type Settings struct {
129140
}
130141
}
131142

143+
// ReadaheadConfig controls the use of read-ahead.
144+
type ReadaheadConfig struct {
145+
// Informed is the type of read-ahead for operations that are known to read a
146+
// large consecutive chunk of a file.
147+
Informed ReadaheadMode
148+
149+
// Speculative is the type of read-ahead used automatically, when consecutive
150+
// reads are detected.
151+
Speculative ReadaheadMode
152+
}
153+
154+
// DefaultReadaheadConfig is the readahead config used when ReadaheadConfigFn is
155+
// not specified.
156+
var DefaultReadaheadConfig = ReadaheadConfig{
157+
Informed: FadviseSequential,
158+
Speculative: FadviseSequential,
159+
}
160+
161+
// ReadaheadMode indicates the type of read-ahead to use, either for informed
162+
// read-ahead (e.g. compactions) or speculative read-ahead.
163+
type ReadaheadMode uint8
164+
165+
const (
166+
// NoReadahead disables readahead altogether.
167+
NoReadahead ReadaheadMode = iota
168+
169+
// SysReadahead enables the use of SYS_READAHEAD call to prefetch data.
170+
// The prefetch window grows dynamically as consecutive writes are detected.
171+
SysReadahead
172+
173+
// FadviseSequential enables to use of FADV_SEQUENTIAL. For informed
174+
// read-ahead, FADV_SEQUENTIAL is used from the beginning. For speculative
175+
// read-ahead SYS_READAHEAD is first used until the window reaches the maximum
176+
// size, then we siwtch to FADV_SEQUENTIAL.
177+
FadviseSequential
178+
)
179+
132180
// DefaultSettings initializes default settings (with no remote storage),
133181
// suitable for tests and tools.
134182
func DefaultSettings(fs vfs.FS, dirName string) Settings {

objstorage/objstorageprovider/provider_test.go

Lines changed: 26 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -40,7 +40,9 @@ func TestProvider(t *testing.T) {
4040
backings := make(map[string]objstorage.RemoteObjectBacking)
4141
backingHandles := make(map[string]objstorage.RemoteObjectBackingHandle)
4242
var curProvider objstorage.Provider
43+
readaheadConfig := DefaultReadaheadConfig
4344
datadriven.RunTest(t, path, func(t *testing.T, d *datadriven.TestData) string {
45+
readaheadConfig = DefaultReadaheadConfig
4446
scanArgs := func(desc string, args ...interface{}) {
4547
t.Helper()
4648
if len(d.CmdArgs) != len(args) {
@@ -68,6 +70,9 @@ func TestProvider(t *testing.T) {
6870
st.Remote.CreateOnShared = remote.CreateOnSharedAll
6971
st.Remote.CreateOnSharedLocator = ""
7072
}
73+
st.Local.ReadaheadConfigFn = func() ReadaheadConfig {
74+
return readaheadConfig
75+
}
7176
require.NoError(t, fs.MkdirAll(fsDir, 0755))
7277
p, err := Open(st)
7378
require.NoError(t, err)
@@ -170,13 +175,29 @@ func TestProvider(t *testing.T) {
170175
return log.String()
171176

172177
case "read":
173-
forCompaction := false
174-
if len(d.CmdArgs) == 2 && d.CmdArgs[1].Key == "for-compaction" {
175-
d.CmdArgs = d.CmdArgs[:1]
176-
forCompaction = true
178+
forCompaction := d.HasArg("for-compaction")
179+
if arg, ok := d.Arg("readahead"); ok {
180+
var mode ReadaheadMode
181+
switch arg.Vals[0] {
182+
case "off":
183+
mode = NoReadahead
184+
case "sys-readahead":
185+
mode = SysReadahead
186+
case "fadvise-sequential":
187+
mode = FadviseSequential
188+
default:
189+
d.Fatalf(t, "unknown readahead mode %s", arg.Vals[0])
190+
}
191+
if forCompaction {
192+
readaheadConfig.Informed = mode
193+
} else {
194+
readaheadConfig.Speculative = mode
195+
}
177196
}
197+
198+
d.CmdArgs = d.CmdArgs[:1]
178199
var fileNum base.FileNum
179-
scanArgs("<file-num> [for-compaction]", &fileNum)
200+
scanArgs("<file-num> [for-compaction] [readahead|speculative-overhead=off|sys-readahead|fadvise-sequential]", &fileNum)
180201
r, err := curProvider.OpenForReading(ctx, base.FileTypeTable, fileNum.DiskFileNum(), objstorage.OpenOptions{})
181202
if err != nil {
182203
return err.Error()

objstorage/objstorageprovider/testdata/provider/local_readahead

Lines changed: 118 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,3 +64,121 @@ size: 2000000
6464
1000 15000: ok (salt 1)
6565
<local fs> close: p1/000001.sst
6666
<local fs> close: p1/000001.sst
67+
68+
# Test non-default readahead modes.
69+
70+
read 1 readahead=off
71+
0 1000
72+
1000 15000
73+
16000 30000
74+
46000 10000
75+
56000 50000
76+
106000 30000
77+
140000 80000
78+
----
79+
<local fs> open: p1/000001.sst (options: *vfs.randomReadsOption)
80+
size: 2000000
81+
<local fs> read-at(0, 1000): p1/000001.sst
82+
0 1000: ok (salt 1)
83+
<local fs> read-at(1000, 15000): p1/000001.sst
84+
1000 15000: ok (salt 1)
85+
<local fs> read-at(16000, 30000): p1/000001.sst
86+
16000 30000: ok (salt 1)
87+
<local fs> read-at(46000, 10000): p1/000001.sst
88+
46000 10000: ok (salt 1)
89+
<local fs> read-at(56000, 50000): p1/000001.sst
90+
56000 50000: ok (salt 1)
91+
<local fs> read-at(106000, 30000): p1/000001.sst
92+
106000 30000: ok (salt 1)
93+
<local fs> read-at(140000, 80000): p1/000001.sst
94+
140000 80000: ok (salt 1)
95+
<local fs> close: p1/000001.sst
96+
97+
read 1 for-compaction readahead=off
98+
0 1000
99+
1000 15000
100+
16000 30000
101+
46000 10000
102+
56000 50000
103+
106000 30000
104+
140000 80000
105+
----
106+
<local fs> open: p1/000001.sst (options: *vfs.randomReadsOption)
107+
size: 2000000
108+
<local fs> read-at(0, 1000): p1/000001.sst
109+
0 1000: ok (salt 1)
110+
<local fs> read-at(1000, 15000): p1/000001.sst
111+
1000 15000: ok (salt 1)
112+
<local fs> read-at(16000, 30000): p1/000001.sst
113+
16000 30000: ok (salt 1)
114+
<local fs> read-at(46000, 10000): p1/000001.sst
115+
46000 10000: ok (salt 1)
116+
<local fs> read-at(56000, 50000): p1/000001.sst
117+
56000 50000: ok (salt 1)
118+
<local fs> read-at(106000, 30000): p1/000001.sst
119+
106000 30000: ok (salt 1)
120+
<local fs> read-at(140000, 80000): p1/000001.sst
121+
140000 80000: ok (salt 1)
122+
<local fs> close: p1/000001.sst
123+
124+
read 1 readahead=sys-readahead
125+
0 1000
126+
1000 15000
127+
16000 30000
128+
46000 10000
129+
56000 50000
130+
106000 30000
131+
140000 80000
132+
----
133+
<local fs> open: p1/000001.sst (options: *vfs.randomReadsOption)
134+
size: 2000000
135+
<local fs> read-at(0, 1000): p1/000001.sst
136+
0 1000: ok (salt 1)
137+
<local fs> read-at(1000, 15000): p1/000001.sst
138+
1000 15000: ok (salt 1)
139+
<local fs> prefetch(16000, 65536): p1/000001.sst
140+
<local fs> read-at(16000, 30000): p1/000001.sst
141+
16000 30000: ok (salt 1)
142+
<local fs> read-at(46000, 10000): p1/000001.sst
143+
46000 10000: ok (salt 1)
144+
<local fs> prefetch(56000, 131072): p1/000001.sst
145+
<local fs> read-at(56000, 50000): p1/000001.sst
146+
56000 50000: ok (salt 1)
147+
<local fs> read-at(106000, 30000): p1/000001.sst
148+
106000 30000: ok (salt 1)
149+
<local fs> prefetch(140000, 262144): p1/000001.sst
150+
<local fs> read-at(140000, 80000): p1/000001.sst
151+
140000 80000: ok (salt 1)
152+
<local fs> close: p1/000001.sst
153+
154+
# TODO(radu): for informed/sys-readahead, we should start with the maximum
155+
# prefetch window.
156+
read 1 for-compaction readahead=sys-readahead
157+
0 1000
158+
1000 15000
159+
16000 30000
160+
46000 10000
161+
56000 50000
162+
106000 30000
163+
140000 80000
164+
----
165+
<local fs> open: p1/000001.sst (options: *vfs.randomReadsOption)
166+
size: 2000000
167+
<local fs> read-at(0, 1000): p1/000001.sst
168+
0 1000: ok (salt 1)
169+
<local fs> read-at(1000, 15000): p1/000001.sst
170+
1000 15000: ok (salt 1)
171+
<local fs> prefetch(16000, 65536): p1/000001.sst
172+
<local fs> read-at(16000, 30000): p1/000001.sst
173+
16000 30000: ok (salt 1)
174+
<local fs> read-at(46000, 10000): p1/000001.sst
175+
46000 10000: ok (salt 1)
176+
<local fs> prefetch(56000, 131072): p1/000001.sst
177+
<local fs> read-at(56000, 50000): p1/000001.sst
178+
56000 50000: ok (salt 1)
179+
<local fs> read-at(106000, 30000): p1/000001.sst
180+
106000 30000: ok (salt 1)
181+
<local fs> prefetch(140000, 262144): p1/000001.sst
182+
<local fs> read-at(140000, 80000): p1/000001.sst
183+
140000 80000: ok (salt 1)
184+
<local fs> close: p1/000001.sst

objstorage/objstorageprovider/vfs.go

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,11 @@ func (p *provider) vfsOpenForReading(
3131
}
3232
return nil, err
3333
}
34-
return newFileReadable(file, p.st.FS, filename)
34+
readaheadConfig := DefaultReadaheadConfig
35+
if f := p.st.Local.ReadaheadConfigFn; f != nil {
36+
readaheadConfig = f()
37+
}
38+
return newFileReadable(file, p.st.FS, readaheadConfig, filename)
3539
}
3640

3741
func (p *provider) vfsCreate(

objstorage/objstorageprovider/vfs_readable.go

Lines changed: 35 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -27,22 +27,26 @@ type fileReadable struct {
2727

2828
// The following fields are used to possibly open the file again using the
2929
// sequential reads option (see vfsReadHandle).
30-
filename string
31-
fs vfs.FS
30+
filename string
31+
fs vfs.FS
32+
readaheadConfig ReadaheadConfig
3233
}
3334

3435
var _ objstorage.Readable = (*fileReadable)(nil)
3536

36-
func newFileReadable(file vfs.File, fs vfs.FS, filename string) (*fileReadable, error) {
37+
func newFileReadable(
38+
file vfs.File, fs vfs.FS, readaheadConfig ReadaheadConfig, filename string,
39+
) (*fileReadable, error) {
3740
info, err := file.Stat()
3841
if err != nil {
3942
return nil, err
4043
}
4144
r := &fileReadable{
42-
file: file,
43-
size: info.Size(),
44-
filename: filename,
45-
fs: fs,
45+
file: file,
46+
size: info.Size(),
47+
filename: filename,
48+
fs: fs,
49+
readaheadConfig: readaheadConfig,
4650
}
4751
invariants.SetFinalizer(r, func(obj interface{}) {
4852
if obj.(*fileReadable).file != nil {
@@ -81,8 +85,9 @@ func (r *fileReadable) NewReadHandle(_ context.Context) objstorage.ReadHandle {
8185
}
8286

8387
type vfsReadHandle struct {
84-
r *fileReadable
85-
rs readaheadState
88+
r *fileReadable
89+
rs readaheadState
90+
readaheadMode ReadaheadMode
8691

8792
// sequentialFile holds a file descriptor to the same underlying File,
8893
// except with fadvise(FADV_SEQUENTIAL) called on it to take advantage of
@@ -109,8 +114,9 @@ var readHandlePool = sync.Pool{
109114

110115
func (rh *vfsReadHandle) init(r *fileReadable) {
111116
*rh = vfsReadHandle{
112-
r: r,
113-
rs: makeReadaheadState(fileMaxReadaheadSize),
117+
r: r,
118+
rs: makeReadaheadState(fileMaxReadaheadSize),
119+
readaheadMode: r.readaheadConfig.Speculative,
114120
}
115121
}
116122

@@ -127,23 +133,26 @@ func (rh *vfsReadHandle) Close() error {
127133

128134
// ReadAt is part of the objstorage.ReadHandle interface.
129135
func (rh *vfsReadHandle) ReadAt(_ context.Context, p []byte, offset int64) error {
130-
var n int
131-
var err error
132136
if rh.sequentialFile != nil {
133137
// Use OS-level read-ahead.
134-
n, err = rh.sequentialFile.ReadAt(p, offset)
135-
} else {
138+
n, err := rh.sequentialFile.ReadAt(p, offset)
139+
if invariants.Enabled && err == nil && n != len(p) {
140+
panic("short read")
141+
}
142+
return err
143+
}
144+
if rh.readaheadMode != NoReadahead {
136145
if readaheadSize := rh.rs.maybeReadahead(offset, int64(len(p))); readaheadSize > 0 {
137-
if readaheadSize >= fileMaxReadaheadSize {
146+
if rh.readaheadMode == FadviseSequential && readaheadSize >= fileMaxReadaheadSize {
138147
// We've reached the maximum readahead size. Beyond this point, rely on
139148
// OS-level readahead.
140149
rh.switchToOSReadahead()
141150
} else {
142151
_ = rh.r.file.Prefetch(offset, readaheadSize)
143152
}
144153
}
145-
n, err = rh.r.file.ReadAt(p, offset)
146154
}
155+
n, err := rh.r.file.ReadAt(p, offset)
147156
if invariants.Enabled && err == nil && n != len(p) {
148157
panic("short read")
149158
}
@@ -152,10 +161,16 @@ func (rh *vfsReadHandle) ReadAt(_ context.Context, p []byte, offset int64) error
152161

153162
// SetupForCompaction is part of the objstorage.ReadHandle interface.
154163
func (rh *vfsReadHandle) SetupForCompaction() {
155-
rh.switchToOSReadahead()
164+
rh.readaheadMode = rh.r.readaheadConfig.Informed
165+
if rh.readaheadMode == FadviseSequential {
166+
rh.switchToOSReadahead()
167+
}
156168
}
157169

158170
func (rh *vfsReadHandle) switchToOSReadahead() {
171+
if invariants.Enabled && rh.readaheadMode != FadviseSequential {
172+
panic("readheadMode not respected")
173+
}
159174
if rh.sequentialFile != nil {
160175
return
161176
}
@@ -170,8 +185,8 @@ func (rh *vfsReadHandle) switchToOSReadahead() {
170185

171186
// RecordCacheHit is part of the objstorage.ReadHandle interface.
172187
func (rh *vfsReadHandle) RecordCacheHit(_ context.Context, offset, size int64) {
173-
if rh.sequentialFile != nil {
174-
// Using OS-level readahead, so do nothing.
188+
if rh.sequentialFile != nil || rh.readaheadMode == NoReadahead {
189+
// Using OS-level or no readahead, so do nothing.
175190
return
176191
}
177192
rh.rs.recordCacheHit(offset, size)

open.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -310,6 +310,7 @@ func Open(dirname string, opts *Options) (db *DB, _ error) {
310310
NoSyncOnClose: opts.NoSyncOnClose,
311311
BytesPerSync: opts.BytesPerSync,
312312
}
313+
providerSettings.Local.ReadaheadConfigFn = opts.Local.ReadaheadConfigFn
313314
providerSettings.Remote.StorageFactory = opts.Experimental.RemoteStorage
314315
providerSettings.Remote.CreateOnShared = opts.Experimental.CreateOnShared
315316
providerSettings.Remote.CreateOnSharedLocator = opts.Experimental.CreateOnSharedLocator

0 commit comments

Comments
 (0)