Skip to content

Commit b7c47dd

Browse files
committed
Revert "Merge branch 'jt/path-filter-fix' into next"
This reverts commit d99958c, reversing changes made to 996db74.
1 parent c5ef244 commit b7c47dd

File tree

12 files changed

+39
-350
lines changed

12 files changed

+39
-350
lines changed

Documentation/config/commitgraph.txt

Lines changed: 3 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -9,26 +9,6 @@ commitGraph.maxNewFilters::
99
commit-graph write` (c.f., linkgit:git-commit-graph[1]).
1010

1111
commitGraph.readChangedPaths::
12-
Deprecated. Equivalent to commitGraph.changedPathsVersion=-1 if true, and
13-
commitGraph.changedPathsVersion=0 if false. (If commitGraph.changedPathVersion
14-
is also set, commitGraph.changedPathsVersion takes precedence.)
15-
16-
commitGraph.changedPathsVersion::
17-
Specifies the version of the changed-path Bloom filters that Git will read and
18-
write. May be -1, 0, 1, or 2.
19-
+
20-
Defaults to -1.
21-
+
22-
If -1, Git will use the version of the changed-path Bloom filters in the
23-
repository, defaulting to 1 if there are none.
24-
+
25-
If 0, Git will not read any Bloom filters, and will write version 1 Bloom
26-
filters when instructed to write.
27-
+
28-
If 1, Git will only read version 1 Bloom filters, and will write version 1
29-
Bloom filters.
30-
+
31-
If 2, Git will only read version 2 Bloom filters, and will write version 2
32-
Bloom filters.
33-
+
34-
See linkgit:git-commit-graph[1] for more information.
12+
If true, then git will use the changed-path Bloom filters in the
13+
commit-graph file (if it exists, and they are present). Defaults to
14+
true. See linkgit:git-commit-graph[1] for more information.

Documentation/gitformat-commit-graph.txt

Lines changed: 3 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -142,16 +142,13 @@ All multi-byte numbers are in network byte order.
142142

143143
==== Bloom Filter Data (ID: {'B', 'D', 'A', 'T'}) [Optional]
144144
* It starts with header consisting of three unsigned 32-bit integers:
145-
- Version of the hash algorithm being used. We currently support
146-
value 2 which corresponds to the 32-bit version of the murmur3 hash
145+
- Version of the hash algorithm being used. We currently only support
146+
value 1 which corresponds to the 32-bit version of the murmur3 hash
147147
implemented exactly as described in
148148
https://en.wikipedia.org/wiki/MurmurHash#Algorithm and the double
149149
hashing technique using seed values 0x293ae76f and 0x7e646e2 as
150150
described in https://doi.org/10.1007/978-3-540-30494-4_26 "Bloom Filters
151-
in Probabilistic Verification". Version 1 Bloom filters have a bug that appears
152-
when char is signed and the repository has path names that have characters >=
153-
0x80; Git supports reading and writing them, but this ability will be removed
154-
in a future version of Git.
151+
in Probabilistic Verification"
155152
- The number of times a path is hashed and hence the number of bit positions
156153
that cumulatively determine whether a file is present in the commit.
157154
- The minimum number of bits 'b' per entry in the Bloom filter. If the filter

bloom.c

Lines changed: 6 additions & 69 deletions
Original file line numberDiff line numberDiff line change
@@ -29,9 +29,9 @@ static inline unsigned char get_bitmask(uint32_t pos)
2929
return ((unsigned char)1) << (pos & (BITS_PER_WORD - 1));
3030
}
3131

32-
int load_bloom_filter_from_graph(struct commit_graph *g,
33-
struct bloom_filter *filter,
34-
uint32_t graph_pos)
32+
static int load_bloom_filter_from_graph(struct commit_graph *g,
33+
struct bloom_filter *filter,
34+
uint32_t graph_pos)
3535
{
3636
uint32_t lex_pos, start_index, end_index;
3737

@@ -66,64 +66,7 @@ int load_bloom_filter_from_graph(struct commit_graph *g,
6666
* Not considered to be cryptographically secure.
6767
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
6868
*/
69-
uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len)
70-
{
71-
const uint32_t c1 = 0xcc9e2d51;
72-
const uint32_t c2 = 0x1b873593;
73-
const uint32_t r1 = 15;
74-
const uint32_t r2 = 13;
75-
const uint32_t m = 5;
76-
const uint32_t n = 0xe6546b64;
77-
int i;
78-
uint32_t k1 = 0;
79-
const char *tail;
80-
81-
int len4 = len / sizeof(uint32_t);
82-
83-
uint32_t k;
84-
for (i = 0; i < len4; i++) {
85-
uint32_t byte1 = (uint32_t)(unsigned char)data[4*i];
86-
uint32_t byte2 = ((uint32_t)(unsigned char)data[4*i + 1]) << 8;
87-
uint32_t byte3 = ((uint32_t)(unsigned char)data[4*i + 2]) << 16;
88-
uint32_t byte4 = ((uint32_t)(unsigned char)data[4*i + 3]) << 24;
89-
k = byte1 | byte2 | byte3 | byte4;
90-
k *= c1;
91-
k = rotate_left(k, r1);
92-
k *= c2;
93-
94-
seed ^= k;
95-
seed = rotate_left(seed, r2) * m + n;
96-
}
97-
98-
tail = (data + len4 * sizeof(uint32_t));
99-
100-
switch (len & (sizeof(uint32_t) - 1)) {
101-
case 3:
102-
k1 ^= ((uint32_t)(unsigned char)tail[2]) << 16;
103-
/*-fallthrough*/
104-
case 2:
105-
k1 ^= ((uint32_t)(unsigned char)tail[1]) << 8;
106-
/*-fallthrough*/
107-
case 1:
108-
k1 ^= ((uint32_t)(unsigned char)tail[0]) << 0;
109-
k1 *= c1;
110-
k1 = rotate_left(k1, r1);
111-
k1 *= c2;
112-
seed ^= k1;
113-
break;
114-
}
115-
116-
seed ^= (uint32_t)len;
117-
seed ^= (seed >> 16);
118-
seed *= 0x85ebca6b;
119-
seed ^= (seed >> 13);
120-
seed *= 0xc2b2ae35;
121-
seed ^= (seed >> 16);
122-
123-
return seed;
124-
}
125-
126-
static uint32_t murmur3_seeded_v1(uint32_t seed, const char *data, size_t len)
69+
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len)
12770
{
12871
const uint32_t c1 = 0xcc9e2d51;
12972
const uint32_t c2 = 0x1b873593;
@@ -188,14 +131,8 @@ void fill_bloom_key(const char *data,
188131
int i;
189132
const uint32_t seed0 = 0x293ae76f;
190133
const uint32_t seed1 = 0x7e646e2c;
191-
uint32_t hash0, hash1;
192-
if (settings->hash_version == 2) {
193-
hash0 = murmur3_seeded_v2(seed0, data, len);
194-
hash1 = murmur3_seeded_v2(seed1, data, len);
195-
} else {
196-
hash0 = murmur3_seeded_v1(seed0, data, len);
197-
hash1 = murmur3_seeded_v1(seed1, data, len);
198-
}
134+
const uint32_t hash0 = murmur3_seeded(seed0, data, len);
135+
const uint32_t hash1 = murmur3_seeded(seed1, data, len);
199136

200137
key->hashes = (uint32_t *)xcalloc(settings->num_hashes, sizeof(uint32_t));
201138
for (i = 0; i < settings->num_hashes; i++)

bloom.h

Lines changed: 3 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -3,16 +3,13 @@
33

44
struct commit;
55
struct repository;
6-
struct commit_graph;
76

87
struct bloom_filter_settings {
98
/*
109
* The version of the hashing technique being used.
11-
* The newest version is 2, which is
10+
* We currently only support version = 1 which is
1211
* the seeded murmur3 hashing technique implemented
13-
* in bloom.c. Bloom filters of version 1 were created
14-
* with prior versions of Git, which had a bug in the
15-
* implementation of the hash function.
12+
* in bloom.c.
1613
*/
1714
uint32_t hash_version;
1815

@@ -71,18 +68,14 @@ struct bloom_key {
7168
uint32_t *hashes;
7269
};
7370

74-
int load_bloom_filter_from_graph(struct commit_graph *g,
75-
struct bloom_filter *filter,
76-
uint32_t graph_pos);
77-
7871
/*
7972
* Calculate the murmur3 32-bit hash value for the given data
8073
* using the given seed.
8174
* Produces a uniformly distributed hash value.
8275
* Not considered to be cryptographically secure.
8376
* Implemented as described in https://en.wikipedia.org/wiki/MurmurHash#Algorithm
8477
*/
85-
uint32_t murmur3_seeded_v2(uint32_t seed, const char *data, size_t len);
78+
uint32_t murmur3_seeded(uint32_t seed, const char *data, size_t len);
8679

8780
void fill_bloom_key(const char *data,
8881
size_t len,

commit-graph.c

Lines changed: 6 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -304,26 +304,17 @@ static int graph_read_oid_lookup(const unsigned char *chunk_start,
304304
return 0;
305305
}
306306

307-
struct graph_read_bloom_data_context {
308-
struct commit_graph *g;
309-
int *commit_graph_changed_paths_version;
310-
};
311-
312307
static int graph_read_bloom_data(const unsigned char *chunk_start,
313308
size_t chunk_size, void *data)
314309
{
315-
struct graph_read_bloom_data_context *c = data;
316-
struct commit_graph *g = c->g;
310+
struct commit_graph *g = data;
317311
uint32_t hash_version;
312+
g->chunk_bloom_data = chunk_start;
318313
hash_version = get_be32(chunk_start);
319314

320-
if (*c->commit_graph_changed_paths_version == -1) {
321-
*c->commit_graph_changed_paths_version = hash_version;
322-
} else if (hash_version != *c->commit_graph_changed_paths_version) {
315+
if (hash_version != 1)
323316
return 0;
324-
}
325317

326-
g->chunk_bloom_data = chunk_start;
327318
g->bloom_filter_settings = xmalloc(sizeof(struct bloom_filter_settings));
328319
g->bloom_filter_settings->hash_version = hash_version;
329320
g->bloom_filter_settings->num_hashes = get_be32(chunk_start + 4);
@@ -410,15 +401,11 @@ struct commit_graph *parse_commit_graph(struct repo_settings *s,
410401
graph->read_generation_data = 1;
411402
}
412403

413-
if (s->commit_graph_changed_paths_version) {
414-
struct graph_read_bloom_data_context context = {
415-
.g = graph,
416-
.commit_graph_changed_paths_version = &s->commit_graph_changed_paths_version
417-
};
404+
if (s->commit_graph_read_changed_paths) {
418405
pair_chunk(cf, GRAPH_CHUNKID_BLOOMINDEXES,
419406
&graph->chunk_bloom_indexes);
420407
read_chunk(cf, GRAPH_CHUNKID_BLOOMDATA,
421-
graph_read_bloom_data, &context);
408+
graph_read_bloom_data, graph);
422409
}
423410

424411
if (graph->chunk_bloom_indexes && graph->chunk_bloom_data) {
@@ -2401,14 +2388,6 @@ int write_commit_graph(struct object_directory *odb,
24012388
ctx->write_generation_data = (get_configured_generation_version(r) == 2);
24022389
ctx->num_generation_data_overflows = 0;
24032390

2404-
if (r->settings.commit_graph_changed_paths_version < -1
2405-
|| r->settings.commit_graph_changed_paths_version > 2) {
2406-
warning(_("attempting to write a commit-graph, but 'commitgraph.changedPathsVersion' (%d) is not supported"),
2407-
r->settings.commit_graph_changed_paths_version);
2408-
return 0;
2409-
}
2410-
bloom_settings.hash_version = r->settings.commit_graph_changed_paths_version == 2
2411-
? 2 : 1;
24122391
bloom_settings.bits_per_entry = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_BITS_PER_ENTRY",
24132392
bloom_settings.bits_per_entry);
24142393
bloom_settings.num_hashes = git_env_ulong("GIT_TEST_BLOOM_SETTINGS_NUM_HASHES",
@@ -2438,7 +2417,7 @@ int write_commit_graph(struct object_directory *odb,
24382417
g = ctx->r->objects->commit_graph;
24392418

24402419
/* We have changed-paths already. Keep them in the next graph */
2441-
if (g && g->bloom_filter_settings) {
2420+
if (g && g->chunk_bloom_data) {
24422421
ctx->changed_paths = 1;
24432422
ctx->bloom_settings = g->bloom_filter_settings;
24442423
}

oss-fuzz/fuzz-commit-graph.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ int LLVMFuzzerTestOneInput(const uint8_t *data, size_t size)
1919
* possible.
2020
*/
2121
the_repository->settings.commit_graph_generation_version = 2;
22-
the_repository->settings.commit_graph_changed_paths_version = 1;
22+
the_repository->settings.commit_graph_read_changed_paths = 1;
2323
g = parse_commit_graph(&the_repository->settings, (void *)data, size);
2424
repo_clear(the_repository);
2525
free_commit_graph(g);

repo-settings.c

Lines changed: 1 addition & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -24,7 +24,6 @@ void prepare_repo_settings(struct repository *r)
2424
int value;
2525
const char *strval;
2626
int manyfiles;
27-
int read_changed_paths;
2827

2928
if (!r->gitdir)
3029
BUG("Cannot add settings for uninitialized repository");
@@ -55,10 +54,7 @@ void prepare_repo_settings(struct repository *r)
5554
/* Commit graph config or default, does not cascade (simple) */
5655
repo_cfg_bool(r, "core.commitgraph", &r->settings.core_commit_graph, 1);
5756
repo_cfg_int(r, "commitgraph.generationversion", &r->settings.commit_graph_generation_version, 2);
58-
repo_cfg_bool(r, "commitgraph.readchangedpaths", &read_changed_paths, 1);
59-
repo_cfg_int(r, "commitgraph.changedpathsversion",
60-
&r->settings.commit_graph_changed_paths_version,
61-
read_changed_paths ? -1 : 0);
57+
repo_cfg_bool(r, "commitgraph.readchangedpaths", &r->settings.commit_graph_read_changed_paths, 1);
6258
repo_cfg_bool(r, "gc.writecommitgraph", &r->settings.gc_write_commit_graph, 1);
6359
repo_cfg_bool(r, "fetch.writecommitgraph", &r->settings.fetch_write_commit_graph, 0);
6460

repository.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -29,7 +29,7 @@ struct repo_settings {
2929

3030
int core_commit_graph;
3131
int commit_graph_generation_version;
32-
int commit_graph_changed_paths_version;
32+
int commit_graph_read_changed_paths;
3333
int gc_write_commit_graph;
3434
int fetch_write_commit_graph;
3535
int command_requires_full_index;

t/helper/test-bloom.c

Lines changed: 1 addition & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -50,7 +50,6 @@ static void get_bloom_filter_for_commit(const struct object_id *commit_oid)
5050

5151
static const char *bloom_usage = "\n"
5252
" test-tool bloom get_murmur3 <string>\n"
53-
" test-tool bloom get_murmur3_seven_highbit\n"
5453
" test-tool bloom generate_filter <string> [<string>...]\n"
5554
" test-tool bloom get_filter_for_commit <commit-hex>\n";
5655

@@ -65,13 +64,7 @@ int cmd__bloom(int argc, const char **argv)
6564
uint32_t hashed;
6665
if (argc < 3)
6766
usage(bloom_usage);
68-
hashed = murmur3_seeded_v2(0, argv[2], strlen(argv[2]));
69-
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
70-
}
71-
72-
if (!strcmp(argv[1], "get_murmur3_seven_highbit")) {
73-
uint32_t hashed;
74-
hashed = murmur3_seeded_v2(0, "\x99\xaa\xbb\xcc\xdd\xee\xff", 7);
67+
hashed = murmur3_seeded(0, argv[2], strlen(argv[2]));
7568
printf("Murmur3 Hash with seed=0:0x%08x\n", hashed);
7669
}
7770

0 commit comments

Comments
 (0)