diff --git a/Makefile b/Makefile
index a9fa8f8208fbdb..58b870dc2c6693 100644
--- a/Makefile
+++ b/Makefile
@@ -875,6 +875,7 @@ TEST_BUILTINS_OBJS += test-submodule-config.o
 TEST_BUILTINS_OBJS += test-submodule-nested-repo-config.o
 TEST_BUILTINS_OBJS += test-submodule.o
 TEST_BUILTINS_OBJS += test-subprocess.o
+TEST_BUILTINS_OBJS += test-synthesize.o
 TEST_BUILTINS_OBJS += test-trace2.o
 TEST_BUILTINS_OBJS += test-truncate.o
 TEST_BUILTINS_OBJS += test-userdiff.o
diff --git a/builtin/index-pack.c b/builtin/index-pack.c
index b67fb0256cc831..1ea64089055887 100644
--- a/builtin/index-pack.c
+++ b/builtin/index-pack.c
@@ -37,7 +37,7 @@ static const char index_pack_usage[] =
 
 struct object_entry {
 	struct pack_idx_entry idx;
-	unsigned long size;
+	size_t size;
 	unsigned char hdr_size;
 	signed char type;
 	signed char real_type;
@@ -469,7 +469,7 @@ static int is_delta_type(enum object_type type)
 	return (type == OBJ_REF_DELTA || type == OBJ_OFS_DELTA);
 }
 
-static void *unpack_entry_data(off_t offset, unsigned long size,
+static void *unpack_entry_data(off_t offset, size_t size,
 			       enum object_type type, struct object_id *oid)
 {
 	static char fixed_buf[8192];
@@ -524,7 +524,8 @@ static void *unpack_raw_entry(struct object_entry *obj,
 			      struct object_id *oid)
 {
 	unsigned char *p;
-	unsigned long size, c;
+	size_t size;
+	unsigned long c;
 	off_t base_offset;
 	unsigned shift;
 	void *data;
@@ -542,7 +543,7 @@ static void *unpack_raw_entry(struct object_entry *obj,
 		p = fill(1);
 		c = *p;
 		use(1);
-		size += (c & 0x7f) << shift;
+		size += ((size_t)c & 0x7f) << shift;
 		shift += 7;
 	}
 	obj->size = size;
diff --git a/builtin/unpack-objects.c b/builtin/unpack-objects.c
index 6fc64e9e4b8d5a..883440ccafef19 100644
--- a/builtin/unpack-objects.c
+++ b/builtin/unpack-objects.c
@@ -533,7 +533,8 @@ static void unpack_one(unsigned nr)
 {
 	unsigned shift;
 	unsigned char *pack;
-	unsigned long size, c;
+	size_t size;
+	unsigned long c;
 	enum object_type type;
 
 	obj_list[nr].offset = consumed_bytes;
@@ -548,7 +549,7 @@ static void unpack_one(unsigned nr)
 		pack = fill(1);
 		c = *pack;
 		use(1);
-		size += (c & 0x7f) << shift;
+		size += ((size_t)c & 0x7f) << shift;
 		shift += 7;
 	}
 
diff --git a/compat/zlib-compat.h b/compat/zlib-compat.h
index ac0827662298af..5078c5ef6ce0e8 100644
--- a/compat/zlib-compat.h
+++ b/compat/zlib-compat.h
@@ -7,6 +7,8 @@
 # define z_stream_s zng_stream_s
 # define gz_header_s zng_gz_header_s
 
+# define adler32(adler, buf, len) zng_adler32(adler, buf, len)
+
 # define crc32(crc, buf, len) zng_crc32(crc, buf, len)
 
 # define inflate(strm, bits) zng_inflate(strm, bits)
diff --git a/delta.h b/delta.h
index 8a56ec07992c75..fad68cfc45f6f4 100644
--- a/delta.h
+++ b/delta.h
@@ -86,8 +86,11 @@ void *patch_delta(const void *src_buf, unsigned long src_size,
  * This must be called twice on the delta data buffer, first to get the
  * expected source buffer size, and again to get the target buffer size.
  */
-static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
-					       const unsigned char *top)
+/*
+ * size_t variant that doesn't truncate; use for >4GB objects on Windows.
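+ * (unsigned long is only 32 bits on LLP64 platforms such as 64-bit
+ * Windows; the unsigned long wrapper below dies in cast_size_t_to_ulong()
+ * if the decoded size does not fit.)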
+ */
+static inline size_t get_delta_hdr_size_sz(const unsigned char **datap,
+					    const unsigned char *top)
 {
 	const unsigned char *data = *datap;
 	size_t cmd, size = 0;
@@ -98,6 +101,13 @@ static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
 		i += 7;
 	} while (cmd & 0x80 && data < top);
 	*datap = data;
+	return size;
+}
+
+static inline unsigned long get_delta_hdr_size(const unsigned char **datap,
+					       const unsigned char *top)
+{
+	size_t size = get_delta_hdr_size_sz(datap, top);
 	return cast_size_t_to_ulong(size);
 }
 
diff --git a/git-zlib.c b/git-zlib.c
index df9604910e3fdf..b91cb323aee916 100644
--- a/git-zlib.c
+++ b/git-zlib.c
@@ -30,6 +30,9 @@ static const char *zerr_to_string(int status)
  */
 /* #define ZLIB_BUF_MAX ((uInt)-1) */
 #define ZLIB_BUF_MAX ((uInt) 1024 * 1024 * 1024) /* 1GB */
+
+/* uLong is 32-bit on Windows, even on 64-bit systems */
+#define ULONG_MAX_VALUE maximum_unsigned_value_of_type(uLong)
 static inline uInt zlib_buf_cap(unsigned long len)
 {
 	return (ZLIB_BUF_MAX < len) ? ZLIB_BUF_MAX : len;
@@ -39,31 +42,37 @@ static void zlib_pre_call(git_zstream *s)
 {
 	s->z.next_in = s->next_in;
 	s->z.next_out = s->next_out;
-	s->z.total_in = s->total_in;
-	s->z.total_out = s->total_out;
+	s->z.total_in = (uLong)(s->total_in & ULONG_MAX_VALUE);
+	s->z.total_out = (uLong)(s->total_out & ULONG_MAX_VALUE);
 	s->z.avail_in = zlib_buf_cap(s->avail_in);
 	s->z.avail_out = zlib_buf_cap(s->avail_out);
 }
 
 static void zlib_post_call(git_zstream *s, int status)
 {
-	unsigned long bytes_consumed;
-	unsigned long bytes_produced;
+	size_t bytes_consumed;
+	size_t bytes_produced;
 
 	bytes_consumed = s->z.next_in - s->next_in;
 	bytes_produced = s->z.next_out - s->next_out;
-	if (s->z.total_out != s->total_out + bytes_produced)
+	/*
+	 * zlib's total_out/total_in are uLong, which may wrap for streams
+	 * >4GB. We track our own totals and verify only the low bits match.
+	 */
+	if ((s->z.total_out & ULONG_MAX_VALUE) !=
+	    ((s->total_out + bytes_produced) & ULONG_MAX_VALUE))
 		BUG("total_out mismatch");
 	/*
 	 * zlib does not update total_in when it returns Z_NEED_DICT,
 	 * causing a mismatch here. Skip the sanity check in that case.
 	 */
 	if (status != Z_NEED_DICT &&
-	    s->z.total_in != s->total_in + bytes_consumed)
+	    (s->z.total_in & ULONG_MAX_VALUE) !=
+	    ((s->total_in + bytes_consumed) & ULONG_MAX_VALUE))
 		BUG("total_in mismatch");
-	s->total_out = s->z.total_out;
-	s->total_in = s->z.total_in;
+	s->total_out += bytes_produced;
+	s->total_in += bytes_consumed;
 	/* zlib-ng marks `next_in` as `const`, so we have to cast it away. */
 	s->next_in = (unsigned char *) s->z.next_in;
 	s->next_out = s->z.next_out;
diff --git a/git-zlib.h b/git-zlib.h
index 0e66fefa8c9f05..44380e8ad38305 100644
--- a/git-zlib.h
+++ b/git-zlib.h
@@ -7,8 +7,8 @@ typedef struct git_zstream {
 	struct z_stream_s z;
 	unsigned long avail_in;
 	unsigned long avail_out;
-	unsigned long total_in;
-	unsigned long total_out;
+	size_t total_in;
+	size_t total_out;
 	unsigned char *next_in;
 	unsigned char *next_out;
 } git_zstream;
diff --git a/object-file.c b/object-file.c
index e55bf1bfff670f..6baa36526ef9f6 100644
--- a/object-file.c
+++ b/object-file.c
@@ -1086,7 +1086,7 @@ int odb_source_loose_write_stream(struct odb_source *source,
 	} while (ret == Z_OK || ret == Z_BUF_ERROR);
 
 	if (stream.total_in != len + hdrlen)
-		die(_("write stream object %ld != %"PRIuMAX), stream.total_in,
+		die(_("write stream object %"PRIuMAX" != %"PRIuMAX), (uintmax_t)stream.total_in,
 		    (uintmax_t)len + hdrlen);
 
 	/*
@@ -2108,6 +2108,7 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out,
 	struct object_info oi = OBJECT_INFO_INIT;
 	struct odb_loose_read_stream *st;
 	unsigned long mapsize;
+	unsigned long size_ul;
 	void *mapped;
 
 	mapped = odb_source_loose_map_object(source, oid, &mapsize);
@@ -2131,11 +2132,18 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out,
 		goto error;
 	}
 
-	oi.sizep = &st->base.size;
+	/*
+	 * object_info.sizep is unsigned long* (32-bit on Windows), but
+	 * st->base.size is size_t (64-bit), so use a temporary variable.
+	 * Note: loose objects >4GB would still truncate here, but such
+	 * large loose objects are uncommon (they'd normally be packed).
+	 */
+	oi.sizep = &size_ul;
 	oi.typep = &st->base.type;
 	if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0)
 		goto error;
+	st->base.size = size_ul;
 
 	st->mapped = mapped;
 	st->mapsize = mapsize;
diff --git a/odb/streaming.c b/odb/streaming.c
index 4a4474f891a07f..bd460f9adc2cb2 100644
--- a/odb/streaming.c
+++ b/odb/streaming.c
@@ -158,15 +158,26 @@ static int open_istream_incore(struct odb_read_stream **out,
 		.base.read = read_istream_incore,
 	};
 	struct odb_incore_read_stream *st;
+	unsigned long size_ul;
 	int ret;
 
 	oi.typep = &stream.base.type;
-	oi.sizep = &stream.base.size;
+	/*
+	 * object_info.sizep is unsigned long* (32-bit on Windows), but
+	 * stream.base.size is size_t (64-bit), so the types are
+	 * incompatible and we go through a temporary variable. Note: this
+	 * path still truncates for >4GB objects, but large objects should
+	 * use pack streaming (packfile_store_read_object_stream), which
+	 * handles size_t properly. This incore fallback is only used for
+	 * small objects or when pack streaming is unavailable.
+	 */
+	oi.sizep = &size_ul;
 	oi.contentp = (void **)&stream.buf;
 	ret = odb_read_object_info_extended(odb, oid, &oi,
 					    OBJECT_INFO_DIE_IF_CORRUPT);
 	if (ret)
 		return ret;
+	stream.base.size = size_ul;
 
 	CALLOC_ARRAY(st, 1);
 	*st = stream;
diff --git a/odb/streaming.h b/odb/streaming.h
index c7861f7e13c606..517e2ea2d3f5c3 100644
--- a/odb/streaming.h
+++ b/odb/streaming.h
@@ -21,7 +21,7 @@ struct odb_read_stream {
 	odb_read_stream_close_fn close;
 	odb_read_stream_read_fn read;
 	enum object_type type;
-	unsigned long size; /* inflated size of full object */
+	size_t size; /* inflated size of full object */
 };
 
 /*
diff --git a/packfile.c b/packfile.c
index 402c3b5dc73131..e198c9793f089f 100644
--- a/packfile.c
+++ b/packfile.c
@@ -1130,8 +1130,8 @@ unsigned long repo_approximate_object_count(struct repository *r)
 	return r->objects->approximate_object_count;
 }
 
-unsigned long unpack_object_header_buffer(const unsigned char *buf,
-		unsigned long len, enum object_type *type, unsigned long *sizep)
+static unsigned long unpack_object_header_buffer_internal(const unsigned char *buf,
+		unsigned long len, enum object_type *type, size_t *sizep)
 {
 	unsigned shift;
 	size_t size, c;
@@ -1142,7 +1142,11 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
 	size = c & 15;
 	shift = 4;
 	while (c & 0x80) {
-		if (len <= used || (bitsizeof(long) - 7) < shift) {
+		/*
+		 * Each continuation byte adds 7 bits. Ensure the shift stays
+		 * within size_t ("long" is only 32 bits on LLP64 Windows).
+		 */
+		if (len <= used || (bitsizeof(size_t) - 7) < shift) {
 			error("bad object header");
 			size = used = 0;
 			break;
@@ -1151,13 +1155,25 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf,
 		size = st_add(size, st_left_shift(c & 0x7f, shift));
 		shift += 7;
 	}
+	*sizep = size;
+	return used;
+}
+
+unsigned long unpack_object_header_buffer(const unsigned char *buf,
+		unsigned long len, enum object_type *type, unsigned long *sizep)
+{
+	size_t size;
+	unsigned long used = unpack_object_header_buffer_internal(buf, len, type, &size);
 	*sizep = cast_size_t_to_ulong(size);
 	return used;
 }
 
-unsigned long get_size_from_delta(struct packed_git *p,
-		struct pack_window **w_curs,
-		off_t curpos)
+/*
+ * size_t variant for >4GB delta results on Windows.
+ */
+static size_t get_size_from_delta_sz(struct packed_git *p,
+		struct pack_window **w_curs,
+		off_t curpos)
 {
 	const unsigned char *data;
 	unsigned char delta_head[20], *in;
@@ -1204,10 +1220,44 @@ unsigned long get_size_from_delta(struct packed_git *p,
 	data = delta_head;
 
 	/* ignore base size */
-	get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
+	get_delta_hdr_size_sz(&data, delta_head+sizeof(delta_head));
 
 	/* Read the result size */
-	return get_delta_hdr_size(&data, delta_head+sizeof(delta_head));
+	return get_delta_hdr_size_sz(&data, delta_head+sizeof(delta_head));
+}
+
+unsigned long get_size_from_delta(struct packed_git *p,
+				  struct pack_window **w_curs,
+				  off_t curpos)
+{
+	size_t size = get_size_from_delta_sz(p, w_curs, curpos);
+
+	return cast_size_t_to_ulong(size);
+}
+
+/*
+ * Like unpack_object_header(), but returns the size via size_t* instead
+ * of unsigned long*. This is needed for >4GB objects on Windows, where
+ * unsigned long is 32-bit but size_t is 64-bit. Used by streaming code
+ * to get the correct, untruncated object size.
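+ * (A size of 2^32 already needs the varint continuation byte at shift 32,
+ * which a 32-bit unsigned long cannot represent.)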
+ */
+static int unpack_object_header_sz(struct packed_git *p,
+				   struct pack_window **w_curs,
+				   off_t *curpos,
+				   size_t *sizep)
+{
+	unsigned char *base;
+	unsigned long left;
+	unsigned long used;
+	enum object_type type;
+
+	base = use_pack(p, w_curs, *curpos, &left);
+	used = unpack_object_header_buffer_internal(base, left, &type, sizep);
+	if (!used)
+		type = OBJ_BAD;
+	else
+		*curpos += used;
+
+	return type;
 }
 
 int unpack_object_header(struct packed_git *p,
@@ -1582,7 +1632,7 @@ int packed_object_info(struct packed_git *p,
 		       off_t obj_offset, struct object_info *oi)
 {
 	struct pack_window *w_curs = NULL;
-	unsigned long size;
+	size_t size;
 	off_t curpos = obj_offset;
 	enum object_type type = OBJ_NONE;
 	int ret;
@@ -1597,7 +1647,8 @@ int packed_object_info(struct packed_git *p,
 		if (!*oi->contentp)
 			type = OBJ_BAD;
 	} else if (oi->sizep || oi->typep || oi->delta_base_oid) {
-		type = unpack_object_header(p, &w_curs, &curpos, &size);
+		/* Use the size_t variant to handle >4GB objects on Windows */
+		type = unpack_object_header_sz(p, &w_curs, &curpos, &size);
 	}
 
 	if (!oi->contentp && oi->sizep) {
@@ -1609,14 +1660,18 @@ int packed_object_info(struct packed_git *p,
 			ret = -1;
 			goto out;
 		}
-		*oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos);
-		if (*oi->sizep == 0) {
+		/*
+		 * Use the size_t variant to avoid die() on >4GB deltas.
+		 * oi->sizep is unsigned long, so truncation may occur,
+		 * but the streaming code uses its own size_t tracking.
+		 */
+		size = get_size_from_delta_sz(p, &w_curs, tmp_pos);
+		if (size == 0) {
 			ret = -1;
 			goto out;
 		}
-	} else {
-		*oi->sizep = size;
 		}
+		*oi->sizep = (unsigned long)size;
 	}
 
 	if (oi->disk_sizep) {
@@ -2561,21 +2616,29 @@ int packfile_store_read_object_stream(struct odb_read_stream **out,
 	struct pack_window *window = NULL;
 	struct object_info oi = OBJECT_INFO_INIT;
 	enum object_type in_pack_type;
-	unsigned long size;
-
-	oi.sizep = &size;
+	size_t size;
 
+	/*
+	 * We need to check whether this is a delta and whether the object
+	 * is smaller than the big file threshold. For the initial check we
+	 * don't need the exact size, just whether it qualifies for streaming.
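+	 * (The threshold comparison happens below, once
+	 * unpack_object_header_sz() has produced the full 64-bit size.)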
+	 */
 	if (packfile_store_read_object_info(store, oid, &oi, 0) ||
 	    oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA ||
-	    oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA ||
-	    repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
+	    oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA)
 		return -1;
 
-	in_pack_type = unpack_object_header(oi.u.packed.pack,
-					    &window,
-					    &oi.u.packed.offset,
-					    &size);
+	/* Read the actual size using size_t to handle >4GB objects on Windows */
+	in_pack_type = unpack_object_header_sz(oi.u.packed.pack,
+					       &window,
+					       &oi.u.packed.offset,
+					       &size);
 	unuse_pack(&window);
+
+	/* Now check the big file threshold with the correct size */
+	if (repo_settings_get_big_file_threshold(store->source->odb->repo) >= size)
+		return -1;
+
 	switch (in_pack_type) {
 	default:
 		return -1; /* we do not do deltas for now */
diff --git a/t/helper/meson.build b/t/helper/meson.build
index cba4a9bf4f1434..d4499d26a9af1f 100644
--- a/t/helper/meson.build
+++ b/t/helper/meson.build
@@ -70,6 +70,7 @@ test_tool_sources = [
   'test-submodule-nested-repo-config.c',
   'test-submodule.c',
   'test-subprocess.c',
+  'test-synthesize.c',
   'test-tool.c',
   'test-trace2.c',
   'test-truncate.c',
diff --git a/t/helper/test-synthesize.c b/t/helper/test-synthesize.c
new file mode 100644
index 00000000000000..d4135b115bc9a2
--- /dev/null
+++ b/t/helper/test-synthesize.c
@@ -0,0 +1,250 @@
+#define USE_THE_REPOSITORY_VARIABLE
+
+#include "test-tool.h"
+#include "git-compat-util.h"
+#include "git-zlib.h"
+#include "hash.h"
+#include "hex.h"
+#include "object.h"
+#include "parse-options.h"
+#include "repository.h"
+#include "setup.h"
+#include "strbuf.h"
+
+#define BLOCK_SIZE 0xffff
+static const unsigned char zeros[BLOCK_SIZE];
+
+/*
+ * Write data as an uncompressed zlib stream.
+ * For data larger than 64KB, writes multiple uncompressed blocks.
+ * If data is NULL, writes zeros.
+ * Updates the pack checksum context.
+ */
+static void write_uncompressed_zlib(FILE *f, struct git_hash_ctx *pack_ctx,
+				    const void *data, size_t len,
+				    const struct git_hash_algo *algo)
+{
+	unsigned char zlib_header[2] = { 0x78, 0x01 }; /* CMF, FLG */
+	unsigned char block_header[5];
+	const unsigned char *p = data;
+	size_t remaining = len;
+	uint32_t adler = 1L; /* adler32 initial value */
+	unsigned char adler_buf[4];
+
+	/* Write zlib header */
+	fwrite(zlib_header, 1, 2, f);
+	algo->update_fn(pack_ctx, zlib_header, 2);
+
+	/* Write uncompressed blocks (max 64KB each) */
+	do {
+		size_t block_len = remaining > BLOCK_SIZE ? BLOCK_SIZE : remaining;
+		int is_final = (block_len == remaining);
+		const unsigned char *block_data = data ? p : zeros;
+
+		block_header[0] = is_final ? 0x01 : 0x00;
+		block_header[1] = block_len & 0xff;
+		block_header[2] = (block_len >> 8) & 0xff;
+		block_header[3] = block_header[1] ^ 0xff;
+		block_header[4] = block_header[2] ^ 0xff;
+
+		fwrite(block_header, 1, 5, f);
+		algo->update_fn(pack_ctx, block_header, 5);
+
+		if (block_len) {
+			fwrite(block_data, 1, block_len, f);
+			algo->update_fn(pack_ctx, block_data, block_len);
+			adler = adler32(adler, block_data, block_len);
+		}
+
+		if (data)
+			p += block_len;
+		remaining -= block_len;
+	} while (remaining > 0);
+
+	/* Write adler32 checksum */
+	put_be32(adler_buf, adler);
+	fwrite(adler_buf, 1, 4, f);
+	algo->update_fn(pack_ctx, adler_buf, 4);
+}
+
+/*
+ * Write a pack object header for the given type and size.
+ * Returns the number of bytes written to the buffer.
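+ * The encoding matches unpack_object_header_buffer(): the low four bits
+ * of the size share the first byte with the three-bit type, and each
+ * continuation byte supplies seven more size bits, least significant first.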
+ */
+static size_t object_header(char *buf, enum object_type type, size_t size)
+{
+	unsigned char *p = (unsigned char *)buf;
+
+	*p = (type << 4) | (size & 0xf);
+	size >>= 4;
+	while (size) {
+		*p++ |= 0x80;
+		*p = size & 0x7f;
+		size >>= 7;
+	}
+	p++;
+	return p - (unsigned char *)buf;
+}
+
+/*
+ * Write an uncompressed object to the pack file.
+ * If `data == NULL`, it is treated as a buffer of NUL bytes.
+ * Updates the pack checksum context.
+ */
+static void write_pack_object(FILE *f, struct git_hash_ctx *pack_ctx,
+			      enum object_type type,
+			      const void *data, size_t len,
+			      struct object_id *oid,
+			      const struct git_hash_algo *algo)
+{
+	char header[32];
+	size_t header_len;
+	struct git_hash_ctx ctx;
+
+	/* Write pack object header */
+	header_len = object_header(header, type, len);
+	fwrite(header, 1, header_len, f);
+	algo->update_fn(pack_ctx, header, header_len);
+
+	/* Write the data as uncompressed zlib */
+	write_uncompressed_zlib(f, pack_ctx, data, len, algo);
+
+	/* Compute the object ID over "<type> <len>\0" plus the payload */
+	algo->init_fn(&ctx);
+	header_len = xsnprintf(header, sizeof(header), "%s %"PRIuMAX,
+			       type_name(type), (uintmax_t)len) + 1;
+	algo->update_fn(&ctx, header, header_len);
+	if (data) {
+		algo->update_fn(&ctx, data, len);
+	} else {
+		for (size_t i = len / BLOCK_SIZE; i; i--)
+			algo->update_fn(&ctx, zeros, BLOCK_SIZE);
+		algo->update_fn(&ctx, zeros, len % BLOCK_SIZE);
+	}
+	algo->final_oid_fn(oid, &ctx);
+}
+
+/*
+ * Generate a pack file with a single large (>4GB) reachable object.
+ *
+ * Creates:
+ *  1. A large blob (all NUL bytes)
+ *  2. A tree containing that blob as "file"
+ *  3. A commit using that tree
+ *  4. The empty tree
+ *  5. A child commit using the empty tree
+ *
+ * This is useful for testing that Git can handle objects larger than 4GB.
+ */
+static int generate_pack_with_large_object(const char *path, size_t blob_size,
+					   const struct git_hash_algo *algo)
+{
+	FILE *f = fopen_for_writing(path);
+	struct git_hash_ctx pack_ctx;
+	char header[1024];
+	struct object_id blob_oid, tree_oid, commit_oid, empty_tree_oid, final_commit_oid;
+	struct strbuf buf = STRBUF_INIT;
+	size_t object_count = 5; /* large blob, tree, commit, empty tree, final commit */
+
+	algo->init_fn(&pack_ctx);
+
+	/* Write pack header */
+	memcpy(header, "PACK", 4);
+	put_be32(header + 4, 2);
+	put_be32(header + 8, object_count);
+	fwrite(header, 1, 12, f);
+	algo->update_fn(&pack_ctx, header, 12);
+
+	/* 1. Write the large blob */
+	write_pack_object(f, &pack_ctx, OBJ_BLOB, NULL, blob_size, &blob_oid, algo);
+
+	/* 2. Write tree containing the blob as "file" */
+	strbuf_addf(&buf, "100644 file%c", '\0');
+	strbuf_add(&buf, blob_oid.hash, algo->rawsz);
+	write_pack_object(f, &pack_ctx, OBJ_TREE, buf.buf, buf.len, &tree_oid, algo);
+
+	/* 3. Write commit using that tree */
+	strbuf_reset(&buf);
+	strbuf_addf(&buf,
+		    "tree %s\n"
+		    "author A U Thor <author@example.com> 1234567890 +0000\n"
+		    "committer C O Mitter <committer@example.com> 1234567890 +0000\n"
+		    "\n"
+		    "Large blob commit\n",
+		    oid_to_hex(&tree_oid));
+	write_pack_object(f, &pack_ctx, OBJ_COMMIT, buf.buf, buf.len, &commit_oid, algo);
+
+	/* 4. Write the empty tree */
+	write_pack_object(f, &pack_ctx, OBJ_TREE, "", 0, &empty_tree_oid, algo);
+
+	/* 5. Write the final commit using the empty tree, with the previous commit as parent */
+	strbuf_reset(&buf);
+	strbuf_addf(&buf,
+		    "tree %s\n"
+		    "parent %s\n"
+		    "author A U Thor <author@example.com> 1234567890 +0000\n"
+		    "committer C O Mitter <committer@example.com> 1234567890 +0000\n"
+		    "\n"
+		    "Empty tree commit\n",
+		    oid_to_hex(&empty_tree_oid),
+		    oid_to_hex(&commit_oid));
+	write_pack_object(f, &pack_ctx, OBJ_COMMIT, buf.buf, buf.len, &final_commit_oid, algo);
+
+	/* Write pack trailer (checksum) */
+	algo->final_fn((unsigned char *)header, &pack_ctx);
+	fwrite(header, 1, algo->rawsz, f);
+
+	fclose(f);
+
+	strbuf_release(&buf);
+
+	/* Print the final commit OID so the caller can set up refs */
+	printf("%s\n", oid_to_hex(&final_commit_oid));
+
+	return 0;
+}
+
+static int cmd__synthesize__pack(int argc, const char **argv,
+				 const char *prefix UNUSED,
+				 struct repository *repo)
+{
+	int non_git;
+	const struct git_hash_algo *algo;
+	size_t count;
+	const char *path;
+	const char * const usage[] = {
+		"test-tool synthesize pack <size> <path>",
+		NULL
+	};
+	struct option options[] = {
+		OPT_END()
+	};
+
+	setup_git_directory_gently(&non_git);
+	repo = the_repository;
+	algo = repo->hash_algo;
+
+	argc = parse_options(argc, argv, NULL, options, usage,
+			     PARSE_OPT_KEEP_ARGV0);
+	if (argc != 3)
+		usage_with_options(usage, options);
+
+	count = strtoumax(argv[1], NULL, 10);
+	path = argv[2];
+
+	return !!generate_pack_with_large_object(path, count, algo);
+}
+
+int cmd__synthesize(int argc, const char **argv)
+{
+	const char *prefix = NULL;
+	char const * const synthesize_usage[] = {
+		"test-tool synthesize pack <size> <path>",
+		NULL,
+	};
+	parse_opt_subcommand_fn *fn = NULL;
+	struct option options[] = {
+		OPT_SUBCOMMAND("pack", &fn, cmd__synthesize__pack),
+		OPT_END()
+	};
+
+	argc = parse_options(argc, argv, prefix, options, synthesize_usage, 0);
+	return !!fn(argc, argv, prefix, NULL);
+}
diff --git a/t/helper/test-tool.c b/t/helper/test-tool.c
index 9d1b41c8e39b89..ee16b2cb23719e 100644
--- a/t/helper/test-tool.c
+++ b/t/helper/test-tool.c
@@ -83,6 +83,7 @@ static struct test_cmd cmds[] = {
 	{ "submodule-config", cmd__submodule_config },
 	{ "submodule-nested-repo-config", cmd__submodule_nested_repo_config },
 	{ "subprocess", cmd__subprocess },
+	{ "synthesize", cmd__synthesize },
 	{ "trace2", cmd__trace2 },
 	{ "truncate", cmd__truncate },
 	{ "userdiff", cmd__userdiff },
diff --git a/t/helper/test-tool.h b/t/helper/test-tool.h
index e18e5a9ed9de81..4cf9f935a4cdfa 100644
--- a/t/helper/test-tool.h
+++ b/t/helper/test-tool.h
@@ -76,6 +76,7 @@ int cmd__submodule(int argc, const char **argv);
 int cmd__submodule_config(int argc, const char **argv);
 int cmd__submodule_nested_repo_config(int argc, const char **argv);
 int cmd__subprocess(int argc, const char **argv);
+int cmd__synthesize(int argc, const char **argv);
 int cmd__trace2(int argc, const char **argv);
 int cmd__truncate(int argc, const char **argv);
 int cmd__userdiff(int argc, const char **argv);
diff --git a/t/t5608-clone-2gb.sh b/t/t5608-clone-2gb.sh
index 87a8cd9f98381a..67b611353de8c4 100755
--- a/t/t5608-clone-2gb.sh
+++ b/t/t5608-clone-2gb.sh
@@ -49,4 +49,15 @@ test_expect_success 'clone - with worktree, file:// protocol' '
 
 '
 
+test_expect_success 'clone with >4GB object' '
+	# Generate a pack with a single >4GB blob to test the shift-overflow
+	# fix: without it, the size calculation truncates once the varint
+	# shift reaches 32 bits on platforms with a 32-bit unsigned long.
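+	# The helper prints the tip commit OID on stdout; capture it so a
+	# ref can point at the synthesized history.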
+	git init --bare 4gb-repo &&
+	head_oid=$(test-tool synthesize pack $((4*1024*1024*1024+1)) 4gb-repo/objects/pack/test.pack) &&
+	git -C 4gb-repo index-pack objects/pack/test.pack &&
+	git -C 4gb-repo update-ref refs/heads/main $head_oid &&
+
+	git clone --no-checkout --bare 4gb-repo 4gb-clone
+'
+
 test_done