diff --git a/delta.h b/delta.h index 8a56ec07992c75..fad68cfc45f6f4 100644 --- a/delta.h +++ b/delta.h @@ -86,8 +86,11 @@ void *patch_delta(const void *src_buf, unsigned long src_size, * This must be called twice on the delta data buffer, first to get the * expected source buffer size, and again to get the target buffer size. */ -static inline unsigned long get_delta_hdr_size(const unsigned char **datap, - const unsigned char *top) +/* + * Size_t variant that doesn't truncate - use for >4GB objects on Windows. + */ +static inline size_t get_delta_hdr_size_sz(const unsigned char **datap, + const unsigned char *top) { const unsigned char *data = *datap; size_t cmd, size = 0; @@ -98,6 +101,13 @@ static inline unsigned long get_delta_hdr_size(const unsigned char **datap, i += 7; } while (cmd & 0x80 && data < top); *datap = data; + return size; +} + +static inline unsigned long get_delta_hdr_size(const unsigned char **datap, + const unsigned char *top) +{ + size_t size = get_delta_hdr_size_sz(datap, top); return cast_size_t_to_ulong(size); } diff --git a/object-file.c b/object-file.c index e55bf1bfff670f..0c823e5fc4adb6 100644 --- a/object-file.c +++ b/object-file.c @@ -1086,8 +1086,8 @@ int odb_source_loose_write_stream(struct odb_source *source, } while (ret == Z_OK || ret == Z_BUF_ERROR); if (stream.total_in != len + hdrlen) - die(_("write stream object %ld != %"PRIuMAX), stream.total_in, - (uintmax_t)len + hdrlen); + die(_("write stream object %"PRIuMAX" != %"PRIuMAX), + (uintmax_t)stream.total_in, (uintmax_t)len + hdrlen); /* * Common steps for write_loose_object and stream_loose_object to @@ -2108,6 +2108,7 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out, struct object_info oi = OBJECT_INFO_INIT; struct odb_loose_read_stream *st; unsigned long mapsize; + unsigned long size_ul; void *mapped; mapped = odb_source_loose_map_object(source, oid, &mapsize); @@ -2131,11 +2132,18 @@ int odb_source_loose_read_object_stream(struct odb_read_stream **out, goto error; } - oi.sizep = &st->base.size; + /* + * object_info.sizep is unsigned long* (32-bit on Windows), but + * st->base.size is size_t (64-bit). Use temporary variable. + * Note: loose objects >4GB would still truncate here, but such + * large loose objects are uncommon (they'd normally be packed). + */ + oi.sizep = &size_ul; oi.typep = &st->base.type; if (parse_loose_header(st->hdr, &oi) < 0 || st->base.type < 0) goto error; + st->base.size = size_ul; st->mapped = mapped; st->mapsize = mapsize; diff --git a/odb/streaming.c b/odb/streaming.c index 4a4474f891a07f..bd460f9adc2cb2 100644 --- a/odb/streaming.c +++ b/odb/streaming.c @@ -158,15 +158,26 @@ static int open_istream_incore(struct odb_read_stream **out, .base.read = read_istream_incore, }; struct odb_incore_read_stream *st; + unsigned long size_ul; int ret; oi.typep = &stream.base.type; - oi.sizep = &stream.base.size; + /* + * object_info.sizep is unsigned long* (32-bit on Windows), but + * stream.base.size is size_t (64-bit). We use a temporary variable + * because the types are incompatible. Note: this path still truncates + * for >4GB objects, but large objects should use pack streaming + * (packfile_store_read_object_stream) which handles size_t properly. + * This incore fallback is only used for small objects or when pack + * streaming is unavailable. + */ + oi.sizep = &size_ul; oi.contentp = (void **)&stream.buf; ret = odb_read_object_info_extended(odb, oid, &oi, OBJECT_INFO_DIE_IF_CORRUPT); if (ret) return ret; + stream.base.size = size_ul; CALLOC_ARRAY(st, 1); *st = stream; diff --git a/odb/streaming.h b/odb/streaming.h index c7861f7e13c606..517e2ea2d3f5c3 100644 --- a/odb/streaming.h +++ b/odb/streaming.h @@ -21,7 +21,7 @@ struct odb_read_stream { odb_read_stream_close_fn close; odb_read_stream_read_fn read; enum object_type type; - unsigned long size; /* inflated size of full object */ + size_t size; /* inflated size of full object */ }; /* diff --git a/packfile.c b/packfile.c index 402c3b5dc73131..e198c9793f089f 100644 --- a/packfile.c +++ b/packfile.c @@ -1130,8 +1130,8 @@ unsigned long repo_approximate_object_count(struct repository *r) return r->objects->approximate_object_count; } -unsigned long unpack_object_header_buffer(const unsigned char *buf, - unsigned long len, enum object_type *type, unsigned long *sizep) +static unsigned long unpack_object_header_buffer_internal(const unsigned char *buf, + unsigned long len, enum object_type *type, size_t *sizep) { unsigned shift; size_t size, c; @@ -1142,7 +1142,11 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf, size = c & 15; shift = 4; while (c & 0x80) { - if (len <= used || (bitsizeof(long) - 7) < shift) { + /* + * Each continuation byte adds 7 bits. Ensure shift won't + * overflow size_t (use size_t not long for 64-bit on Windows). + */ + if (len <= used || (bitsizeof(size_t) - 7) < shift) { error("bad object header"); size = used = 0; break; @@ -1151,13 +1155,25 @@ unsigned long unpack_object_header_buffer(const unsigned char *buf, size = st_add(size, st_left_shift(c & 0x7f, shift)); shift += 7; } + *sizep = size; + return used; +} + +unsigned long unpack_object_header_buffer(const unsigned char *buf, + unsigned long len, enum object_type *type, unsigned long *sizep) +{ + size_t size; + unsigned long used = unpack_object_header_buffer_internal(buf, len, type, &size); *sizep = cast_size_t_to_ulong(size); return used; } -unsigned long get_size_from_delta(struct packed_git *p, - struct pack_window **w_curs, - off_t curpos) +/* + * Size_t variant for >4GB delta results on Windows. + */ +static size_t get_size_from_delta_sz(struct packed_git *p, + struct pack_window **w_curs, + off_t curpos) { const unsigned char *data; unsigned char delta_head[20], *in; @@ -1204,10 +1220,44 @@ unsigned long get_size_from_delta(struct packed_git *p, data = delta_head; /* ignore base size */ - get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); + get_delta_hdr_size_sz(&data, delta_head+sizeof(delta_head)); /* Read the result size */ - return get_delta_hdr_size(&data, delta_head+sizeof(delta_head)); + return get_delta_hdr_size_sz(&data, delta_head+sizeof(delta_head)); +} + +unsigned long get_size_from_delta(struct packed_git *p, + struct pack_window **w_curs, + off_t curpos) +{ + size_t size = get_size_from_delta_sz(p, w_curs, curpos); + return cast_size_t_to_ulong(size); +} + +/* + * Like unpack_object_header(), but returns size via size_t* instead of + * unsigned long*. This is needed for >4GB objects on Windows where + * unsigned long is 32-bit but size_t is 64-bit. Used by streaming code + * to get the correct untruncated object size. + */ +static int unpack_object_header_sz(struct packed_git *p, + struct pack_window **w_curs, + off_t *curpos, + size_t *sizep) +{ + unsigned char *base; + unsigned long left; + unsigned long used; + enum object_type type; + + base = use_pack(p, w_curs, *curpos, &left); + used = unpack_object_header_buffer_internal(base, left, &type, sizep); + if (!used) { + type = OBJ_BAD; + } else + *curpos += used; + + return type; } int unpack_object_header(struct packed_git *p, @@ -1582,7 +1632,7 @@ int packed_object_info(struct packed_git *p, off_t obj_offset, struct object_info *oi) { struct pack_window *w_curs = NULL; - unsigned long size; + size_t size; off_t curpos = obj_offset; enum object_type type = OBJ_NONE; int ret; @@ -1597,7 +1647,8 @@ int packed_object_info(struct packed_git *p, if (!*oi->contentp) type = OBJ_BAD; } else if (oi->sizep || oi->typep || oi->delta_base_oid) { - type = unpack_object_header(p, &w_curs, &curpos, &size); + /* Use size_t variant to handle >4GB objects on Windows */ + type = unpack_object_header_sz(p, &w_curs, &curpos, &size); } if (!oi->contentp && oi->sizep) { @@ -1609,14 +1660,18 @@ int packed_object_info(struct packed_git *p, ret = -1; goto out; } - *oi->sizep = get_size_from_delta(p, &w_curs, tmp_pos); - if (*oi->sizep == 0) { + /* + * Use size_t variant to avoid die() on >4GB deltas. + * oi->sizep is unsigned long, so truncation may occur, + * but streaming code uses its own size_t tracking. + */ + size = get_size_from_delta_sz(p, &w_curs, tmp_pos); + if (size == 0) { ret = -1; goto out; } - } else { - *oi->sizep = size; } + *oi->sizep = (unsigned long)size; } if (oi->disk_sizep) { @@ -2561,21 +2616,29 @@ int packfile_store_read_object_stream(struct odb_read_stream **out, struct pack_window *window = NULL; struct object_info oi = OBJECT_INFO_INIT; enum object_type in_pack_type; - unsigned long size; - - oi.sizep = &size; + size_t size; + /* + * We need to check if this is a delta or if the object is smaller + * than the big file threshold. For the initial check, we don't need + * the exact size, just whether it qualifies for streaming. + */ if (packfile_store_read_object_info(store, oid, &oi, 0) || oi.u.packed.type == PACKED_OBJECT_TYPE_REF_DELTA || - oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA || - repo_settings_get_big_file_threshold(store->source->odb->repo) >= size) + oi.u.packed.type == PACKED_OBJECT_TYPE_OFS_DELTA) return -1; - in_pack_type = unpack_object_header(oi.u.packed.pack, - &window, - &oi.u.packed.offset, - &size); + /* Read the actual size using size_t to handle >4GB objects on Windows */ + in_pack_type = unpack_object_header_sz(oi.u.packed.pack, + &window, + &oi.u.packed.offset, + &size); unuse_pack(&window); + + /* Now check the big file threshold with the correct size */ + if (repo_settings_get_big_file_threshold(store->source->odb->repo) >= size) + return -1; + switch (in_pack_type) { default: return -1; /* we do not do deltas for now */