summary | shortlog | log | commit | commitdiff | tree
raw | patch | inline | side by side (parent: 50a991e)
raw | patch | inline | side by side (parent: 50a991e)
author | Linus Torvalds <torvalds@linux-foundation.org> | |
Thu, 18 Jun 2009 00:22:27 +0000 (17:22 -0700) | ||
committer | Junio C Hamano <gitster@pobox.com> | |
Thu, 18 Jun 2009 16:22:46 +0000 (09:22 -0700) |
Shifting 'unsigned char' or 'unsigned short' left can result in sign
extension errors, since the C integer promotion rules mean that the
unsigned char/short will get implicitly promoted to a signed 'int' due to
the shift (or due to other operations).
This normally doesn't matter, but if you shift things up sufficiently, it
will now set the sign bit in 'int', and a subsequent cast to a bigger type
(eg 'long' or 'unsigned long') will now sign-extend the value despite the
original expression being unsigned.
One example of this would be something like

    unsigned long size;
    unsigned char c;

    size += c << 24;

where despite all the variables being unsigned, 'c << 24' ends up being a
signed entity, and will get sign-extended when the addition is then done in
an 'unsigned long' type.
Since git uses 'unsigned char' pointers extensively, we actually have this
bug in a couple of places.
I may have missed some, but this is the result of looking at

    git grep '[^0-9 ][ ]*<<[ ][a-z]' -- '*.c' '*.h'
    git grep '<<[ ]*24'

which catches at least the common byte cases (shifting variables by a
variable amount, and shifting by 24 bits).
I also grepped for just 'unsigned char' variables in general, and
converted the ones that most obviously ended up getting implicitly cast
immediately anyway (eg hash_name(), encode_85()).
In addition to just avoiding 'unsigned char', this patch also tries to use
a common idiom for the delta header size thing. We had three different
variations on it: "& 0x7fUL" in one place (getting the sign extension
right), and "& ~0x80" and "& 0x7f" in two other places (not getting it
right). Apart from making them all just avoid using "unsigned char" at
all, I also unified them to then use a simple "& 0x7f".
I considered making a sparse extension which warns about doing implicit
casts from unsigned types to signed types, but it gets rather complex very
quickly, so this is just a hack.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
Shifting 'unsigned char' or 'unsigned short' left can result in sign
extension errors, since the C integer promotion rules mean that the
unsigned char/short will get implicitly promoted to a signed 'int' due to
the shift (or due to other operations).
This normally doesn't matter, but if you shift things up sufficiently, it
will now set the sign bit in 'int', and a subsequent cast to a bigger type
(eg 'long' or 'unsigned long') will now sign-extend the value despite the
original expression being unsigned.
One example of this would be something like

    unsigned long size;
    unsigned char c;

    size += c << 24;

where despite all the variables being unsigned, 'c << 24' ends up being a
signed entity, and will get sign-extended when the addition is then done in
an 'unsigned long' type.
Since git uses 'unsigned char' pointers extensively, we actually have this
bug in a couple of places.
I may have missed some, but this is the result of looking at

    git grep '[^0-9 ][ ]*<<[ ][a-z]' -- '*.c' '*.h'
    git grep '<<[ ]*24'

which catches at least the common byte cases (shifting variables by a
variable amount, and shifting by 24 bits).
I also grepped for just 'unsigned char' variables in general, and
converted the ones that most obviously ended up getting implicitly cast
immediately anyway (eg hash_name(), encode_85()).
In addition to just avoiding 'unsigned char', this patch also tries to use
a common idiom for the delta header size thing. We had three different
variations on it: "& 0x7fUL" in one place (getting the sign extension
right), and "& ~0x80" and "& 0x7f" in two other places (not getting it
right). Apart from making them all just avoid using "unsigned char" at
all, I also unified them to then use a simple "& 0x7f".
I considered making a sparse extension which warns about doing implicit
casts from unsigned types to signed types, but it gets rather complex very
quickly, so this is just a hack.
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Junio C Hamano <gitster@pobox.com>
attr.c | patch | blob | history | |
base85.c | patch | blob | history | |
builtin-pack-objects.c | patch | blob | history | |
builtin-unpack-objects.c | patch | blob | history | |
delta.h | patch | blob | history | |
index-pack.c | patch | blob | history | |
patch-delta.c | patch | blob | history | |
sha1_file.c | patch | blob | history |
diff --git a/attr.c b/attr.c
index 98eb636f13d314c20e18e646c074e2511b3c891c..f8f6faa94fd7eb4e260a75b82ef372e632f5eb9a 100644 (file)
--- a/attr.c
+++ b/attr.c
static unsigned hash_name(const char *name, int namelen)
{
- unsigned val = 0;
- unsigned char c;
+ unsigned val = 0, c;
while (namelen--) {
c = *name++;
diff --git a/base85.c b/base85.c
index b88270f90844095b3d352cc4213cbebd95a7f420..b417a15bbc83fff7180078a4cf9f73603477a295 100644 (file)
--- a/base85.c
+++ b/base85.c
unsigned acc = 0;
int cnt;
for (cnt = 24; cnt >= 0; cnt -= 8) {
- int ch = *data++;
+ unsigned ch = *data++;
acc |= ch << cnt;
if (--bytes == 0)
break;
diff --git a/builtin-pack-objects.c b/builtin-pack-objects.c
index 9742b45c4da7f9330491d0b4c6d3ed60aadb0f4c..941cc2d73cf5ee6791a0cee8c409dcdb9756b448 100644 (file)
--- a/builtin-pack-objects.c
+++ b/builtin-pack-objects.c
static unsigned name_hash(const char *name)
{
- unsigned char c;
- unsigned hash = 0;
+ unsigned c, hash = 0;
if (!name)
return 0;
diff --git a/builtin-unpack-objects.c b/builtin-unpack-objects.c
index 9a773239cabab9998bcea829c0fb2abea9bdb8e8..8e831be476b71eed151cee1b5f2fc81c4edcb5ff 100644 (file)
--- a/builtin-unpack-objects.c
+++ b/builtin-unpack-objects.c
static void unpack_one(unsigned nr)
{
unsigned shift;
- unsigned char *pack, c;
- unsigned long size;
+ unsigned char *pack;
+ unsigned long size, c;
enum object_type type;
obj_list[nr].offset = consumed_bytes;
diff --git a/delta.h b/delta.h
index 40ccf5a1e95f62d840a006274f7024fa43208b1c..b9d333dd5a1c64ab35159ed608cf942951504f84 100644 (file)
--- a/delta.h
+++ b/delta.h
const unsigned char *top)
{
const unsigned char *data = *datap;
- unsigned char cmd;
- unsigned long size = 0;
+ unsigned long cmd, size = 0;
int i = 0;
do {
cmd = *data++;
- size |= (cmd & ~0x80) << i;
+ size |= (cmd & 0x7f) << i;
i += 7;
} while (cmd & 0x80 && data < top);
*datap = data;
diff --git a/index-pack.c b/index-pack.c
index 6e93ee6af64593937ee9b078e599e81d40b74303..0c92bafcbb80cc539de9a86305562ff291361db9 100644 (file)
--- a/index-pack.c
+++ b/index-pack.c
static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_base)
{
- unsigned char *p, c;
- unsigned long size;
+ unsigned char *p;
+ unsigned long size, c;
off_t base_offset;
unsigned shift;
void *data;
@@ -312,7 +312,7 @@ static void *unpack_raw_entry(struct object_entry *obj, union delta_base *delta_
p = fill(1);
c = *p;
use(1);
- size += (c & 0x7fUL) << shift;
+ size += (c & 0x7f) << shift;
shift += 7;
}
obj->size = size;
diff --git a/patch-delta.c b/patch-delta.c
index ed9db81fa82c812c9ffa07f5a40540dbb15da0d3..ef748ce96d246bb17c6e64b51b882d535d7f7774 100644 (file)
--- a/patch-delta.c
+++ b/patch-delta.c
if (cmd & 0x01) cp_off = *data++;
if (cmd & 0x02) cp_off |= (*data++ << 8);
if (cmd & 0x04) cp_off |= (*data++ << 16);
- if (cmd & 0x08) cp_off |= (*data++ << 24);
+ if (cmd & 0x08) cp_off |= ((unsigned) *data++ << 24);
if (cmd & 0x10) cp_size = *data++;
if (cmd & 0x20) cp_size |= (*data++ << 8);
if (cmd & 0x40) cp_size |= (*data++ << 16);
diff --git a/sha1_file.c b/sha1_file.c
index e73cd4fc0ba2daac14f604f1973d1b0658212b26..8f5fe62d545ace21c338cd554c76bac5d5acb431 100644 (file)
--- a/sha1_file.c
+++ b/sha1_file.c
unsigned long len, enum object_type *type, unsigned long *sizep)
{
unsigned shift;
- unsigned char c;
- unsigned long size;
+ unsigned long size, c;
unsigned long used = 0;
c = buf[used++];