Merge branch 'en/diffstat-utf8-truncation-fix' into seen

The computation that shortens the filenames shown in diffstat summed
the display widths of individual UTF-8 characters, but failed to take
error cases into account (e.g., an invalid UTF-8 sequence or a control
character).

* en/diffstat-utf8-truncation-fix:
  diff: fix out-of-bounds reads and NULL deref in diffstat UTF-8 truncation
diff --git a/diff.c b/diff.c index 397e38b..1a3b19f 100644 --- a/diff.c +++ b/diff.c
@@ -2927,6 +2927,28 @@ void print_stat_summary(FILE *fp, int files, print_stat_summary_inserts_deletes(&o, files, insertions, deletions); } +/* + * Like utf8_width(), but guaranteed safe for use in loops that subtract + * per-character widths: + * + * - utf8_width() sets *start to NULL on invalid UTF-8 and returns 0; + * we restore the pointer and advance by one byte, returning width 1 + * (matching the strlen()-based fallback in utf8_strwidth()). + * + * - utf8_width() returns -1 for control characters; we return 0 + * (matching utf8_strnwidth() which skips them). + */ +static int utf8_ish_width(const char **start) +{ + const char *old = *start; + int w = utf8_width(start, NULL); + if (!*start) { + *start = old + 1; + return 1; + } + return (w < 0) ? 0 : w; +} + static void show_stats(struct diffstat_t *data, struct diff_options *options) { int i, len, add, del, adds = 0, dels = 0; @@ -3093,8 +3115,8 @@ static void show_stats(struct diffstat_t *data, struct diff_options *options) if (len < 0) len = 0; - while (name_len > len) - name_len -= utf8_width((const char**)&name, NULL); + while (name_len > len && *name) + name_len -= utf8_ish_width((const char**)&name); slash = strchr(name, '/'); if (slash)
diff --git a/t/t4052-stat-output.sh b/t/t4052-stat-output.sh index 7c74906..84c53c1 100755 --- a/t/t4052-stat-output.sh +++ b/t/t4052-stat-output.sh
@@ -445,4 +445,29 @@ test_grep "<RED>|<RESET> ${FILENAME_TRIMMED} | 0" out ' +test_expect_success 'diffstat truncation with invalid UTF-8 does not crash' ' + empty_blob=$(git hash-object -w --stdin </dev/null) && + printf "100644 blob $empty_blob\taaa-\300-aaa\n" | + git mktree >tree_file && + tree=$(cat tree_file) && + empty_tree=$(git mktree </dev/null) && + c1=$(git commit-tree -m before $empty_tree) && + c2=$(git commit-tree -m after -p $c1 $tree) && + git -c core.quotepath=false diff --stat --stat-name-width=5 $c1..$c2 >output && + test_grep "| 0" output +' + +test_expect_success FUNNYNAMES 'diffstat truncation with control chars does not crash' ' + FNAME=$(printf "aaa-\001-aaa") && + git commit --allow-empty -m setup && + >"$FNAME" && + git add -- $FNAME && + git commit -m "add file with control char name" && + git -c core.quotepath=false diff --stat --stat-name-width=5 HEAD~1..HEAD >output && + test_grep "| 0" output && + rm -- $FNAME && + git rm -- $FNAME && + git commit -m "remove test file" +' + test_done