From 47988fad885e8129f9dc36f0ed4d63375de23603 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?P=C3=A1draig=20Brady?= Date: Sun, 4 Sep 2022 19:59:25 +0100 Subject: ls: --color: honor separate sequences for extension cases Following on from commit v8.29-45-g24053fbd8 which unconditionally used case insensitive extension matching, support selective case sensitive matching when there are separate extension cases defined with different display sequences. * src/dircolors.hin: Document how file name suffixes are matched. Note this is displayed with `dircolors --print-database` which the texi info recommends to use for details. * src/ls.c (parse_ls_color): Postprocess the list to mark entries for case sensitive matching, and also adjust so that unmatchable entries are more quickly ignored. (get_color_indicator): Use exact matching rather than case insensitive matching if so marked. * tests/ls/color-ext.sh: Add test cases. * NEWS: Mention the change in behavior. Addresses https://bugs.gnu.org/33123 --- NEWS | 3 +++ src/dircolors.hin | 8 ++++--- src/ls.c | 66 +++++++++++++++++++++++++++++++++++++++++++++++---- tests/ls/color-ext.sh | 46 ++++++++++++++++++++++++++++++++++- 4 files changed, 114 insertions(+), 9 deletions(-) diff --git a/NEWS b/NEWS index 7bc8f9109..f46b16b6f 100644 --- a/NEWS +++ b/NEWS @@ -103,6 +103,9 @@ GNU coreutils NEWS -*- outline -*- reverting to the behavior in coreutils-9.0 and earlier. This behavior is now documented. + ls --color now matches a file extension case sensitively + if there are different sequences defined for separate cases. + printf unicode \uNNNN, \UNNNNNNNN syntax, now supports all valid unicode code points. Previously is was restricted to the C universal character subset, which restricted most points <= 0x9F. 
diff --git a/src/dircolors.hin b/src/dircolors.hin index e5d9eab94..24a90cf94 100644 --- a/src/dircolors.hin +++ b/src/dircolors.hin @@ -74,8 +74,10 @@ STICKY 37;44 # dir with the sticky bit set (+t) and not other-writable EXEC 01;32 # List any file extensions like '.gz' or '.tar' that you would like ls -# to color below. Put the extension, a space, and the color init string. -# (and any comments you want to add after a '#') +# to color below. Put the suffix, a space, and the color init string. +# (and any comments you want to add after a '#'). +# Suffixes are matched case insensitively, but if you define different +# init strings for separate cases, those will be honored. # If you use DOS-style suffixes, you may want to uncomment the following: #.cmd 01;32 # executables (bright green) @@ -88,7 +90,7 @@ EXEC 01;32 #.sh 01;32 #.csh 01;32 - # archives or compressed (bright red) +# archives or compressed (bright red) .tar 01;31 .tgz 01;31 .arc 01;31 diff --git a/src/ls.c b/src/ls.c index bd9130ccb..71d94fd6a 100644 --- a/src/ls.c +++ b/src/ls.c @@ -612,6 +612,7 @@ struct color_ext_type { struct bin_str ext; /* The extension we're looking for */ struct bin_str seq; /* The sequence to output when we do */ + bool exact_match; /* Whether to compare case sensitively */ struct color_ext_type *next; /* Next in list */ }; @@ -643,7 +644,7 @@ static struct bin_str color_indicator[] = { LEN_STR_PAIR ("\033[K") }, /* cl: clear to end of line */ }; -/* FIXME: comment */ +/* A list mapping file extensions to corresponding display sequence. 
*/ static struct color_ext_type *color_ext_list = NULL; /* Buffer for color sequences */ @@ -2775,6 +2776,7 @@ parse_ls_color (void) ext = xmalloc (sizeof *ext); ext->next = color_ext_list; color_ext_list = ext; + ext->exact_match = false; ++p; ext->ext.string = buf; @@ -2860,6 +2862,49 @@ parse_ls_color (void) } print_with_color = false; } + else + { + /* Postprocess list to set EXACT_MATCH on entries where there are + different cased extensions with separate sequences defined. + Also set ext.len to SIZE_MAX on any entries that can't + match due to precedence, to avoid redundant string compares. */ + struct color_ext_type *e1; + + for (e1 = color_ext_list; e1 != NULL; e1 = e1->next) + { + struct color_ext_type *e2; + bool case_ignored = false; + + for (e2 = e1->next; e2 != NULL; e2 = e2->next) + { + if (e2->ext.len < SIZE_MAX && e1->ext.len == e2->ext.len) + { + if (memcmp (e1->ext.string, e2->ext.string, e1->ext.len) == 0) + e2->ext.len = SIZE_MAX; /* Ignore */ + else if (c_strncasecmp (e1->ext.string, e2->ext.string, + e1->ext.len) == 0) + { + if (case_ignored) + { + e2->ext.len = SIZE_MAX; /* Ignore */ + } + else if (e1->seq.len == e2->seq.len + && memcmp (e1->seq.string, e2->seq.string, + e1->seq.len) == 0) + { + e2->ext.len = SIZE_MAX; /* Ignore */ + case_ignored = true; /* Ignore all subsequent */ + } + else + { + e1->exact_match = true; + e2->exact_match = true; + } + } + } + } + } + } if (color_indicator[C_LINK].len == 6 && !STRNCMP_LIT (color_indicator[C_LINK].string, "target")) @@ -5040,10 +5085,21 @@ get_color_indicator (const struct fileinfo *f, bool symlink_target) name += len; /* Pointer to final \0. 
*/ for (ext = color_ext_list; ext != NULL; ext = ext->next) { - if (ext->ext.len <= len - && c_strncasecmp (name - ext->ext.len, ext->ext.string, - ext->ext.len) == 0) - break; + if (ext->ext.len <= len) + { + if (ext->exact_match) + { + if (STREQ_LEN (name - ext->ext.len, ext->ext.string, + ext->ext.len)) + break; + } + else + { + if (c_strncasecmp (name - ext->ext.len, ext->ext.string, + ext->ext.len) == 0) + break; + } + } } } diff --git a/tests/ls/color-ext.sh b/tests/ls/color-ext.sh index 091895c9f..ab045ec6a 100755 --- a/tests/ls/color-ext.sh +++ b/tests/ls/color-ext.sh @@ -20,13 +20,16 @@ print_ver_ ls working_umask_or_skip_ -touch img1.jpg IMG2.JPG file1.z file2.Z || framework_failure_ +touch img1.jpg IMG2.JPG img3.JpG file1.z file2.Z || framework_failure_ code_jpg='01;35' +code_JPG='01;35;46' code_z='01;31' c0=$(printf '\033[0m') c_jpg=$(printf '\033[%sm' $code_jpg) +c_JPG=$(printf '\033[%sm' $code_JPG) c_z=$(printf '\033[%sm' $code_z) +# Case insensitive extensions LS_COLORS="*.jpg=$code_jpg:*.Z=$code_z" ls -U1 --color=always \ img1.jpg IMG2.JPG file1.z file2.Z > out || fail=1 printf "$c0\ @@ -37,5 +40,46 @@ ${c_z}file2.Z$c0 " > out_ok || framework_failure_ compare out out_ok || fail=1 +# Case sensitive extensions +LS_COLORS="*.jpg=$code_jpg:*.JPG=$code_JPG" ls -U1 --color=always \ + img1.jpg IMG2.JPG img3.JpG > out || fail=1 +printf "$c0\ +${c_jpg}img1.jpg$c0 +${c_JPG}IMG2.JPG$c0 +img3.JpG +" > out_ok || framework_failure_ +compare out out_ok || fail=1 + +# Case insensitive extensions (due to same sequences) +LS_COLORS="*.jpg=$code_jpg:*.JPG=$code_jpg" ls -U1 --color=always \ + img1.jpg IMG2.JPG img3.JpG > out || fail=1 +printf "$c0\ +${c_jpg}img1.jpg$c0 +${c_jpg}IMG2.JPG$c0 +${c_jpg}img3.JpG$c0 +" > out_ok || framework_failure_ +compare out out_ok || fail=1 + +# Case insensitive extensions (due to same sequences (after ignored sequences)) +# Note later entries in LS_COLORS take precedence. 
+LS_COLORS="*.jpg=$code_jpg:*.jpg=$code_JPG:*.JPG=$code_JPG" \ + ls -U1 --color=always img1.jpg IMG2.JPG img3.JpG > out || fail=1 +printf "$c0\ +${c_JPG}img1.jpg$c0 +${c_JPG}IMG2.JPG$c0 +${c_JPG}img3.JpG$c0 +" > out_ok || framework_failure_ +compare out out_ok || fail=1 + +# Case sensitive extensions (due to diff sequences (after ignored sequences)) +# Note later entries in LS_COLORS take precedence. +LS_COLORS="*.jpg=$code_JPG:*.jpg=$code_jpg:*.JPG=$code_JPG" \ + ls -U1 --color=always img1.jpg IMG2.JPG img3.JpG > out || fail=1 +printf "$c0\ +${c_jpg}img1.jpg$c0 +${c_JPG}IMG2.JPG$c0 +img3.JpG +" > out_ok || framework_failure_ +compare out out_ok || fail=1 Exit $fail -- cgit v1.2.1