summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Edward Welbourne <edward.welbourne@qt.io> 2021-11-15 12:35:21 +0100
committer: Daniel Smith <Daniel.Smith@qt.io> 2021-11-15 12:06:03 +0000
commit: 0c9290408c128d29411acb13add50209ea2f4355 (patch)
tree: e676bbb532290f59c0789b7574808306faae10f0
parent: 8b17acbfde3c6f86d29e62f5de14e6b9340ac3ce (diff)
download: qtrepotools-0c9290408c128d29411acb13add50209ea2f4355.tar.gz
Match Unicode either as such or as UTF-8
This is a fix-up for commit 8b17acbfde3c6f86d29e62f5de14e6b9340ac3ce; it turns out perl5, at least on our current systems, is encoding the data we're parsing in UTF-8. (Parent commit was tested with onlinegdb's version of perl, which was using naked Unicode, not UTF-8, so we may need the Unicode-form of the check, too.)

Task-number: QTQAINFRA-4630
Change-Id: Ic93f9fc1d0cd1d8f6bf6ec1e695454974a84f620
Reviewed-by: Daniel Smith <Daniel.Smith@qt.io>
-rwxr-xr-x git-hooks/sanitize-commit 5
1 files changed, 4 insertions, 1 deletions
diff --git a/git-hooks/sanitize-commit b/git-hooks/sanitize-commit
index 5a95ca1..3d1ac48 100755
--- a/git-hooks/sanitize-commit
+++ b/git-hooks/sanitize-commit
@@ -839,7 +839,10 @@ while (<DIFF>) {
}
# Check for Unicode Bi-directional Override exploit characters.
# See https://trojansource.codes/
- if (/[\x{202A}-\x{202E}\x{2066}-\x{2069}]/) {
+ # NB: data has been UTF-8 encoded by the time it gets here,
+ # but retain Unicode form of it for (a) ease of reading (b)
+ # maybe perl will change encoding...
+ if (/\xe2(\x80[\xaa-\xae]|\x81[\xa6-\xa9])/ || /[\x{202A}-\x{202E}\x{2066}-\x{2069}]/) {
complain_ln("Unicode bi-directional override characters", "bidi", 1)
}
} else {