summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Alexei Podtelezhnikov <apodtele@gmail.com> 2021-08-30 07:09:53 -0400
committer Alexei Podtelezhnikov <apodtele@gmail.com> 2021-08-30 07:09:53 -0400
commit b6c11d494e9b5ceaab75a5858b836e8a8ede41c0 (patch)
tree ee640c857ecf7efaaea7d80378a4acd1fbce7868
parent d1c20005042a31fba7e86603b0f65fcbf21fe51c (diff)
download freetype2-b6c11d494e9b5ceaab75a5858b836e8a8ede41c0.tar.gz
[smooth] Reduce shift in multiply-shift optimization.
* src/smooth/ftgrays.c (FT_UDIVPREP, FT_UDIV): Reduce shift.

Smaller shifts that keep the division operands of FT_UDIVPREP within 32 bits result in slightly faster divisions, which is noticeable in the overall performance. The loss of precision is tolerable until the divisors (the components dx and dy) approach 32 - PIXEL_BITS bits. With PIXEL_BITS = 8, this corresponds to about 65,000 pixels, a bitmap size that we refuse to render anyway.

Using `ftbench -p -s60 -t5 -bc timesi.ttf`:
  Before: 8.52 us/op
  After:  8.32 us/op
-rw-r--r-- src/smooth/ftgrays.c 10
1 file changed, 4 insertions, 6 deletions
diff --git a/src/smooth/ftgrays.c b/src/smooth/ftgrays.c
index 576dbb325..b8d680160 100644
--- a/src/smooth/ftgrays.c
+++ b/src/smooth/ftgrays.c
@@ -386,12 +386,10 @@ typedef ptrdiff_t FT_PtrDist;
/* divisors to provide sufficient accuracy of the multiply-shift. */
/* It should not exceed (64 - PIXEL_BITS) to prevent overflowing and */
/* leave enough room for 64-bit unsigned multiplication however. */
-#define FT_UDIVPREP( c, b ) \
- FT_Int64 b ## _r = c ? (FT_Int64)( ~(FT_UInt64)0 >> PIXEL_BITS ) / ( b ) \
- : 0
-#define FT_UDIV( a, b ) \
- (TCoord)( ( (FT_UInt64)( a ) * (FT_UInt64)( b ## _r ) ) >> \
- ( sizeof( FT_UInt64 ) * FT_CHAR_BIT - PIXEL_BITS ) )
+#define FT_UDIVPREP( c, b ) \
+ FT_Int64 b ## _r = c ? (FT_Int64)0xFFFFFFFF / ( b ) : 0
+#define FT_UDIV( a, b ) \
+ (TCoord)( ( (FT_UInt64)( a ) * (FT_UInt64)( b ## _r ) ) >> 32 )
/* Scale area and apply fill rule to calculate the coverage byte. */