summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWerner Lemberg <wl@gnu.org>2021-07-16 07:40:56 +0200
committerWerner Lemberg <wl@gnu.org>2021-07-16 07:40:56 +0200
commit93771d619f117036da1d20e823840aff6de0e03c (patch)
tree45f56524775095bb2a9e678fa25856d8fc76ace6
parentc37c08738260e267764b6d6a8d4469eaa1e6568a (diff)
downloadfreetype2-93771d619f117036da1d20e823840aff6de0e03c.tar.gz
Formatting and ChangeLog additions for previous commits.
-rw-r--r--ChangeLog80
-rw-r--r--src/smooth/ftgrays.c138
-rwxr-xr-xtests/scripts/download-test-fonts.py61
3 files changed, 181 insertions, 98 deletions
diff --git a/ChangeLog b/ChangeLog
index a9d38aa7f..b7daa70c2 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,38 +1,88 @@
+2021-07-15 Ben Wagner <bungeman@chromium.org>
+
+ * src/smooth/ftgrays.c: Guard inclusion of `emmintrin.h`.
+
+ Guard inclusion of `emmintrin.h` with `#ifdef __SSE2__`. The gcc
+ version of this header, `xmmintrin.h`, and `mmintrin.h` check that
+ the appropriate defines are set before defining anything (are
+ internally guarded). However, the clang versions of these includes
+ are not internally guarded. As a result of this, externally guard
+ the inclusion of these headers.
+
2021-07-15 David Turner <david@freetype.org>
- [smooth] Implement Bezier quadratic arc flattenning with DDA
+ [smooth] Implement Bézier quadratic arc flattening with DDA.
Benchmarking shows that this provides a very slight performance
- boost when rendering fonts with lots of quadratic bezier arcs,
+ boost when rendering fonts with lots of quadratic Bézier arcs,
compared to the recursive arc splitting, but only when SSE2 is
available, or on 64-bit CPUs.
+ On a 2017 Core i5-7300U CPU on Linux/x86_64:
+
+ ftbench -p -s10 -t5 -cb DroidSansFallbackFull.ttf
+
+ Before: 4.033 us/op (best of 5 runs for all numbers)
+ After: 3.876 us/op
+
+ ftbench -p -s60 -t5 -cb DroidSansFallbackFull.ttf
+
+ Before: 13.467 us/op
+ After: 13.385 us/op
+
* src/smooth/ftgrays.c (gray_render_conic): New implementation
based on DDA and optionally SSE2.
2021-07-15 David Turner <david@freetype.org>
- [smooth] Minor speedup to smooth rasterizer
+ [smooth] Minor speedup to smooth rasterizer.
- This speeds up the smooth rasterizer by avoiding a conditional
+ This speeds up the smooth rasterizer by avoiding conditional
branches in the hot path.
- * src/smooth/ftgrays.c: Define a null cell used to both as a
- sentinel for all linked-lists, and to accumulate coverage and
- area values for "out-of-bounds" cell positions without a
- conditional check.
+ - Define a fixed 'null cell', which will be pointed to whenever the
+ current cell is outside of the current target region. This avoids
+ a `ras.cell != NULL` check in the `FT_INTEGRATE` macro.
+
+ - Also use the null cell as a sentinel at the end of all `ycells`
+ linked-lists, by setting its x coordinate to `INT_MAX`. This
+ avoids an `if (!cell)` check in `gray_set_cell` as well.
+
+ - Slightly change the worker struct fields to perform somewhat fewer
+ operations during rendering.
+
+ Example results (on a 2013 Core i5-3337U CPU):
+
+ out/ftbench -p -s10 -t5 -bc DroidSansFallbackFull.ttf
+
+ Before: 5.472 us/op
+ After: 5.275 us/op
+
+ out/ftbench -p -s60 -t5 -bc DroidSansFallbackFull.ttf
+
+ Before: 17.988 us/op
+ After: 17.389 us/op
+
+ * src/smooth/ftgrays.c (gray_TWorker): Replace `num_cells` field with
+ `cell_free` and `cell_limit`.
+ (NULL_CELL_PTR, CELL_MAX_X_VALUE, CELL_IS_NULL): New macros.
+ (gray_dump_cells, gray_set_cell, gray_sweep, gray_sweep_direct,
+ gray_convert_glyph_inner, gray_convert_glyph): Updated.
2021-07-15 David Turner <david@freetype.org>
- Replaces download-test-fonts.sh with download-test-fonts.py which
- does the same work, and also avoids downloading anything if the
- files are already installed with the right content.
+ [tests] Rewrite download script in Python3.
+
+ This commit replaces the bash script with a Python script that does
+ the same work and additionally avoids downloading anything if the
+ files are already installed with the right content.
- Now uses the first 8 byte of each file's sha256 hash for the digest.
+ We now use the first 8 bytes of each file's sha256 hash for the
+ digest.
- * tests/scripts/download-test-fonts.sh: Removed
- * tests/scripts/download-test-fonts.py: New script
- * tests/README.md: Updated
+ * tests/scripts/download-test-fonts.sh: Removed.
+ * tests/scripts/download-test-fonts.py: New script.
+ * tests/README.md: Updated.
2021-07-15 Alex Richardson <Alexander.Richardson@cl.cam.ac.uk>
diff --git a/src/smooth/ftgrays.c b/src/smooth/ftgrays.c
index 5e04ff41b..b802030e7 100644
--- a/src/smooth/ftgrays.c
+++ b/src/smooth/ftgrays.c
@@ -487,8 +487,8 @@ typedef ptrdiff_t FT_PtrDist;
PCell cell_free; /* cell allocation next free slot */
PCell cell_limit; /* cell allocation limit */
- PCell* ycells; /* array of cell linked-lists, one per */
- /* vertical coordinate in the current band. */
+ PCell* ycells; /* array of cell linked-lists; one per */
+ /* vertical coordinate in the current band */
PCell cells; /* cell storage area */
FT_PtrDist max_cells; /* cell storage capacity */
@@ -513,19 +513,21 @@ typedef ptrdiff_t FT_PtrDist;
static gray_TWorker ras;
#endif
-/* Return a pointer to the "null cell", used as a sentinel at the end */
-/* of all ycells[] linked lists. Its x coordinate should be maximal */
-/* to ensure no NULL checks are necessary when looking for an insertion */
-/* point in gray_set_cell(). Other loops should check the cell pointer */
-/* with CELL_IS_NULL() to detect the end of the list. */
-#define NULL_CELL_PTR(ras) (ras).cells
+ /*
+ * Return a pointer to the 'null cell', used as a sentinel at the end of
+ * all `ycells` linked lists. Its x coordinate should be maximal to
+ * ensure no NULL checks are necessary when looking for an insertion point
+ * in `gray_set_cell`. Other loops should check the cell pointer with
+ * CELL_IS_NULL() to detect the end of the list.
+ */
+#define NULL_CELL_PTR( ras ) (ras).cells
-/* The |x| value of the null cell. Must be the largest possible */
-/* integer value stored in a TCell.x field. */
+ /* The |x| value of the null cell. Must be the largest possible */
+ /* integer value stored in a `TCell.x` field. */
#define CELL_MAX_X_VALUE INT_MAX
-/* Return true iff |cell| points to the null cell. */
-#define CELL_IS_NULL(cell) ((cell)->x == CELL_MAX_X_VALUE)
+ /* Return true iff |cell| points to the null cell. */
+#define CELL_IS_NULL( cell ) ( (cell)->x == CELL_MAX_X_VALUE )
#define FT_INTEGRATE( ras, a, b ) \
@@ -556,7 +558,7 @@ typedef ptrdiff_t FT_PtrDist;
printf( "%3d:", y );
- for ( ; !CELL_IS_NULL(cell); cell = cell->next )
+ for ( ; !CELL_IS_NULL( cell ); cell = cell->next )
printf( " (%3d, c:%4d, a:%6d)",
cell->x, cell->cover, cell->area );
printf( "\n" );
@@ -584,9 +586,11 @@ typedef ptrdiff_t FT_PtrDist;
/* Note that if a cell is to the left of the clipping region, it is */
/* actually set to the (min_ex-1) horizontal position. */
- TCoord ey_index = ey - ras.min_ey;
+ TCoord ey_index = ey - ras.min_ey;
+
+
if ( ey_index < 0 || ey_index >= ras.count_ey || ex >= ras.max_ex )
- ras.cell = NULL_CELL_PTR(ras);
+ ras.cell = NULL_CELL_PTR( ras );
else
{
PCell* pcell = ras.ycells + ey_index;
@@ -610,7 +614,7 @@ typedef ptrdiff_t FT_PtrDist;
/* insert new cell */
cell = ras.cell_free++;
- if (cell >= ras.cell_limit)
+ if ( cell >= ras.cell_limit )
ft_longjmp( ras.jump_buffer, 1 );
cell->x = ex;
@@ -978,6 +982,7 @@ typedef ptrdiff_t FT_PtrDist;
}
gray_set_cell( RAS_VAR_ ex1, ey1 );
+
} while ( ex1 != ex2 || ey1 != ey2 );
}
@@ -987,30 +992,37 @@ typedef ptrdiff_t FT_PtrDist;
FT_INTEGRATE( ras, fy2 - fy1, fx1 + fx2 );
End:
- ras.x = to_x;
- ras.y = to_y;
+ ras.x = to_x;
+ ras.y = to_y;
}
#endif
-/* Benchmarking shows that using DDA to flatten the quadratic bezier
- * arcs is slightly faster in the following cases:
- *
- * - When the host CPU is 64-bit.
- * - When SSE2 SIMD registers and instructions are available (even on x86).
- *
- * For other cases, using binary splits is actually slightly faster.
- */
-#if defined(__SSE2__) || defined(__x86_64__) || defined(__aarch64__) || defined(_M_AMD64) || defined(_M_ARM64)
-#define BEZIER_USE_DDA 1
+ /*
+ * Benchmarking shows that using DDA to flatten the quadratic Bézier arcs
+ * is slightly faster in the following cases:
+ *
+ * - When the host CPU is 64-bit.
+ * - When SSE2 SIMD registers and instructions are available (even on
+ * x86).
+ *
+ * For other cases, using binary splits is actually slightly faster.
+ */
+#if defined( __SSE2__ ) || \
+ defined( __x86_64__ ) || \
+ defined( __aarch64__ ) || \
+ defined( _M_AMD64 ) || \
+ defined( _M_ARM64 )
+# define BEZIER_USE_DDA 1
#else
-#define BEZIER_USE_DDA 0
+# define BEZIER_USE_DDA 0
#endif
+
#if BEZIER_USE_DDA
#ifdef __SSE2__
-#include <emmintrin.h>
+# include <emmintrin.h>
#endif
static void
@@ -1058,8 +1070,8 @@ typedef ptrdiff_t FT_PtrDist;
{
dx >>= 2;
shift += 1;
- }
- while (dx > ONE_PIXEL / 4);
+
+ } while ( dx > ONE_PIXEL / 4 );
/*
* The (P0,P1,P2) arc equation, for t in [0,1] range:
@@ -1102,12 +1114,17 @@ typedef ptrdiff_t FT_PtrDist;
* Q << 32 = (2 * B << (32 - N)) + (A << (32 - N - N))
* = (B << (33 - N)) + (A << (32 - N - N))
*/
+
#ifdef __SSE2__
- /* Experience shows that for small shift values, SSE2 is actually slower. */
- if (shift > 2) {
- union {
- struct { FT_Int64 ax, ay, bx, by; } i;
- struct { __m128i a, b; } vec;
+ /* Experience shows that for small shift values, */
+ /* SSE2 is actually slower. */
+ if ( shift > 2 )
+ {
+ union
+ {
+ struct { FT_Int64 ax, ay, bx, by; } i;
+ struct { __m128i a, b; } vec;
+
} u;
u.i.ax = p0.x + p2.x - 2 * p1.x;
@@ -1138,10 +1155,11 @@ typedef ptrdiff_t FT_PtrDist;
p = _mm_add_epi64(p, q);
q = _mm_add_epi64(q, r);
- _mm_store_si128(&v.vec, p);
+ _mm_store_si128( &v.vec, p );
- gray_render_line( RAS_VAR_ v.i.px_hi, v.i.py_hi);
+ gray_render_line( RAS_VAR_ v.i.px_hi, v.i.py_hi );
}
+
return;
}
#endif /* __SSE2__ */
@@ -1167,13 +1185,15 @@ typedef ptrdiff_t FT_PtrDist;
qx += rx;
qy += ry;
- gray_render_line( RAS_VAR_ (FT_Pos)(px >> 32), (FT_Pos)(py >> 32));
+ gray_render_line( RAS_VAR_ (FT_Pos)( px >> 32 ),
+ (FT_Pos)( py >> 32 ) );
}
}
#else /* !BEZIER_USE_DDA */
- /* Note that multiple attempts to speed up the function below
+ /*
+ * Note that multiple attempts to speed up the function below
* with SSE2 intrinsics, using various data layouts, have turned
* out to be slower than the non-SIMD code below.
*/
@@ -1264,12 +1284,14 @@ typedef ptrdiff_t FT_PtrDist;
#endif /* !BEZIER_USE_DDA */
- /* For cubic bezier, binary splits are still faster than DDA
+
+ /*
+ * For cubic Bézier, binary splits are still faster than DDA
* because the splits are adaptive to how quickly each sub-arc
* approaches their chord trisection points.
*
* It might be useful to experiment with SSE2 to speed up
- * gray_split_cubic() though.
+ * `gray_split_cubic`, though.
*/
static void
gray_split_cubic( FT_Vector* base )
@@ -1361,6 +1383,7 @@ typedef ptrdiff_t FT_PtrDist;
}
}
+
static int
gray_move_to( const FT_Vector* to,
gray_PWorker worker )
@@ -1428,7 +1451,7 @@ typedef ptrdiff_t FT_PtrDist;
unsigned char* line = ras.target.origin - ras.target.pitch * y;
- for ( ; !CELL_IS_NULL(cell); cell = cell->next )
+ for ( ; !CELL_IS_NULL( cell ); cell = cell->next )
{
if ( cover != 0 && cell->x > x )
{
@@ -1476,7 +1499,7 @@ typedef ptrdiff_t FT_PtrDist;
TArea area;
- for ( ; !CELL_IS_NULL(cell); cell = cell->next )
+ for ( ; !CELL_IS_NULL( cell ); cell = cell->next )
{
if ( cover != 0 && cell->x > x )
{
@@ -1898,19 +1921,19 @@ typedef ptrdiff_t FT_PtrDist;
/* memory management */
n = ( height * sizeof ( PCell ) + sizeof ( TCell ) - 1 ) / sizeof ( TCell );
- ras.cells = buffer + n;
- ras.max_cells = (FT_PtrDist)( FT_MAX_GRAY_POOL - n );
+ ras.cells = buffer + n;
+ ras.max_cells = (FT_PtrDist)( FT_MAX_GRAY_POOL - n );
ras.cell_limit = ras.cells + ras.max_cells;
- ras.ycells = (PCell*)buffer;
+ ras.ycells = (PCell*)buffer;
- /* Initialize the null cell is at the start of the 'cells' array. */
- /* Note that this requires ras.cell_free initialization to skip */
- /* over the first entry in the array. */
- PCell null_cell = NULL_CELL_PTR(ras);
- null_cell->x = CELL_MAX_X_VALUE;
- null_cell->area = 0;
- null_cell->cover = 0;
- null_cell->next = NULL;;
+ /* Initialize the null cell at the start of the `cells` array. */
+ /* Note that this requires `ras.cell_free` initialization to skip */
+ /* over the first entry in the array. */
+ PCell null_cell = NULL_CELL_PTR( ras );
+ null_cell->x = CELL_MAX_X_VALUE;
+ null_cell->area = 0;
+ null_cell->cover = 0;
+ null_cell->next = NULL;;
for ( y = yMin; y < yMax; )
{
@@ -1928,7 +1951,8 @@ typedef ptrdiff_t FT_PtrDist;
TCoord w;
int error;
- for (w = 0; w < width; ++w)
+
+ for ( w = 0; w < width; ++w )
ras.ycells[w] = null_cell;
ras.cell_free = ras.cells + 1; /* NOTE: Skip over the null cell. */
diff --git a/tests/scripts/download-test-fonts.py b/tests/scripts/download-test-fonts.py
index cab133daf..52b742e22 100755
--- a/tests/scripts/download-test-fonts.py
+++ b/tests/scripts/download-test-fonts.py
@@ -1,8 +1,7 @@
#!/usr/bin/env python3
-"""Download test fonts used by the FreeType regression test programs.
-These will be copied to $FREETYPE/tests/data/ by default.
-"""
+"""Download test fonts used by the FreeType regression test programs. These
+will be copied to $FREETYPE/tests/data/ by default."""
import argparse
import collections
@@ -15,8 +14,8 @@ import zipfile
from typing import Callable, List, Optional, Tuple
-# The list of download items describing the font files to install.
-# Each download item is a dictionary with one of the following schemas:
+# The list of download items describing the font files to install. Each
+# download item is a dictionary with one of the following schemas:
#
# - File item:
#
@@ -28,8 +27,8 @@ from typing import Callable, List, Optional, Tuple
# install_name
# Type: file name string
# Required: No
-# Description: Installation name for the font file, only provided if it
-# must be different from the original URL's basename.
+# Description: Installation name for the font file, only provided if
+# it must be different from the original URL's basename.
#
# hex_digest
# Type: hexadecimal string
@@ -39,7 +38,7 @@ from typing import Callable, List, Optional, Tuple
# - Zip items:
#
# These items correspond to one or more font files that are embedded in a
-# remote zip archive. Each entry has the following fields:
+# remote zip archive. Each entry has the following fields:
#
# zip_url
# Type: URL string.
@@ -52,23 +51,25 @@ from typing import Callable, List, Optional, Tuple
# Description: A list of entries describing a single font file to be
# extracted from the archive
#
-# Apart from that, some schemas are used for dictionaries used inside download
-# items:
+# Apart from that, some schemas are used for dictionaries used inside
+# download items:
#
# - File entries:
#
-# These are dictionaries describing a single font file to extract from an archive.
+# These are dictionaries describing a single font file to extract from an
+# archive.
#
# filename
# Type: file path string
# Required: Yes
-# Description: Path of source file, relative to the archive's top-level directory.
+# Description: Path of source file, relative to the archive's
+# top-level directory.
#
# install_name
# Type: file name string
# Required: No
-# Description: Installation name for the font file, only provided if it must be
-# different from the original filename value.
+# Description: Installation name for the font file; only provided if
+# it must be different from the original filename value.
#
# hex_digest
# Type: hexadecimal string
@@ -90,7 +91,8 @@ _DOWNLOAD_ITEMS = [
def digest_data(data: bytes):
- """Compute the digest of a given input byte string, which are the first 8 bytes of its sha256 hash."""
+ """Compute the digest of a given input byte string, which is the first
+ 8 bytes of its sha256 hash."""
m = hashlib.sha256()
m.update(data)
return m.digest()[:8]
@@ -155,14 +157,16 @@ def extract_file_from_zip_archive(
Args:
archive: Input ZipFile object.
- archive_name: Archive name or URL, only used to generate a human-readable error
- message.
+ archive_name: Archive name or URL, only used to generate a
+ human-readable error message.
+
filepath: Input filepath in archive.
expected_digest: Optional digest for the file.
Returns:
A new File instance corresponding to the extracted file.
Raises:
- ValueError if expected_digest is not None and does not match the extracted file.
+ ValueError if expected_digest is not None and does not match the
+ extracted file.
"""
file = archive.open(filepath)
if expected_digest is not None:
@@ -181,7 +185,8 @@ def _get_and_install_file(
force_download: bool,
get_content: Callable[[], bytes],
) -> bool:
- if not force_download and hex_digest is not None and os.path.exists(install_path):
+ if not force_download and hex_digest is not None \
+ and os.path.exists(install_path):
with open(install_path, "rb") as f:
content: bytes = f.read()
if bytes.fromhex(hex_digest) == digest_data(content):
@@ -200,14 +205,15 @@ def download_and_install_item(
Args:
item: Download item as a dictionary, see above for schema.
install_dir: Installation directory.
- force_download: Set to True to force download and installation, even if
- the font file is already installed with the right content.
+ force_download: Set to True to force download and installation, even
+ if the font file is already installed with the right content.
Returns:
- A list of (install_name, status) tuples, where 'install_name' is the file's
- installation name under 'install_dir', and 'status' is a boolean that is True
- to indicate that the file was downloaded and installed, or False to indicate that
- the file is already installed with the right content.
+ A list of (install_name, status) tuples, where 'install_name' is the
+ file's installation name under 'install_dir', and 'status' is a
+ boolean that is True to indicate that the file was downloaded and
+ installed, or False to indicate that the file is already installed
+ with the right content.
"""
if "file_url" in item:
file_url = item["file_url"]
@@ -284,10 +290,13 @@ def main():
for install_name, status in download_and_install_item(
item, args.install_dir, args.force
):
- print("%s %s" % (install_name, "INSTALLED" if status else "UP-TO-DATE"))
+ print("%s %s" % (install_name,
+ "INSTALLED" if status else "UP-TO-DATE"))
return 0
if __name__ == "__main__":
sys.exit(main())
+
+# EOF