Tri/QuadriLinear changes for lcms2mt.

Add a mechanism so that we can force lcms2mt to use TriLinear (or QuadriLinear) interpolation for specific transforms. This is primarily useful so we can get "best case" transforms which we can use as a baseline for our accelerations. The use of TriLinear/QuadriLinear is governed by a new cmsFLAGS bit (cmsFLAGS_FORCE_LINEARINTERP), passed in to link creation. Unfortunately, these flag bits are not passed all the way down to where we actually need them. To avoid having to add an extra flags argument in many places, I stash the flags in a new field in the cmsContext. This is safe, if not entirely pretty, as each context will only be used in a single thread at a time. If this were a feature used for more than just testing, we'd consider doing it in the neater (but more invasive) way.
author: Robin Watts <Robin.Watts@artifex.com> 2019-09-18 14:28:32 +0100
committer: Robin Watts <Robin.Watts@artifex.com> 2019-09-18 17:07:52 +0100
commit: 107aae1f353db3b62f8b714c5100ec7bc180f2fb (patch)
tree: 92f9b28024fa9a973fc31b7021a68effc5fe3aaa /lcms2mt
parent: 0b05dca78aaa456ba7b55b853917a4ae68a6fd38 (diff)
download: ghostpdl-107aae1f353db3b62f8b714c5100ec7bc180f2fb.tar.gz
5 files changed, 171 insertions, 5 deletions
diff --git a/lcms2mt/include/lcms2mt.h b/lcms2mt/include/lcms2mt.h
index d36115d22..6d5ad1aa2 100644
--- a/lcms2mt/include/lcms2mt.h
+++ b/lcms2mt/include/lcms2mt.h
@@ -1644,6 +1644,8 @@ CMSAPI cmsUInt32Number  CMSEXPORT cmsGetSupportedIntents(cmsContext ContextID,
 // Copy alpha channels when transforming
 #define cmsFLAGS_COPY_ALPHA               0x04000000 // Alpha channels are copied on cmsDoTransform()
 
+#define cmsFLAGS_FORCE_LINEARINTERP       0x08000000 // Force slower but more accurate interpolation
+
 // Fine-tune control over number of gridpoints
 #define cmsFLAGS_GRIDPOINTS(n)           (((n) & 0xFF) << 16)
 
diff --git a/lcms2mt/src/cmscnvrt.c b/lcms2mt/src/cmscnvrt.c
index afa3de60d..f29ab5bab 100644
--- a/lcms2mt/src/cmscnvrt.c
+++ b/lcms2mt/src/cmscnvrt.c
@@ -546,6 +546,8 @@ cmsPipeline* DefaultICCintents(cmsContext       ContextID,
     Result = cmsPipelineAlloc(ContextID, 0, 0);
     if (Result == NULL) return NULL;
 
+    ContextID->dwFlags = dwFlags;
+
     CurrentColorSpace = cmsGetColorSpace(ContextID, hProfiles[0]);
 
     for (i=0; i < nProfiles; i++) {
@@ -653,10 +655,14 @@ cmsPipeline* DefaultICCintents(cmsContext       ContextID,
 
     }
 
+    ContextID->dwFlags = 0;
+
     return Result;
 
 Error:
 
+    ContextID->dwFlags = 0;
+
     if (Lut != NULL) cmsPipelineFree(ContextID, Lut);
     if (Result != NULL) cmsPipelineFree(ContextID, Result);
     return NULL;
diff --git a/lcms2mt/src/cmsintrp.c b/lcms2mt/src/cmsintrp.c
index b6846f693..555125e32 100644
--- a/lcms2mt/src/cmsintrp.c
+++ b/lcms2mt/src/cmsintrp.c
@@ -85,6 +85,10 @@ cmsBool  _cmsRegisterInterpPlugin(cmsContext ContextID, cmsPluginBase* Data)
 cmsBool _cmsSetInterpolationRoutine(cmsContext ContextID, cmsInterpParams* p)
 {
     _cmsInterpPluginChunkType* ptr = (_cmsInterpPluginChunkType*) _cmsContextGetClientChunk(ContextID, InterpPlugin);
+    cmsUInt32Number flags = 0;
+
+    if (ContextID->dwFlags & cmsFLAGS_FORCE_LINEARINTERP)
+        flags = CMS_LERP_FLAGS_TRILINEAR;
 
     p ->Interpolation.Lerp16 = NULL;
 
@@ -95,7 +99,7 @@ cmsBool _cmsSetInterpolationRoutine(cmsContext ContextID, cmsInterpParams* p)
     // If unsupported by the plug-in, go for the LittleCMS default.
     // If happens only if an extern plug-in is being used
     if (p ->Interpolation.Lerp16 == NULL)
-        p ->Interpolation = DefaultInterpolatorsFactory(p ->nInputs, p ->nOutputs, p ->dwFlags);
+        p ->Interpolation = DefaultInterpolatorsFactory(p ->nInputs, p ->nOutputs, p ->dwFlags | flags);
 
     // Check for valid interpolator (we just check one member of the union)
     if (p ->Interpolation.Lerp16 == NULL) {
@@ -607,6 +611,149 @@ void TrilinearInterp16(cmsContext ContextID, register const cmsUInt16Number Inpu
 #   undef DENS
 }
 
+static
+void QuadrilinearInterpFloat(cmsContext        ContextID,
+                       const cmsFloat32Number  Input[],
+                             cmsFloat32Number  Output[],
+                       const cmsInterpParams  *p)
+// 4-input float interpolation: two TrilinearInterpFloat passes over Input[1..3], blended linearly along Input[0].
+{
+    cmsFloat32Number rest;
+    cmsFloat32Number pk;
+    int k0;
+    cmsUInt32Number i, n;
+    cmsFloat32Number Tmp[MAX_STAGE_CHANNELS];
+    cmsInterpParams p1 = *p;
+    cmsFloat32Number i0 = fclamp(Input[0]);
+    // Scale the first input by Domain[0], then split it into an integer part (k0) and a fractional part (rest).
+    pk = i0 * p->Domain[0];
+    k0 = _cmsQuickFloor(pk);
+    rest = pk - (cmsFloat32Number) k0;
+    // p1 is a copy of *p with Domain[1..3] moved down to Domain[0..2] and Table offset by opta[3] * k0.
+    memmove(&p1.Domain[0], &p ->Domain[1], 3*sizeof(cmsUInt32Number));
+    p1.Table = ((cmsFloat32Number*) p -> Table) + p -> opta[3] * k0;
+    // First pass: evaluate the remaining three inputs at k0, writing straight into Output.
+    TrilinearInterpFloat(ContextID, Input + 1,  Output, &p1);
+    // When fclamp(Input[0]) is exactly 1.0 the first pass is returned as-is; no second pass is made.
+    if (i0 == 1.0)
+        return;
+    // Second pass: advance Table by one further opta[3] step and evaluate into Tmp.
+    p1.Table = ((cmsFloat32Number*) p1.Table) + p->opta[3];
+    TrilinearInterpFloat(ContextID, Input + 1,  Tmp, &p1);
+    // Blend the two passes per output channel, weighted by rest.
+    n = p -> nOutputs;
+    for (i=0; i < n; i++) {
+        cmsFloat32Number y0 = Output[i];
+        cmsFloat32Number y1 = Tmp[i];
+
+        Output[i] = y0 + (y1 - y0) * rest;
+    }
+}
+
+static CMS_NO_SANITIZE
+void QuadrilinearInterp16(cmsContext       ContextID,
+           register const cmsUInt16Number  Input[],
+           register cmsUInt16Number        Output[],
+           register const cmsInterpParams *p)
+// 16-bit 4-input interpolation: reads 16 table nodes per output channel and reduces them with successive LERPs.
+{
+#define DENS(i,j,k,l) (LutTable[(i)+(j)+(k)+(l)+OutChan])
+#define LERP(a,l,h)     (cmsUInt16Number) (l + ROUND_FIXED_TO_INT(((h-l)*a)))
+
+           const cmsUInt16Number* LutTable = (cmsUInt16Number*) p ->Table;
+           int                    OutChan, TotalOut;
+           cmsS15Fixed16Number    fx, fy, fz, fk;
+  register int                    rx, ry, rz, rk;
+           int                    x0, y0, z0, k0;
+  register int                    X0, X1, Y0, Y1, Z0, Z1, K0, K1;
+           int                    d0000, d0001, d0010, d0011,
+                                  d0100, d0101, d0110, d0111,
+                                  d1000, d1001, d1010, d1011,
+                                  d1100, d1101, d1110, d1111,
+                                  d000, d001, d010, d011,
+                                  d100, d101, d110, d111,
+                                  dx00, dx01, dx10, dx11,
+                                  dxy0, dxy1, dxyz;
+           cmsUNUSED_PARAMETER(ContextID);
+
+    TotalOut   = p -> nOutputs;
+    // Per input n: scale by Domain[n], then split via FIXED_TO_INT / FIXED_REST_TO_INT into a grid index and a rest.
+    fx = _cmsToFixedDomain((int) Input[0] * p -> Domain[0]);
+    x0  = FIXED_TO_INT(fx);
+    rx  = FIXED_REST_TO_INT(fx);    // Rest in 0..1.0 domain
+
+
+    fy = _cmsToFixedDomain((int) Input[1] * p -> Domain[1]);
+    y0  = FIXED_TO_INT(fy);
+    ry  = FIXED_REST_TO_INT(fy);
+
+    fz = _cmsToFixedDomain((int) Input[2] * p -> Domain[2]);
+    z0 = FIXED_TO_INT(fz);
+    rz = FIXED_REST_TO_INT(fz);
+
+    fk = _cmsToFixedDomain((int) Input[3] * p -> Domain[3]);
+    k0 = FIXED_TO_INT(fk);
+    rk = FIXED_REST_TO_INT(fk);
+    // Offsets into LutTable for the lower (*0) and upper (*1) node on each axis; Input[n] uses stride opta[3-n].
+    // An input of 0xFFFF adds no step, so the upper node coincides with the lower one.
+    X0 = p -> opta[3] * x0;
+    X1 = X0 + (Input[0] == 0xFFFFU ? 0 : p->opta[3]);
+
+    Y0 = p -> opta[2] * y0;
+    Y1 = Y0 + (Input[1] == 0xFFFFU ? 0 : p->opta[2]);
+
+    Z0 = p -> opta[1] * z0;
+    Z1 = Z0 + (Input[2] == 0xFFFFU ? 0 : p->opta[1]);
+
+    K0 = p -> opta[0] * k0;
+    K1 = K0 + (Input[3] == 0xFFFFU ? 0 : p->opta[0]);
+
+    for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+        // Digits in dXYZK name the node (0 = lower, 1 = upper) in X, Y, Z, K order. First collapse K using rk.
+        d0000 = DENS(X0, Y0, Z0, K0);
+        d0001 = DENS(X0, Y0, Z0, K1);
+        d000  = LERP(rk, d0000, d0001);
+        d0010 = DENS(X0, Y0, Z1, K0);
+        d0011 = DENS(X0, Y0, Z1, K1);
+        d001  = LERP(rk, d0010, d0011);
+        d0100 = DENS(X0, Y1, Z0, K0);
+        d0101 = DENS(X0, Y1, Z0, K1);
+        d010  = LERP(rk, d0100, d0101);
+        d0110 = DENS(X0, Y1, Z1, K0);
+        d0111 = DENS(X0, Y1, Z1, K1);
+        d011  = LERP(rk, d0110, d0111);
+        // Same K collapse for the nodes at X1.
+        d1000 = DENS(X1, Y0, Z0, K0);
+        d1001 = DENS(X1, Y0, Z0, K1);
+        d100  = LERP(rk, d1000, d1001);
+        d1010 = DENS(X1, Y0, Z1, K0);
+        d1011 = DENS(X1, Y0, Z1, K1);
+        d101  = LERP(rk, d1010, d1011);
+        d1100 = DENS(X1, Y1, Z0, K0);
+        d1101 = DENS(X1, Y1, Z0, K1);
+        d110  = LERP(rk, d1100, d1101);
+        d1110 = DENS(X1, Y1, Z1, K0);
+        d1111 = DENS(X1, Y1, Z1, K1);
+        d111  = LERP(rk, d1110, d1111);
+        // Collapse X using rx; the two remaining digits are Y and Z.
+        dx00 = LERP(rx, d000, d100);
+        dx01 = LERP(rx, d001, d101);
+        dx10 = LERP(rx, d010, d110);
+        dx11 = LERP(rx, d011, d111);
+        // Collapse Y using ry.
+        dxy0 = LERP(ry, dx00, dx10);
+        dxy1 = LERP(ry, dx01, dx11);
+        // Collapse Z using rz.
+        dxyz = LERP(rz, dxy0, dxy1);
+
+        Output[OutChan] = (cmsUInt16Number) dxyz;
+    }
+
+
+#   undef LERP
+#   undef DENS
+}
+
 
 // Tetrahedral interpolation, using Sakamoto algorithm.
 #define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
@@ -1508,10 +1655,17 @@ cmsInterpFunction DefaultInterpolatorsFactory(cmsUInt32Number nInputChannels, cm
 
            case 4:  // CMYK lut
 
-               if (IsFloat)
-                   Interpolation.LerpFloat =  Eval4InputsFloat;
-               else
-                   Interpolation.Lerp16    =  Eval4Inputs;
+               if (IsTrilinear) {
+                   if (IsFloat)
+                       Interpolation.LerpFloat =  QuadrilinearInterpFloat;
+                   else
+                       Interpolation.Lerp16    =  QuadrilinearInterp16;
+               } else {
+                   if (IsFloat)
+                       Interpolation.LerpFloat =  Eval4InputsFloat;
+                   else
+                       Interpolation.Lerp16    =  Eval4Inputs;
+               }
                break;
 
            case 5: // 5 Inks
diff --git a/lcms2mt/src/cmsopt.c b/lcms2mt/src/cmsopt.c
index 93d78f261..fedb5043f 100644
--- a/lcms2mt/src/cmsopt.c
+++ b/lcms2mt/src/cmsopt.c
@@ -695,8 +695,10 @@ cmsBool OptimizeByResampling(cmsContext ContextID, cmsPipeline** Lut, cmsUInt32N
         }
     }
 
+    ContextID->dwFlags = *dwFlags;
     // Allocate the CLUT
     CLUT = cmsStageAllocCLut16bit(ContextID, nGridPoints, Src ->InputChannels, Src->OutputChannels, NULL);
+    ContextID->dwFlags = 0;
     if (CLUT == NULL) goto Error;
 
     // Add the CLUT to the destination LUT
diff --git a/lcms2mt/src/lcms2_internal.h b/lcms2mt/src/lcms2_internal.h
index bc971fc50..896255921 100644
--- a/lcms2mt/src/lcms2_internal.h
+++ b/lcms2mt/src/lcms2_internal.h
@@ -517,6 +517,8 @@ struct _cmsContext_struct {
                                       // If NULL, then it reverts to global Context0
 
     _cmsMemPluginChunkType DefaultMemoryManager;  // The allocators used for creating the context itself. Cannot be overridden
+
+    cmsUInt32Number dwFlags;          // Horrid, but safe hack
 };
 
 // Returns a pointer to a valid context structure, including the global one if id is zero.
author	Robin Watts <Robin.Watts@artifex.com>	2019-09-18 14:28:32 +0100
committer	Robin Watts <Robin.Watts@artifex.com>	2019-09-18 17:07:52 +0100
commit	107aae1f353db3b62f8b714c5100ec7bc180f2fb (patch)
tree	92f9b28024fa9a973fc31b7021a68effc5fe3aaa /lcms2mt
parent	0b05dca78aaa456ba7b55b853917a4ae68a6fd38 (diff)
download	ghostpdl-107aae1f353db3b62f8b714c5100ec7bc180f2fb.tar.gz