Add Lab -> RGB/CMYK/Gray/Lab kernel to fast float plugin

First try, although it seems to work. It is more accurate close to the neutral axis and less accurate on self-fluorescent, hyper-saturated colors, which are rare. configure has to be regenerated (it will be wiped out soon).
author: Marti Maria <marti.maria@littlecms.com> 2020-12-02 22:03:40 +0100
committer: Marti Maria <marti.maria@littlecms.com> 2020-12-02 22:03:40 +0100
commit: fb843e69014751aa630188629099c04277135e2d (patch)
tree: 1d18674f81434e52b8e163ca9abf0a7b7b7b9025 /plugins
parent: e2b6e7e06520123a9821f0381877dfe94cefb2e5 (diff)
download: lcms2-fb843e69014751aa630188629099c04277135e2d.tar.gz
8 files changed, 631 insertions, 106 deletions
diff --git a/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj b/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj
index 40e42b9..a1b6540 100644
--- a/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj
+++ b/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj
@@ -32,6 +32,7 @@
     <ClCompile Include="..\..\src\fast_float_15mats.c" />
     <ClCompile Include="..\..\src\fast_float_cmyk.c" />
     <ClCompile Include="..\..\src\fast_float_curves.c" />
+    <ClCompile Include="..\..\src\fast_float_lab.c" />
     <ClCompile Include="..\..\src\fast_float_matsh.c" />
     <ClCompile Include="..\..\src\fast_float_separate.c" />
     <ClCompile Include="..\..\src\fast_float_sup.c" />
diff --git a/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj.filters b/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj.filters
index c2761d1..58fcf43 100644
--- a/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj.filters
+++ b/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj.filters
@@ -60,6 +60,9 @@
     <ClCompile Include="..\..\src\fast_8_matsh.c">
       <Filter>Source Files</Filter>
     </ClCompile>
+    <ClCompile Include="..\..\src\fast_float_lab.c">
+      <Filter>Source Files</Filter>
+    </ClCompile>
   </ItemGroup>
   <ItemGroup>
     <None Include="..\..\COPYING.GPL3">
diff --git a/plugins/fast_float/src/Makefile.am b/plugins/fast_float/src/Makefile.am
index 0ef9728..723dfea 100644
--- a/plugins/fast_float/src/Makefile.am
+++ b/plugins/fast_float/src/Makefile.am
@@ -23,5 +23,5 @@ liblcms2_fast_float_la_LIBADD = $(LCMS_LIB_DEPLIBS) $(top_builddir)/src/liblcms2
 
 liblcms2_fast_float_la_SOURCES = fast_8_curves.c fast_8_matsh_sse.c fast_8_matsh.c fast_8_tethra.c \
   fast_16_tethra.c fast_float_15bits.c fast_float_15mats.c fast_float_cmyk.c fast_float_curves.c fast_float_matsh.c  \
-  fast_float_separate.c fast_float_sup.c fast_float_tethra.c fast_float_internal.h
+  fast_float_separate.c fast_float_sup.c fast_float_tethra.c fast_float_lab.c fast_float_internal.h
 
diff --git a/plugins/fast_float/src/fast_float_internal.h b/plugins/fast_float/src/fast_float_internal.h
index c43e8f9..43de02d 100644
--- a/plugins/fast_float/src/fast_float_internal.h
+++ b/plugins/fast_float/src/fast_float_internal.h
@@ -200,20 +200,20 @@ cmsBool Optimize8ByJoiningCurves(_cmsTransform2Fn* TransformFn,
                                  cmsUInt32Number* dwFlags);
 
 cmsBool OptimizeFloatByJoiningCurves(_cmsTransform2Fn* TransformFn,                                  
-                                void** UserData,
-                                _cmsFreeUserDataFn* FreeUserData,
-                                cmsPipeline** Lut, 
-                                cmsUInt32Number* InputFormat, 
-                                cmsUInt32Number* OutputFormat, 
-                                cmsUInt32Number* dwFlags);    
+                                   void** UserData,
+                                   _cmsFreeUserDataFn* FreeUserData,
+                                   cmsPipeline** Lut, 
+                                   cmsUInt32Number* InputFormat, 
+                                   cmsUInt32Number* OutputFormat, 
+                                   cmsUInt32Number* dwFlags);    
 
 cmsBool OptimizeFloatMatrixShaper(_cmsTransform2Fn* TransformFn,                                  
-                             void** UserData,
-                             _cmsFreeUserDataFn* FreeUserData,
-                             cmsPipeline** Lut, 
-                             cmsUInt32Number* InputFormat, 
-                             cmsUInt32Number* OutputFormat, 
-                             cmsUInt32Number* dwFlags);
+                                   void** UserData,
+                                   _cmsFreeUserDataFn* FreeUserData,
+                                   cmsPipeline** Lut, 
+                                   cmsUInt32Number* InputFormat, 
+                                   cmsUInt32Number* OutputFormat, 
+                                   cmsUInt32Number* dwFlags);
 
 cmsBool Optimize8BitRGBTransform(_cmsTransform2Fn* TransformFn,
                                    void** UserData,
@@ -240,11 +240,21 @@ cmsBool OptimizeCLUTRGBTransform(_cmsTransform2Fn* TransformFn,
                                   cmsUInt32Number* dwFlags);      
 
 cmsBool OptimizeCLUTCMYKTransform(_cmsTransform2Fn* TransformFn,
-					void** UserData,
-					_cmsFreeUserDataFn* FreeDataFn,
-					cmsPipeline** Lut,
-					cmsUInt32Number* InputFormat,
-					cmsUInt32Number* OutputFormat,
-					cmsUInt32Number* dwFlags);
+					              void** UserData,
+					              _cmsFreeUserDataFn* FreeDataFn,
+					              cmsPipeline** Lut,
+					              cmsUInt32Number* InputFormat,
+					              cmsUInt32Number* OutputFormat,
+					              cmsUInt32Number* dwFlags);
+
+
+cmsBool OptimizeCLUTLabTransform(_cmsTransform2Fn* TransformFn,
+                                 void** UserData,
+                                 _cmsFreeUserDataFn* FreeDataFn,
+                                 cmsPipeline** Lut, 
+                                 cmsUInt32Number* InputFormat, 
+                                 cmsUInt32Number* OutputFormat, 
+                                 cmsUInt32Number* dwFlags);      
+
 
 #endif
diff --git a/plugins/fast_float/src/fast_float_lab.c b/plugins/fast_float/src/fast_float_lab.c
new file mode 100644
index 0000000..da2ddb7
--- /dev/null
+++ b/plugins/fast_float/src/fast_float_lab.c
@@ -0,0 +1,428 @@
+//---------------------------------------------------------------------------------
+//
+//  Little Color Management System, fast floating point extensions
+//  Copyright (c) 1998-2020 Marti Maria Saguer, all rights reserved
+//
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+// 
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+// GNU General Public License for more details.
+// 
+// You should have received a copy of the GNU General Public License
+// along with this program.  If not, see <http://www.gnu.org/licenses/>.
+//
+//---------------------------------------------------------------------------------
+
+#include "fast_float_internal.h"
+
+
+#define SIGMOID_POINTS 1024
+
+// Optimization for floating point tetrahedral interpolation  using Lab as indexing space
+typedef struct {
+    // Per-transform data for the Lab kernel; built by LabCLUTAlloc.
+    cmsContext ContextID;       // Context the block was allocated in
+    const cmsInterpParams* p;   // Tetrahedral interpolation parameters. This is a not-owned pointer.
+    // Sigmoid lookup tables, SIGMOID_POINTS entries each, sampled across 0..1
+    cmsFloat32Number sigmoidIn[SIGMOID_POINTS];   // to apply to a*/b* axis on indexing
+    cmsFloat32Number sigmoidOut[SIGMOID_POINTS];  // the curve above, inverted.
+
+} LabCLUTdata;
+
+
+typedef struct {
+    // Cargo handed to XFormSampler while the CLUT is being resampled.
+    LabCLUTdata* data;       // supplies the inverse sigmoid table
+    cmsPipeline* original;   // pipeline evaluated by the sampler
+
+} ResamplingContainer;
+
+/**
+* Predefined tone curve
+*/
+#define TYPE_SIGMOID  109
+
+
+// Floating-point version of 1D interpolation, on a table of SIGMOID_POINTS nodes.
+// Value is a position in the 0..1 domain; values outside it return the end nodes.
+cmsINLINE cmsFloat32Number LinLerp1D(cmsFloat32Number Value, const cmsFloat32Number* LutTable)
+{
+    cmsFloat32Number pos, frac;
+    cmsFloat32Number lo, hi;
+    int node;
+
+    // At or above the top of the domain: last node
+    if (Value >= 1.0f)
+        return LutTable[SIGMOID_POINTS - 1];
+
+    // At or below the bottom of the domain: first node
+    if (Value <= 0)
+        return LutTable[0];
+
+    // Strictly inside the domain: scale to node coordinates
+    pos = Value * (SIGMOID_POINTS - 1);
+
+    // The integer part selects the cell...
+    node = _cmsQuickFloor(pos);
+
+    // ...and the remainder is the weight of its upper node
+    frac = pos - node;
+
+    lo = LutTable[node];
+    hi = LutTable[node + 1];
+
+    // Blend the two nodes
+    return lo + (hi - lo) * frac;
+}
+
+// Fills table[] with tablePoints samples, taken at evenly spaced inputs across 0..1,
+// of the parametric curve 'type' built with a fixed slope of 2.5. Samples go through fclamp.
+// If the curve cannot be built the table is left zero-filled.
+static
+void tabulateSigmoid(cmsContext ContextID, cmsInt32Number type, cmsFloat32Number table[], cmsInt32Number tablePoints)
+{
+    const cmsFloat64Number slope = 2.5;
+    const cmsFloat32Number last = (cmsFloat32Number)(tablePoints - 1);
+    cmsToneCurve* curve;
+    cmsInt32Number n;
+
+    memset(table, 0, sizeof(cmsFloat32Number) * tablePoints);
+
+    curve = cmsBuildParametricToneCurve(ContextID, type, &slope);
+    if (curve == NULL) return;
+
+    for (n = 0; n < tablePoints; n++)
+        table[n] = fclamp(cmsEvalToneCurveFloat(curve, (cmsFloat32Number)n / last));
+
+    cmsFreeToneCurve(curve);
+}
+
+
+// Allocates the zero-initialized LabCLUTdata container and tabulates both sigmoid curves.
+// Returns NULL if the allocation fails. The interpolation parameters are referenced, not copied.
+static
+LabCLUTdata* LabCLUTAlloc(cmsContext ContextID, const cmsInterpParams* p)
+{
+    LabCLUTdata* cargo = (LabCLUTdata*) _cmsMallocZero(ContextID, sizeof(LabCLUTdata));
+
+    if (cargo != NULL) {
+        cargo->ContextID = ContextID;
+        cargo->p = p;
+
+        // Forward curve for indexing, and the same type negated for the inverted curve
+        tabulateSigmoid(ContextID, +TYPE_SIGMOID, cargo->sigmoidIn, SIGMOID_POINTS);
+        tabulateSigmoid(ContextID, -TYPE_SIGMOID, cargo->sigmoidOut, SIGMOID_POINTS);
+    }
+    return cargo;
+}
+
+static
+void LabCLUTFree(cmsContext ContextID, void* v)   // Releases the block 'v' through _cmsFree
+{    
+    _cmsFree(ContextID, v);
+}
+
+// Sampler implemented by another LUT: evaluates the original pipeline for the given input.
+// Channels 1 and 2 (a*/b*) go through the inverse sigmoid first; channel 0 is passed as is.
+static
+int XFormSampler(CMSREGISTER const cmsFloat32Number In[], CMSREGISTER cmsFloat32Number Out[], CMSREGISTER void* Cargo)
+{
+    const ResamplingContainer* rc = (const ResamplingContainer*)Cargo;
+    const cmsFloat32Number* inverse = rc->data->sigmoidOut;
+    cmsFloat32Number node[3];
+
+    node[0] = In[0];
+    node[1] = LinLerp1D(In[1], inverse);
+    node[2] = LinLerp1D(In[2], inverse);
+    cmsPipelineEvalFloat(node, Out, rc->original);
+    return TRUE;
+}
+
+// An optimized interpolation for Lab.
+// DENS reads the CLUT entry of the current output channel at the summed per-axis offsets.
+#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
+
+// Transform worker installed by OptimizeCLUTLabTransform. For every pixel, L* is encoded as
+// L/100 (clamped) and a*/b* as (v + 128)/255 passed through the sigmoidIn table; the result
+// indexes the float CLUT with tetrahedral interpolation. When cmsFLAGS_COPY_ALPHA is set
+// and the format carries an extra channel, that channel is copied across as a float.
+static
+void LabCLUTEval(struct _cmstransform_struct* CMMcargo,
+                        const void* Input,
+                        void* Output,
+                        cmsUInt32Number PixelsPerLine,
+                        cmsUInt32Number LineCount,
+                        const cmsStride* Stride)
+{
+    LabCLUTdata* pfloat = (LabCLUTdata*)_cmsGetTransformUserData(CMMcargo);
+
+    cmsFloat32Number        l, a, b;
+    cmsFloat32Number        px, py, pz;
+    int                     x0, y0, z0;
+    int                     X0, Y0, Z0, X1, Y1, Z1;
+    cmsFloat32Number        rx, ry, rz;
+    cmsFloat32Number        c0, c1 = 0, c2 = 0, c3 = 0;
+    cmsUInt32Number         OutChan;
+
+    const cmsInterpParams* p = pfloat->p;
+    cmsUInt32Number        TotalOut = p->nOutputs;
+    cmsUInt32Number        TotalPlusAlpha;
+    const cmsFloat32Number* LutTable = (const cmsFloat32Number*)p->Table;
+
+    cmsUInt32Number       i, ii;
+    const cmsUInt8Number* lin;
+    const cmsUInt8Number* ain;
+    const cmsUInt8Number* bin;
+    const cmsUInt8Number* xin = NULL;
+
+    cmsUInt8Number* out[cmsMAXCHANNELS];
+    cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS];
+    cmsUInt32Number SourceIncrements[cmsMAXCHANNELS];
+    cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS];
+    cmsUInt32Number DestIncrements[cmsMAXCHANNELS];
+
+    cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo);
+    cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo);
+
+    cmsUInt32Number nchans, nalpha;
+    cmsUInt32Number strideIn, strideOut;
+
+    _cmsComputeComponentIncrements(InputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements);
+    _cmsComputeComponentIncrements(OutputFormat, Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements);
+
+    // Extra channels are only copied when the caller asked for it
+    if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA))
+        nalpha = 0;
+
+    strideIn = strideOut = 0;
+    for (i = 0; i < LineCount; i++) {
+
+        // Per-line start of each input component
+        lin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn;
+        ain = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn;
+        bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2] + strideIn;
+
+        if (nalpha)
+            xin = (const cmsUInt8Number*)Input + SourceStartingOrder[3] + strideIn;
+
+        TotalPlusAlpha = TotalOut;
+        if (xin) TotalPlusAlpha++;
+
+        // Per-line start of each output component, extra channel included
+        for (ii = 0; ii < TotalPlusAlpha; ii++)
+            out[ii] = (cmsUInt8Number*)Output + DestStartingOrder[ii] + strideOut;
+
+        for (ii = 0; ii < PixelsPerLine; ii++) {
+
+            // Decode Lab and go across sigmoids on a*/b*
+            l = fclamp((*(cmsFloat32Number*)lin) / 100.0f);
+            a = LinLerp1D(((*(cmsFloat32Number*)ain) + 128.0f) / 255.0f, pfloat->sigmoidIn);
+            b = LinLerp1D(((*(cmsFloat32Number*)bin) + 128.0f) / 255.0f, pfloat->sigmoidIn);
+
+            lin += SourceIncrements[0];
+            ain += SourceIncrements[1];
+            bin += SourceIncrements[2];
+
+            // Grid coordinates: the integer part selects the cell, the fraction is the weight
+            px = l * p->Domain[0];
+            py = a * p->Domain[1];
+            pz = b * p->Domain[2];
+
+            x0 = _cmsQuickFloor(px); rx = (px - (cmsFloat32Number)x0);
+            y0 = _cmsQuickFloor(py); ry = (py - (cmsFloat32Number)y0);
+            z0 = _cmsQuickFloor(pz); rz = (pz - (cmsFloat32Number)z0);
+
+            // Offsets of the cell corners; at the top of an axis the upper corner is the lower one
+            X0 = p->opta[2] * x0;
+            X1 = X0 + (l >= 1.0f ? 0 : p->opta[2]);
+
+            Y0 = p->opta[1] * y0;
+            Y1 = Y0 + (a >= 1.0f ? 0 : p->opta[1]);
+
+            Z0 = p->opta[0] * z0;
+            Z1 = Z0 + (b >= 1.0f ? 0 : p->opta[0]);
+
+            for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+
+                // These are the 6 tetrahedra, selected by the ordering of rx, ry, rz
+                c0 = DENS(X0, Y0, Z0);
+
+                if (rx >= ry && ry >= rz) {
+
+                    c1 = DENS(X1, Y0, Z0) - c0;
+                    c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
+                    c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+                }
+                else if (rx >= rz && rz >= ry) {
+
+                    c1 = DENS(X1, Y0, Z0) - c0;
+                    c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                    c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
+                }
+                else if (rz >= rx && rx >= ry) {
+
+                    c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
+                    c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                    c3 = DENS(X0, Y0, Z1) - c0;
+                }
+                else if (ry >= rx && rx >= rz) {
+
+                    c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
+                    c2 = DENS(X0, Y1, Z0) - c0;
+                    c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+                }
+                else if (ry >= rz && rz >= rx) {
+
+                    c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                    c2 = DENS(X0, Y1, Z0) - c0;
+                    c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
+                }
+                else if (rz >= ry && ry >= rx) {
+
+                    c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                    c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
+                    c3 = DENS(X0, Y0, Z1) - c0;
+                }
+                else {
+                    c1 = c2 = c3 = 0;
+                }
+
+                *(cmsFloat32Number*)(out[OutChan]) = c0 + c1 * rx + c2 * ry + c3 * rz;
+
+                out[OutChan] += DestIncrements[OutChan];
+            }
+
+            // Copy the extra channel as a whole float sample and advance both pointers
+            if (xin) {
+                *(cmsFloat32Number*)(out[TotalOut]) = *(const cmsFloat32Number*)xin;
+                xin += SourceIncrements[3];
+                out[TotalOut] += DestIncrements[TotalOut];
+            }
+        }
+
+        strideIn  += Stride->BytesPerLineIn;
+        strideOut += Stride->BytesPerLineOut;
+    }
+}
+
+#undef DENS
+
+
+/**
+* Grid point count for the CLUT, derived from the transform flags.
+* An explicit count stored in bits 16..23 of the flags wins; otherwise the
+* precalculation hints select 66 (high), 33 (low) or 51 (neither).
+*/
+static
+int GetGridpoints(cmsUInt32Number dwFlags)
+{
+    const cmsUInt32Number requested = (dwFlags >> 16) & 0xFF;
+
+    // Already specified?
+    if (requested != 0)
+        return (int)requested;
+
+    // HighResPrecalc is maximum resolution
+    if (dwFlags & cmsFLAGS_HIGHRESPRECALC)
+        return 66;
+
+    // LowResPrecalc is lower resolution
+    if (dwFlags & cmsFLAGS_LOWRESPRECALC)
+        return 33;
+
+    return 51;
+}
+
+// --------------------------------------------------------------------------------------------------------------
+// Optimizer for Lab float input: resamples the pipeline into a float CLUT (a*/b* grid axes
+// are sigmoid-encoded) and installs LabCLUTEval with its LabCLUTdata as user data.
+// Returns FALSE, leaving *Lut untouched, when the transform does not qualify or on failure.
+cmsBool OptimizeCLUTLabTransform(_cmsTransform2Fn* TransformFn,
+                                  void** UserData,
+                                  _cmsFreeUserDataFn* FreeDataFn,
+                                  cmsPipeline** Lut,
+                                  cmsUInt32Number* InputFormat,
+                                  cmsUInt32Number* OutputFormat,
+                                  cmsUInt32Number* dwFlags)
+{
+    cmsPipeline* OriginalLut;
+    int nGridPoints;
+    cmsPipeline* OptimizedLUT = NULL;
+    cmsStage* OptimizedCLUTmpe;
+    cmsStage* mpe;
+    LabCLUTdata* pfloat = NULL;
+    cmsContext ContextID;
+    _cmsStageCLutData* data;
+    ResamplingContainer container;
+
+    // For empty transforms, do nothing
+    if (*Lut == NULL) return FALSE;
+
+    // Check for floating point only
+    if (!T_FLOAT(*InputFormat) || !T_FLOAT(*OutputFormat)) return FALSE;
+
+    // Only on floats
+    if (T_BYTES(*InputFormat) != sizeof(cmsFloat32Number) ||
+        T_BYTES(*OutputFormat) != sizeof(cmsFloat32Number)) return FALSE;
+
+    // Input has to be Lab, output may be any
+    if (T_COLORSPACE(*InputFormat) != PT_Lab) return FALSE;
+
+    OriginalLut = *Lut;
+
+    // Named color pipelines cannot be optimized either
+    for (mpe = cmsPipelineGetPtrToFirstStage(OriginalLut); mpe != NULL; mpe = cmsStageNext(mpe)) {
+        if (cmsStageType(mpe) == cmsSigNamedColorElemType) return FALSE;
+    }
+
+    ContextID = cmsGetPipelineContextID(OriginalLut);
+    nGridPoints = GetGridpoints(*dwFlags);
+
+    // Create the result LUT
+    OptimizedLUT = cmsPipelineAlloc(ContextID, 3, cmsPipelineOutputChannels(OriginalLut));
+    if (OptimizedLUT == NULL) goto Error;
+
+    // Allocate the CLUT for result
+    OptimizedCLUTmpe = cmsStageAllocCLutFloat(ContextID, nGridPoints, 3, cmsPipelineOutputChannels(OriginalLut), NULL);
+    if (OptimizedCLUTmpe == NULL) goto Error;
+
+    // Add the CLUT to the destination LUT. If that fails the stage is still ours to free.
+    if (!cmsPipelineInsertStage(OptimizedLUT, cmsAT_BEGIN, OptimizedCLUTmpe)) {
+        cmsStageFree(OptimizedCLUTmpe);
+        goto Error;
+    }
+
+    // Allocate the kernel data; it keeps a pointer to the CLUT interpolation parameters
+    data = (_cmsStageCLutData*) cmsStageData(OptimizedCLUTmpe);
+    pfloat = LabCLUTAlloc(ContextID, data ->Params);
+    if (pfloat == NULL) goto Error;
+
+    container.data = pfloat;
+    container.original = OriginalLut;
+
+    // Resample the LUT
+    if (!cmsStageSampleCLutFloat(OptimizedCLUTmpe, XFormSampler, (void*)&container, 0)) goto Error;
+
+    // And return the obtained LUT
+    cmsPipelineFree(OriginalLut);
+
+    *Lut = OptimizedLUT;
+    *TransformFn = LabCLUTEval;
+    *UserData   = pfloat;
+    *FreeDataFn = LabCLUTFree;
+    *dwFlags &= ~cmsFLAGS_CAN_CHANGE_FORMATTER;
+    return TRUE;
+
+Error:
+    // Release everything allocated so far; the original pipeline is not freed here
+    if (pfloat != NULL) LabCLUTFree(ContextID, pfloat);
+    if (OptimizedLUT != NULL) cmsPipelineFree(OptimizedLUT);
+    return FALSE;
+}
+
diff --git a/plugins/fast_float/src/fast_float_sup.c b/plugins/fast_float/src/fast_float_sup.c
index d83a7ff..89edf41 100644
--- a/plugins/fast_float/src/fast_float_sup.c
+++ b/plugins/fast_float/src/fast_float_sup.c
@@ -33,39 +33,41 @@ cmsBool Floating_Point_Transforms_Dispatcher(_cmsTransform2Fn* TransformFn,
                                   cmsUInt32Number* OutputFormat, 
                                   cmsUInt32Number* dwFlags) 
 {
-	
-       // Try to optimize as a set of curves plus a matrix plus a set of curves
-       if (OptimizeMatrixShaper15(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
 
-       // Try to optimize by joining curves
-       if (Optimize8ByJoiningCurves(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+    // Try to optimize as a set of curves plus a matrix plus a set of curves
+    if (OptimizeMatrixShaper15(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+
+    // Try to optimize by joining curves
+    if (Optimize8ByJoiningCurves(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
 
 #ifndef CMS_DONT_USE_SSE2
-       // Try to use SSE2 to optimize as a set of curves plus a matrix plus a set of curves
-       if (Optimize8MatrixShaperSSE(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+    // Try to use SSE2 to optimize as a set of curves plus a matrix plus a set of curves
+    if (Optimize8MatrixShaperSSE(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
 #endif
-       // Try to optimize as a set of curves plus a matrix plus a set of curves
-       if (Optimize8MatrixShaper(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+    // Try to optimize as a set of curves plus a matrix plus a set of curves
+    if (Optimize8MatrixShaper(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+
+    // Try to optimize by joining curves
+    if (OptimizeFloatByJoiningCurves(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
 
-       // Try to optimize by joining curves
-       if (OptimizeFloatByJoiningCurves(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+    // Try to optimize as a set of curves plus a matrix plus a set of curves
+    if (OptimizeFloatMatrixShaper(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
 
-       // Try to optimize as a set of curves plus a matrix plus a set of curves
-       if (OptimizeFloatMatrixShaper(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+    // Try to optimize using prelinearization plus tetrahedral
+    if (Optimize8BitRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
 
-       // Try to optimize using prelinearization plus tetrahedral
-       if (Optimize8BitRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+    // Try to optimize using prelinearization plus tetrahedral
+    if (Optimize16BitRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
 
-       // Try to optimize using prelinearization plus tetrahedral
-       if (Optimize16BitRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+    // Try to optimize using prelinearization plus tetrahedral
+    if (OptimizeCLUTRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
 
-       // Try to optimize using prelinearization plus tetrahedral
-       if (OptimizeCLUTRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+    // Try to optimize using prelinearization plus tetrahedral
+    if (OptimizeCLUTCMYKTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
 
-       // Try to optimize using prelinearization plus tetrahedral
-       if (OptimizeCLUTCMYKTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+    // Try to optimize for Lab float as input
+    if (OptimizeCLUTLabTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
 
-      
     // Cannot optimize, use lcms normal process 
     return FALSE;
 }
diff --git a/plugins/fast_float/src/fast_float_tethra.c b/plugins/fast_float/src/fast_float_tethra.c
index e2ece0e..4e54214 100644
--- a/plugins/fast_float/src/fast_float_tethra.c
+++ b/plugins/fast_float/src/fast_float_tethra.c
@@ -27,11 +27,9 @@ typedef struct {
     cmsContext ContextID;
     const cmsInterpParams* p;   // Tetrahedrical interpolation parameters. This is a not-owned pointer.
 
-    cmsBool isLab;
-
 } FloatCLUTData;
 
-// Precomputes tables for 8-bit on input devicelink. 
+// Allocates container
 static
 FloatCLUTData* FloatCLUTAlloc(cmsContext ContextID, const cmsInterpParams* p)
 {
@@ -42,21 +40,16 @@ FloatCLUTData* FloatCLUTAlloc(cmsContext ContextID, const cmsInterpParams* p)
     
     fd ->ContextID = ContextID;
     fd ->p = p;
-    fd->isLab = FALSE;
-
+    
     return fd;
 }
 
 
-// Sampler implemented by another LUT. This is a clean way to precalculate the devicelink 3D CLUT for 
-// almost any transform. We use floating point precision and then convert from floating point to 16 bits.
+// Sampler implemented by another LUT. 
 static
 int XFormSampler(CMSREGISTER const cmsFloat32Number In[], CMSREGISTER cmsFloat32Number Out[], CMSREGISTER void* Cargo)
-{
-    // Evaluate in 16 bits
+{    
     cmsPipelineEvalFloat(In, Out, (cmsPipeline*) Cargo);
-
-    // Always succeed
     return TRUE;
 }
 
@@ -129,21 +122,9 @@ void FloatCLUTEval(struct _cmstransform_struct* CMMcargo,
 
         for (ii = 0; ii < PixelsPerLine; ii++) {
 
-#if 0
-            if (pfloat->isLab)
-            {
-                r = fclamp(*(cmsFloat32Number*)rin / 100.0f);                
-                g = fclamp((*(cmsFloat32Number*)gin + 128.0f) / 255.0f);
-                b = fclamp((*(cmsFloat32Number*)bin + 128.0f) / 255.0f);
-            }
-            else
-#endif
-
-            {
-                r = fclamp(*(cmsFloat32Number*)rin);
-                g = fclamp(*(cmsFloat32Number*)gin);
-                b = fclamp(*(cmsFloat32Number*)bin);
-            }
+            r = fclamp(*(cmsFloat32Number*)rin);
+            g = fclamp(*(cmsFloat32Number*)gin);
+            b = fclamp(*(cmsFloat32Number*)bin);
 
             rin += SourceIncrements[0];
             gin += SourceIncrements[1];
@@ -153,9 +134,9 @@ void FloatCLUTEval(struct _cmstransform_struct* CMMcargo,
             py = g * p->Domain[1];
             pz = b * p->Domain[2];
             
-            x0 = (int)_cmsQuickFloor(px); rx = (px - (cmsFloat32Number)x0);
-            y0 = (int)_cmsQuickFloor(py); ry = (py - (cmsFloat32Number)y0);
-            z0 = (int)_cmsQuickFloor(pz); rz = (pz - (cmsFloat32Number)z0);
+            x0 = _cmsQuickFloor(px); rx = (px - (cmsFloat32Number)x0);
+            y0 = _cmsQuickFloor(py); ry = (py - (cmsFloat32Number)y0);
+            z0 = _cmsQuickFloor(pz); rz = (pz - (cmsFloat32Number)z0);
             
 
             X0 = p->opta[2] * x0;
@@ -272,13 +253,8 @@ cmsBool OptimizeCLUTRGBTransform(_cmsTransform2Fn* TransformFn,
     if (T_BYTES(*InputFormat) != sizeof(cmsFloat32Number) || 
         T_BYTES(*OutputFormat) != sizeof(cmsFloat32Number)) return FALSE;
 
-#if 0
-    if (T_COLORSPACE(*InputFormat)  != PT_RGB && 
-        T_COLORSPACE(*InputFormat)  != PT_Lab) return FALSE;
-#else
     // Input has to be RGB, Output may be any
     if (T_COLORSPACE(*InputFormat) != PT_RGB) return FALSE;
-#endif
 
     OriginalLut = *Lut;
 
@@ -312,14 +288,6 @@ cmsBool OptimizeCLUTRGBTransform(_cmsTransform2Fn* TransformFn,
     pfloat = FloatCLUTAlloc(ContextID, data ->Params);
     if (pfloat == NULL) return FALSE;   
 
-#if 0
-    // For Lab
-    if (T_COLORSPACE(*InputFormat) == PT_Lab)
-    {
-        pfloat->isLab = TRUE;
-    }
-#endif
-
     // And return the obtained LUT
     cmsPipelineFree(OriginalLut);
 
diff --git a/plugins/fast_float/testbed/fast_float_testbed.c b/plugins/fast_float/testbed/fast_float_testbed.c
index 6a46853..e827a4e 100644
--- a/plugins/fast_float/testbed/fast_float_testbed.c
+++ b/plugins/fast_float/testbed/fast_float_testbed.c
@@ -933,49 +933,84 @@ void CheckConversionFloat(void)
        printf("Ok\n");
 }
 
-#if 0
+
+static
+cmsBool ValidFloat2(cmsFloat32Number a, cmsFloat32Number b)
+{
+    return fabsf(a - b) < 0.007;   // nonzero when a and b differ by less than 0.007
+}
+
+
+static
+cmsFloat32Number distance(cmsFloat32Number rgb1[], cmsFloat32Number rgb2[])
+{
+    cmsFloat32Number dr = rgb2[0] - rgb1[0];
+    cmsFloat32Number dg = rgb2[1] - rgb1[1];
+    cmsFloat32Number db = rgb2[2] - rgb1[2];
+    // Sum of the squared per-channel differences; no square root is taken here
+    return dr * dr + dg * dg + db * db;
+}
+
 static
 void CheckLab2RGB(void)
 {
     cmsHPROFILE hLab = cmsCreateLab4Profile(NULL);
-    cmsHPROFILE hXYZ = cmsOpenProfileFromFile("test3.icc", "r");
+    cmsHPROFILE hRGB = cmsOpenProfileFromFile("test3.icc", "r");
     cmsContext noPlugin = cmsCreateContext(0, 0);
 
-    cmsHTRANSFORM hXformNoPlugin = cmsCreateTransformTHR(noPlugin, hLab, TYPE_Lab_FLT, hXYZ, TYPE_RGB_FLT, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOCACHE| cmsFLAGS_FORCE_CLUT| cmsFLAGS_GRIDPOINTS(63));
-    cmsHTRANSFORM hXformPlugin = cmsCreateTransformTHR(0, hLab, TYPE_Lab_FLT, hXYZ, TYPE_RGB_FLT, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOCACHE | cmsFLAGS_FORCE_CLUT | cmsFLAGS_GRIDPOINTS(63));
+    cmsHTRANSFORM hXformNoPlugin = cmsCreateTransformTHR(noPlugin, hLab, TYPE_Lab_FLT, hRGB, TYPE_RGB_FLT, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOCACHE);
+    cmsHTRANSFORM hXformPlugin = cmsCreateTransformTHR(0, hLab, TYPE_Lab_FLT, hRGB, TYPE_RGB_FLT, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOCACHE);
 
-    cmsFloat32Number Lab[3], XYZ[3], XYZ2[3];
+    cmsFloat32Number Lab[3], RGB[3], RGB2[3];
 
-    cmsFloat32Number L, a, b;
+    cmsFloat32Number maxInside = 0, maxOutside = 0, L, a, b;
 
     printf("Checking Lab -> RGB...");
-    for (L = 0; L <= 100; L++)
+    for (L = 4; L <= 100; L++)
     {
-        for (a = -127; a < 128; a++)
-            for (b = -127; b < 128; b++)
+        for (a = -30; a < +30; a++)
+            for (b = -30; b < +30; b++)
             {
+                cmsFloat32Number d;
+
                 Lab[0] = L; Lab[1] = a; Lab[2] = b;
-                cmsDoTransform(hXformNoPlugin, Lab, XYZ, 1);
-                cmsDoTransform(hXformPlugin, Lab, XYZ2, 1);
+                cmsDoTransform(hXformNoPlugin, Lab, RGB, 1);
+                cmsDoTransform(hXformPlugin, Lab, RGB2, 1);
 
-                if (!ValidFloatLab(XYZ[0], XYZ2[0]) ||
-                    !ValidFloatLab(XYZ[1], XYZ2[1]) ||
-                    !ValidFloatLab(XYZ[2], XYZ2[2]))
-                {
-                    Fail("Lab to RGB Error!");
-                }
+                d = distance(RGB, RGB2);
+                if (d > maxInside)
+                    maxInside = d;                   
+            }
+    }
+
+
+    for (L = 1; L <= 100; L += 5)
+    {
+        for (a = -100; a < +100; a += 5)
+            for (b = -100; b < +100; b += 5)
+            {
+                cmsFloat32Number d;
 
+                Lab[0] = L; Lab[1] = a; Lab[2] = b;
+                cmsDoTransform(hXformNoPlugin, Lab, RGB, 1);
+                cmsDoTransform(hXformPlugin, Lab, RGB2, 1);
+
+                d = distance(RGB, RGB2);
+                if (d > maxOutside)
+                    maxOutside = d;
             }
 
     }
 
+
+    printf("Max distance: Inside gamut %f, Outside gamut %f\n", sqrtf(maxInside), sqrtf(maxOutside));
+
     cmsDeleteTransform(hXformNoPlugin);
     cmsDeleteTransform(hXformPlugin);
 
-    cmsDeleteContext(noPlugin);
-    printf("Ok\n");
+    cmsDeleteContext(noPlugin);    
 }
-#endif
+
 
 
 
@@ -1639,11 +1674,83 @@ cmsFloat64Number SpeedTestFloatCMYK(cmsContext ct, cmsHPROFILE hlcmsProfileIn, c
 
 
 static
+cmsFloat64Number SpeedTestFloatLab(cmsContext ct, cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut)
+{
+    // Times a single float Lab -> RGB/Lab/XYZ transform, created in context ct,
+    // over a synthetic Lab grid. Returns the throughput in units of 1024*1024
+    // pixels per second. Both profiles are closed before returning.
+    cmsInt32Number j;
+    clock_t atime;
+    cmsFloat64Number diff;
+    cmsHTRANSFORM hlcmsxform;
+    void* In;
+    cmsUInt32Number size, Mb;
+    cmsUInt32Number  outFormatter = 0;
+    cmsFloat64Number seconds;
+    cmsFloat32Number L, a, b;
+    Scanline_LabFloat* fill;
+
+    if (hlcmsProfileIn == NULL || hlcmsProfileOut == NULL)
+        Fail("Unable to open profiles");
+
+    if (cmsGetColorSpace(hlcmsProfileIn) != cmsSigLabData)
+    {
+        Fail("Invalid colorspace");
+    }
+
+    // Pick the float output format that matches the output profile
+    switch (cmsGetColorSpace(hlcmsProfileOut))
+    {
+    case cmsSigRgbData:  outFormatter = TYPE_RGB_FLT; break;
+    case cmsSigLabData:  outFormatter = TYPE_Lab_FLT; break;
+    case cmsSigXYZData:  outFormatter = TYPE_XYZ_FLT; break;
+
+    default:
+        Fail("Invalid colorspace");
+    }
+
+    hlcmsxform = cmsCreateTransformTHR(ct, hlcmsProfileIn, TYPE_Lab_FLT, hlcmsProfileOut, outFormatter, INTENT_PERCEPTUAL, cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hlcmsProfileIn);
+    cmsCloseProfile(hlcmsProfileOut);
+    if (hlcmsxform == NULL)
+        Fail("Unable to create transform");
+
+    // 100 L x 254 a x 254 b: exactly the samples the loops below write
+    size = 100 * 254 * 254;
+    Mb = size * sizeof(Scanline_LabFloat);
+    In = malloc(Mb);
+    if (In == NULL)
+        Fail("Unable to allocate memory");
+    fill = (Scanline_LabFloat*)In;
+
+    j = 0;
+    for (L = 0; L < 100; L++)
+        for (a = -127.0; a < 127.0; a++)
+            for (b = -127.0; b < +127.0; b++) {
+                fill[j].L = L;
+                fill[j].a = a;
+                fill[j].b = b;
+                j++;
+            }
+
+    atime = clock();
+    cmsDoTransform(hlcmsxform, In, In, size);   // same buffer is input and output
+    diff = clock() - atime;
+
+    free(In);
+    cmsDeleteTransform(hlcmsxform);
+    seconds = (cmsFloat64Number)diff / (cmsFloat64Number)CLOCKS_PER_SEC;
+    return ((cmsFloat64Number)size) / (1024.0 * 1024.0 * seconds);
+}
+
+
+
+static
 void SpeedTestFloat(void)
 {
        cmsContext noPlugin = cmsCreateContext(0, 0);
        
-       cmsFloat64Number t[10];
+       cmsFloat64Number t[10] = { 0 };
 
        printf("\n\n");
        printf("P E R F O R M A N C E   T E S T S   F L O A T  (D E F A U L T)\n");
@@ -1658,7 +1765,8 @@ void SpeedTestFloat(void)
        t[4] = Performance("Floating point on RGB->Lab       ", SpeedTestFloatRGB, noPlugin, "test5.icc", "*lab",      sizeof(Scanline_rgbFloat), 0);
        t[5] = Performance("Floating point on RGB->XYZ       ", SpeedTestFloatRGB, noPlugin, "test3.icc", "*xyz",      sizeof(Scanline_rgbFloat), 0);
        t[6] = Performance("Floating point on CMYK->CMYK     ", SpeedTestFloatCMYK, noPlugin, "test1.icc", "test2.icc",sizeof(Scanline_cmykFloat), 0);
-   
+       t[7] = Performance("Floating point on Lab->RGB       ", SpeedTestFloatLab,  noPlugin, "*lab",     "test3.icc", sizeof(Scanline_LabFloat), 0);
+
 
        // Note that context 0 has the plug-in installed
 
@@ -1675,7 +1783,8 @@ void SpeedTestFloat(void)
        Performance("Floating point on RGB->Lab       ", SpeedTestFloatRGB, 0, "test5.icc", "*lab",      sizeof(Scanline_rgbFloat), t[4]);
        Performance("Floating point on RGB->XYZ       ", SpeedTestFloatRGB, 0, "test3.icc", "*xyz",      sizeof(Scanline_rgbFloat), t[5]);
        Performance("Floating point on CMYK->CMYK     ", SpeedTestFloatCMYK, 0, "test1.icc", "test2.icc", sizeof(Scanline_cmykFloat), t[6]);
-      
+       Performance("Floating point on Lab->RGB       ", SpeedTestFloatLab,  0, "*lab",      "test3.icc", sizeof(Scanline_LabFloat), t[7]);
+
        cmsDeleteContext(noPlugin);
 }
 
@@ -2030,6 +2139,7 @@ int main()
        cmsPlugin(cmsFastFloatExtensions());
        printf("done.\n\n");
        
+       
        CheckComputeIncrements();
 
        // 15 bit functionality
@@ -2039,6 +2149,9 @@ int main()
        // 16 bits functionality
        CheckAccuracy16Bits();
 
+       // Lab to whatever
+       CheckLab2RGB();
+
        // Change format
        CheckChangeFormat();
author	Marti Maria <marti.maria@littlecms.com>	2020-12-02 22:03:40 +0100
committer	Marti Maria <marti.maria@littlecms.com>	2020-12-02 22:03:40 +0100
commit	fb843e69014751aa630188629099c04277135e2d (patch)
tree	1d18674f81434e52b8e163ca9abf0a7b7b7b9025 /plugins
parent	e2b6e7e06520123a9821f0381877dfe94cefb2e5 (diff)
download	lcms2-fb843e69014751aa630188629099c04277135e2d.tar.gz