summaryrefslogtreecommitdiff
path: root/plugins
diff options
context:
space:
mode:
authorMarti Maria <marti.maria@littlecms.com>2020-12-02 22:03:40 +0100
committerMarti Maria <marti.maria@littlecms.com>2020-12-02 22:03:40 +0100
commitfb843e69014751aa630188629099c04277135e2d (patch)
tree1d18674f81434e52b8e163ca9abf0a7b7b7b9025 /plugins
parente2b6e7e06520123a9821f0381877dfe94cefb2e5 (diff)
downloadlcms2-fb843e69014751aa630188629099c04277135e2d.tar.gz
Add Lab -> RGB/CMYK/Gray/Lab kernel to fast float plugin
First try, although it seems to work. It is more accurate close to the neutral axis and less accurate on self-fluorescent, hyper-saturated colors, which are rare. The configure script has to be regenerated (it will be wiped out soon).
Diffstat (limited to 'plugins')
-rw-r--r--plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj1
-rw-r--r--plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj.filters3
-rw-r--r--plugins/fast_float/src/Makefile.am2
-rw-r--r--plugins/fast_float/src/fast_float_internal.h46
-rw-r--r--plugins/fast_float/src/fast_float_lab.c428
-rw-r--r--plugins/fast_float/src/fast_float_sup.c46
-rw-r--r--plugins/fast_float/src/fast_float_tethra.c52
-rw-r--r--plugins/fast_float/testbed/fast_float_testbed.c159
8 files changed, 631 insertions, 106 deletions
diff --git a/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj b/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj
index 40e42b9..a1b6540 100644
--- a/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj
+++ b/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj
@@ -32,6 +32,7 @@
<ClCompile Include="..\..\src\fast_float_15mats.c" />
<ClCompile Include="..\..\src\fast_float_cmyk.c" />
<ClCompile Include="..\..\src\fast_float_curves.c" />
+ <ClCompile Include="..\..\src\fast_float_lab.c" />
<ClCompile Include="..\..\src\fast_float_matsh.c" />
<ClCompile Include="..\..\src\fast_float_separate.c" />
<ClCompile Include="..\..\src\fast_float_sup.c" />
diff --git a/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj.filters b/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj.filters
index c2761d1..58fcf43 100644
--- a/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj.filters
+++ b/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj.filters
@@ -60,6 +60,9 @@
<ClCompile Include="..\..\src\fast_8_matsh.c">
<Filter>Source Files</Filter>
</ClCompile>
+ <ClCompile Include="..\..\src\fast_float_lab.c">
+ <Filter>Source Files</Filter>
+ </ClCompile>
</ItemGroup>
<ItemGroup>
<None Include="..\..\COPYING.GPL3">
diff --git a/plugins/fast_float/src/Makefile.am b/plugins/fast_float/src/Makefile.am
index 0ef9728..723dfea 100644
--- a/plugins/fast_float/src/Makefile.am
+++ b/plugins/fast_float/src/Makefile.am
@@ -23,5 +23,5 @@ liblcms2_fast_float_la_LIBADD = $(LCMS_LIB_DEPLIBS) $(top_builddir)/src/liblcms2
liblcms2_fast_float_la_SOURCES = fast_8_curves.c fast_8_matsh_sse.c fast_8_matsh.c fast_8_tethra.c \
fast_16_tethra.c fast_float_15bits.c fast_float_15mats.c fast_float_cmyk.c fast_float_curves.c fast_float_matsh.c \
- fast_float_separate.c fast_float_sup.c fast_float_tethra.c fast_float_internal.h
+ fast_float_separate.c fast_float_sup.c fast_float_tethra.c fast_float_lab.c fast_float_internal.h
diff --git a/plugins/fast_float/src/fast_float_internal.h b/plugins/fast_float/src/fast_float_internal.h
index c43e8f9..43de02d 100644
--- a/plugins/fast_float/src/fast_float_internal.h
+++ b/plugins/fast_float/src/fast_float_internal.h
@@ -200,20 +200,20 @@ cmsBool Optimize8ByJoiningCurves(_cmsTransform2Fn* TransformFn,
cmsUInt32Number* dwFlags);
cmsBool OptimizeFloatByJoiningCurves(_cmsTransform2Fn* TransformFn,
- void** UserData,
- _cmsFreeUserDataFn* FreeUserData,
- cmsPipeline** Lut,
- cmsUInt32Number* InputFormat,
- cmsUInt32Number* OutputFormat,
- cmsUInt32Number* dwFlags);
+ void** UserData,
+ _cmsFreeUserDataFn* FreeUserData,
+ cmsPipeline** Lut,
+ cmsUInt32Number* InputFormat,
+ cmsUInt32Number* OutputFormat,
+ cmsUInt32Number* dwFlags);
cmsBool OptimizeFloatMatrixShaper(_cmsTransform2Fn* TransformFn,
- void** UserData,
- _cmsFreeUserDataFn* FreeUserData,
- cmsPipeline** Lut,
- cmsUInt32Number* InputFormat,
- cmsUInt32Number* OutputFormat,
- cmsUInt32Number* dwFlags);
+ void** UserData,
+ _cmsFreeUserDataFn* FreeUserData,
+ cmsPipeline** Lut,
+ cmsUInt32Number* InputFormat,
+ cmsUInt32Number* OutputFormat,
+ cmsUInt32Number* dwFlags);
cmsBool Optimize8BitRGBTransform(_cmsTransform2Fn* TransformFn,
void** UserData,
@@ -240,11 +240,21 @@ cmsBool OptimizeCLUTRGBTransform(_cmsTransform2Fn* TransformFn,
cmsUInt32Number* dwFlags);
cmsBool OptimizeCLUTCMYKTransform(_cmsTransform2Fn* TransformFn,
- void** UserData,
- _cmsFreeUserDataFn* FreeDataFn,
- cmsPipeline** Lut,
- cmsUInt32Number* InputFormat,
- cmsUInt32Number* OutputFormat,
- cmsUInt32Number* dwFlags);
+ void** UserData,
+ _cmsFreeUserDataFn* FreeDataFn,
+ cmsPipeline** Lut,
+ cmsUInt32Number* InputFormat,
+ cmsUInt32Number* OutputFormat,
+ cmsUInt32Number* dwFlags);
+
+
+cmsBool OptimizeCLUTLabTransform(_cmsTransform2Fn* TransformFn,
+ void** UserData,
+ _cmsFreeUserDataFn* FreeDataFn,
+ cmsPipeline** Lut,
+ cmsUInt32Number* InputFormat,
+ cmsUInt32Number* OutputFormat,
+ cmsUInt32Number* dwFlags);
+
#endif
diff --git a/plugins/fast_float/src/fast_float_lab.c b/plugins/fast_float/src/fast_float_lab.c
new file mode 100644
index 0000000..da2ddb7
--- /dev/null
+++ b/plugins/fast_float/src/fast_float_lab.c
@@ -0,0 +1,428 @@
+//---------------------------------------------------------------------------------
+//
+// Little Color Management System, fast floating point extensions
+// Copyright (c) 1998-2020 Marti Maria Saguer, all rights reserved
+//
+//
+// This program is free software: you can redistribute it and/or modify
+// it under the terms of the GNU General Public License as published by
+// the Free Software Foundation, either version 3 of the License, or
+// (at your option) any later version.
+//
+// This program is distributed in the hope that it will be useful,
+// but WITHOUT ANY WARRANTY; without even the implied warranty of
+// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+// GNU General Public License for more details.
+//
+// You should have received a copy of the GNU General Public License
+// along with this program. If not, see <http://www.gnu.org/licenses/>.
+//
+//---------------------------------------------------------------------------------
+
+#include "fast_float_internal.h"
+
+
+#define SIGMOID_POINTS 1024
+
+// State of the floating point tetrahedral kernel that uses Lab as the indexing space
+typedef struct {
+
+    cmsContext ContextID;
+
+    // Tetrahedral interpolation parameters. Borrowed pointer, not owned by this struct.
+    const cmsInterpParams* p;
+
+    // Curve applied to the a*/b* axes when indexing
+    cmsFloat32Number sigmoidIn[SIGMOID_POINTS];
+
+    // The same curve, inverted
+    cmsFloat32Number sigmoidOut[SIGMOID_POINTS];
+
+} LabCLUTdata;
+
+
+// Cargo handed to the CLUT sampler: the kernel data (for its inverse sigmoid table)
+// together with the pipeline that is being resampled.
+typedef struct {
+
+    LabCLUTdata* data;
+    cmsPipeline* original;
+
+} ResamplingContainer;
+
+/**
+* Predefined tone curve
+*/
+#define TYPE_SIGMOID 109
+
+
+// Floating-point 1D linear interpolation across a table of SIGMOID_POINTS entries.
+// Value is a position in the 0..1 range. Anything at or above 1 returns the last node;
+// anything that is not strictly positive (NaN included) returns the first node.
+cmsINLINE cmsFloat32Number LinLerp1D(cmsFloat32Number Value, const cmsFloat32Number* LutTable)
+{
+    cmsFloat32Number y0, y1;
+    cmsFloat32Number rest;
+    int cell0;
+
+    if (Value >= 1.0f)
+        return LutTable[SIGMOID_POINTS - 1];
+
+    // Written as a negated comparison on purpose: NaN fails every ordered comparison,
+    // so a plain "Value <= 0" test would let it through and it would end up being
+    // converted into a table index.
+    if (!(Value > 0.0f))
+        return LutTable[0];
+
+    Value *= (SIGMOID_POINTS - 1);
+
+    cell0 = _cmsQuickFloor(Value);
+    rest = Value - cell0;
+
+    y0 = LutTable[cell0];
+    y1 = LutTable[cell0 + 1];
+
+    return y0 + (y1 - y0) * rest;
+}
+
+// Fills table[] with tablePoints samples of the parametric curve 'type', taken at evenly
+// spaced positions across 0..1 and passed through fclamp(). If the curve cannot be
+// built, the table is left zeroed.
+static
+void tabulateSigmoid(cmsContext ContextID, cmsInt32Number type, cmsFloat32Number table[], cmsInt32Number tablePoints)
+{
+    const cmsFloat64Number slope = 2.5;
+    cmsToneCurve* curve;
+    cmsInt32Number n;
+
+    memset(table, 0, sizeof(cmsFloat32Number) * tablePoints);
+
+    curve = cmsBuildParametricToneCurve(ContextID, type, &slope);
+    if (curve == NULL)
+        return;
+
+    for (n = 0; n < tablePoints; n++)
+    {
+        cmsFloat32Number pos = (cmsFloat32Number)n / (cmsFloat32Number)(tablePoints - 1);
+
+        table[n] = fclamp(cmsEvalToneCurveFloat(curve, pos));
+    }
+
+    cmsFreeToneCurve(curve);
+}
+
+
+// Creates the kernel data: a zero-initialized container holding the context, the
+// (borrowed) interpolation parameters and both sigmoid tables.
+// Returns NULL when the allocation fails.
+static
+LabCLUTdata* LabCLUTAlloc(cmsContext ContextID, const cmsInterpParams* p)
+{
+    LabCLUTdata* labData = (LabCLUTdata*) _cmsMallocZero(ContextID, sizeof(LabCLUTdata));
+
+    if (labData != NULL)
+    {
+        labData->ContextID = ContextID;
+        labData->p = p;
+
+        tabulateSigmoid(ContextID, +TYPE_SIGMOID, labData->sigmoidIn, SIGMOID_POINTS);
+        tabulateSigmoid(ContextID, -TYPE_SIGMOID, labData->sigmoidOut, SIGMOID_POINTS);
+    }
+
+    return labData;
+}
+
+// Releases the block passed in v through _cmsFree(). Installed by
+// OptimizeCLUTLabTransform as the free callback for the kernel data.
+static
+void LabCLUTFree(cmsContext ContextID, void* v)
+{
+    _cmsFree(ContextID, v);
+}
+
+// CLUT sampler backed by another pipeline. The second and third input coordinates go
+// through the inverse sigmoid table before the original pipeline is evaluated; the
+// first one is passed as it comes.
+static
+int XFormSampler(CMSREGISTER const cmsFloat32Number In[], CMSREGISTER cmsFloat32Number Out[], CMSREGISTER void* Cargo)
+{
+    ResamplingContainer* rc = (ResamplingContainer*)Cargo;
+    const cmsFloat32Number* inverse = rc->data->sigmoidOut;
+    cmsFloat32Number unwarped[3];
+
+    unwarped[0] = In[0];
+    unwarped[1] = LinLerp1D(In[1], inverse);
+    unwarped[2] = LinLerp1D(In[2], inverse);
+
+    cmsPipelineEvalFloat(unwarped, Out, rc->original);
+    return TRUE;
+}
+
+// An optimized interpolation for Lab.
+//
+// Transform worker for Lab float input. Every pixel is decoded as L* / 100 (through
+// fclamp) and as (a* + 128) / 255, (b* + 128) / 255 (both through the sigmoidIn table).
+// The resulting triplet indexes the float CLUT using tetrahedral interpolation.
+// When cmsFLAGS_COPY_ALPHA is set and an extra channel is present, that channel is
+// copied across as a float.
+#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan])
+
+static
+void LabCLUTEval(struct _cmstransform_struct* CMMcargo,
+                 const void* Input,
+                 void* Output,
+                 cmsUInt32Number PixelsPerLine,
+                 cmsUInt32Number LineCount,
+                 const cmsStride* Stride)
+
+{
+
+    LabCLUTdata* pfloat = (LabCLUTdata*)_cmsGetTransformUserData(CMMcargo);
+
+    cmsFloat32Number l, a, b;
+    cmsFloat32Number px, py, pz;
+    int x0, y0, z0;
+    int X0, Y0, Z0, X1, Y1, Z1;
+    cmsFloat32Number rx, ry, rz;
+    cmsFloat32Number c0, c1 = 0, c2 = 0, c3 = 0;
+    cmsUInt32Number OutChan;
+
+    const cmsInterpParams* p = pfloat->p;
+    cmsUInt32Number TotalOut = p->nOutputs;
+    cmsUInt32Number TotalPlusAlpha;
+    const cmsFloat32Number* LutTable = (const cmsFloat32Number*)p->Table;
+
+    cmsUInt32Number i, ii;
+    const cmsUInt8Number* lin;
+    const cmsUInt8Number* ain;
+    const cmsUInt8Number* bin;
+    const cmsUInt8Number* xin = NULL;
+
+    cmsUInt8Number* out[cmsMAXCHANNELS];
+    cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS];
+    cmsUInt32Number SourceIncrements[cmsMAXCHANNELS];
+    cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS];
+    cmsUInt32Number DestIncrements[cmsMAXCHANNELS];
+
+    cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo);
+    cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo);
+
+    cmsUInt32Number nchans, nalpha;
+    cmsUInt32Number strideIn, strideOut;
+
+    _cmsComputeComponentIncrements(InputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements);
+    _cmsComputeComponentIncrements(OutputFormat, Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements);
+
+    if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA))
+        nalpha = 0;
+
+    strideIn = strideOut = 0;
+    for (i = 0; i < LineCount; i++) {
+
+        lin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn;
+        ain = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn;
+        bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2] + strideIn;
+
+        if (nalpha)
+            xin = (const cmsUInt8Number*)Input + SourceStartingOrder[3] + strideIn;
+
+        TotalPlusAlpha = TotalOut;
+        if (xin) TotalPlusAlpha++;
+
+        for (ii = 0; ii < TotalPlusAlpha; ii++)
+            out[ii] = (cmsUInt8Number*)Output + DestStartingOrder[ii] + strideOut;
+
+        for (ii = 0; ii < PixelsPerLine; ii++) {
+
+            // Decode Lab and go across sigmoids on a*/b*
+            l = fclamp((*(const cmsFloat32Number*)lin) / 100.0f);
+            a = LinLerp1D(((*(const cmsFloat32Number*)ain) + 128.0f) / 255.0f, pfloat->sigmoidIn);
+            b = LinLerp1D(((*(const cmsFloat32Number*)bin) + 128.0f) / 255.0f, pfloat->sigmoidIn);
+
+            lin += SourceIncrements[0];
+            ain += SourceIncrements[1];
+            bin += SourceIncrements[2];
+
+            px = l * p->Domain[0];
+            py = a * p->Domain[1];
+            pz = b * p->Domain[2];
+
+            x0 = _cmsQuickFloor(px); rx = (px - (cmsFloat32Number)x0);
+            y0 = _cmsQuickFloor(py); ry = (py - (cmsFloat32Number)y0);
+            z0 = _cmsQuickFloor(pz); rz = (pz - (cmsFloat32Number)z0);
+
+            // On the upper edge of an axis the "next" node collapses onto the current one
+            X0 = p->opta[2] * x0;
+            X1 = X0 + (l >= 1.0f ? 0 : p->opta[2]);
+
+            Y0 = p->opta[1] * y0;
+            Y1 = Y0 + (a >= 1.0f ? 0 : p->opta[1]);
+
+            Z0 = p->opta[0] * z0;
+            Z1 = Z0 + (b >= 1.0f ? 0 : p->opta[0]);
+
+            for (OutChan = 0; OutChan < TotalOut; OutChan++) {
+
+                // These are the 6 Tetrahedral
+
+                c0 = DENS(X0, Y0, Z0);
+
+                if (rx >= ry && ry >= rz) {
+
+                    c1 = DENS(X1, Y0, Z0) - c0;
+                    c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0);
+                    c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+
+                }
+                else
+                    if (rx >= rz && rz >= ry) {
+
+                        c1 = DENS(X1, Y0, Z0) - c0;
+                        c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                        c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0);
+
+                    }
+                    else
+                        if (rz >= rx && rx >= ry) {
+
+                            c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1);
+                            c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1);
+                            c3 = DENS(X0, Y0, Z1) - c0;
+
+                        }
+                        else
+                            if (ry >= rx && rx >= rz) {
+
+                                c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0);
+                                c2 = DENS(X0, Y1, Z0) - c0;
+                                c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0);
+
+                            }
+                            else
+                                if (ry >= rz && rz >= rx) {
+
+                                    c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                                    c2 = DENS(X0, Y1, Z0) - c0;
+                                    c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0);
+
+                                }
+                                else
+                                    if (rz >= ry && ry >= rx) {
+
+                                        c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1);
+                                        c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1);
+                                        c3 = DENS(X0, Y0, Z1) - c0;
+
+                                    }
+                                    else {
+                                        c1 = c2 = c3 = 0;
+                                    }
+
+                *(cmsFloat32Number*)(out[OutChan]) = c0 + c1 * rx + c2 * ry + c3 * rz;
+
+                out[OutChan] += DestIncrements[OutChan];
+            }
+
+            // Alpha is a float like every other channel here: copy the whole value, not
+            // just its first byte, and step both pointers on to the next pixel.
+            if (xin) {
+
+                *(cmsFloat32Number*)(out[TotalOut]) = *(const cmsFloat32Number*)xin;
+
+                xin += SourceIncrements[3];
+                out[TotalOut] += DestIncrements[TotalOut];
+            }
+        }
+
+        strideIn += Stride->BytesPerLineIn;
+        strideOut += Stride->BytesPerLineOut;
+    }
+}
+
+#undef DENS
+
+
+/**
+* Number of CLUT grid points selected by the transform flags
+*/
+static
+int GetGridpoints(cmsUInt32Number dwFlags)
+{
+    const cmsUInt32Number explicitPoints = (dwFlags >> 16) & 0xFF;
+
+    // A count stored in bits 16..23 takes precedence over the precision hints
+    if (explicitPoints != 0)
+        return (int)explicitPoints;
+
+    // HighResPrecalc is maximum resolution
+    if (dwFlags & cmsFLAGS_HIGHRESPRECALC)
+        return 66;
+
+    // LowResPrecalc is lower resolution
+    if (dwFlags & cmsFLAGS_LOWRESPRECALC)
+        return 33;
+
+    return 51;
+}
+
+// --------------------------------------------------------------------------------------------------------------
+
+// Optimization for Lab float input: the whole pipeline is resampled into a float CLUT
+// that is indexed by L* and by the sigmoid-warped a*/b* axes, and LabCLUTEval is
+// installed as the transform worker.
+//
+// Returns TRUE when the optimization has been applied. In that case the original
+// pipeline is freed and *Lut, *TransformFn, *UserData, *FreeDataFn and *dwFlags are
+// updated. Returns FALSE otherwise; *Lut is left untouched and nothing allocated here
+// is kept.
+cmsBool OptimizeCLUTLabTransform(_cmsTransform2Fn* TransformFn,
+                                 void** UserData,
+                                 _cmsFreeUserDataFn* FreeDataFn,
+                                 cmsPipeline** Lut,
+                                 cmsUInt32Number* InputFormat,
+                                 cmsUInt32Number* OutputFormat,
+                                 cmsUInt32Number* dwFlags)
+{
+    cmsPipeline* OriginalLut;
+    int nGridPoints;
+    cmsPipeline* OptimizedLUT = NULL;
+    cmsStage* OptimizedCLUTmpe;
+    cmsStage* mpe;
+    LabCLUTdata* pfloat = NULL;
+    cmsContext ContextID;
+    _cmsStageCLutData* data;
+    ResamplingContainer container;
+
+
+    // For empty transforms, do nothing
+    if (*Lut == NULL) return FALSE;
+
+    // Check for floating point only
+    if (!T_FLOAT(*InputFormat) || !T_FLOAT(*OutputFormat)) return FALSE;
+
+    // Only on floats
+    if (T_BYTES(*InputFormat) != sizeof(cmsFloat32Number) ||
+        T_BYTES(*OutputFormat) != sizeof(cmsFloat32Number)) return FALSE;
+
+    // Input has to be Lab, output may be any
+    if (T_COLORSPACE(*InputFormat) != PT_Lab) return FALSE;
+
+    OriginalLut = *Lut;
+
+    // Named color pipelines cannot be optimized either
+    for (mpe = cmsPipelineGetPtrToFirstStage(OriginalLut);
+        mpe != NULL;
+        mpe = cmsStageNext(mpe)) {
+        if (cmsStageType(mpe) == cmsSigNamedColorElemType) return FALSE;
+    }
+
+    ContextID = cmsGetPipelineContextID(OriginalLut);
+    nGridPoints = GetGridpoints(*dwFlags);
+
+    // Create the result LUT
+    OptimizedLUT = cmsPipelineAlloc(ContextID, 3, cmsPipelineOutputChannels(OriginalLut));
+    if (OptimizedLUT == NULL) goto Error;
+
+    // Allocate the CLUT for result
+    OptimizedCLUTmpe = cmsStageAllocCLutFloat(ContextID, nGridPoints, 3, cmsPipelineOutputChannels(OriginalLut), NULL);
+    if (OptimizedCLUTmpe == NULL) goto Error;
+
+    // Add the CLUT to the destination LUT
+    cmsPipelineInsertStage(OptimizedLUT, cmsAT_BEGIN, OptimizedCLUTmpe);
+
+    // Set the evaluator, copy parameters
+    data = (_cmsStageCLutData*) cmsStageData(OptimizedCLUTmpe);
+
+    // Allocate data. On failure go through Error so the new LUT is released as well.
+    pfloat = LabCLUTAlloc(ContextID, data ->Params);
+    if (pfloat == NULL) goto Error;
+
+    container.data = pfloat;
+    container.original = OriginalLut;
+
+    // Resample the LUT
+    if (!cmsStageSampleCLutFloat(OptimizedCLUTmpe, XFormSampler, (void*)&container, 0)) goto Error;
+
+    // And return the obtained LUT
+    cmsPipelineFree(OriginalLut);
+
+    *Lut = OptimizedLUT;
+    *TransformFn = LabCLUTEval;
+    *UserData = pfloat;
+    *FreeDataFn = LabCLUTFree;
+    *dwFlags &= ~cmsFLAGS_CAN_CHANGE_FORMATTER;
+    return TRUE;
+
+Error:
+
+    // Every jump to this label happens after ContextID has been assigned
+    if (pfloat != NULL) LabCLUTFree(ContextID, pfloat);
+    if (OptimizedLUT != NULL) cmsPipelineFree(OptimizedLUT);
+
+    return FALSE;
+}
+
diff --git a/plugins/fast_float/src/fast_float_sup.c b/plugins/fast_float/src/fast_float_sup.c
index d83a7ff..89edf41 100644
--- a/plugins/fast_float/src/fast_float_sup.c
+++ b/plugins/fast_float/src/fast_float_sup.c
@@ -33,39 +33,41 @@ cmsBool Floating_Point_Transforms_Dispatcher(_cmsTransform2Fn* TransformFn,
cmsUInt32Number* OutputFormat,
cmsUInt32Number* dwFlags)
{
-
- // Try to optimize as a set of curves plus a matrix plus a set of curves
- if (OptimizeMatrixShaper15(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
- // Try to optimize by joining curves
- if (Optimize8ByJoiningCurves(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+ // Try to optimize as a set of curves plus a matrix plus a set of curves
+ if (OptimizeMatrixShaper15(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+
+ // Try to optimize by joining curves
+ if (Optimize8ByJoiningCurves(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
#ifndef CMS_DONT_USE_SSE2
- // Try to use SSE2 to optimize as a set of curves plus a matrix plus a set of curves
- if (Optimize8MatrixShaperSSE(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+ // Try to use SSE2 to optimize as a set of curves plus a matrix plus a set of curves
+ if (Optimize8MatrixShaperSSE(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
#endif
- // Try to optimize as a set of curves plus a matrix plus a set of curves
- if (Optimize8MatrixShaper(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+ // Try to optimize as a set of curves plus a matrix plus a set of curves
+ if (Optimize8MatrixShaper(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+
+ // Try to optimize by joining curves
+ if (OptimizeFloatByJoiningCurves(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
- // Try to optimize by joining curves
- if (OptimizeFloatByJoiningCurves(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+ // Try to optimize as a set of curves plus a matrix plus a set of curves
+ if (OptimizeFloatMatrixShaper(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
- // Try to optimize as a set of curves plus a matrix plus a set of curves
- if (OptimizeFloatMatrixShaper(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+ // Try to optimize using prelinearization plus tetrahedral
+ if (Optimize8BitRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
- // Try to optimize using prelinearization plus tetrahedral
- if (Optimize8BitRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+ // Try to optimize using prelinearization plus tetrahedral
+ if (Optimize16BitRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
- // Try to optimize using prelinearization plus tetrahedral
- if (Optimize16BitRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+ // Try to optimize using prelinearization plus tetrahedral
+ if (OptimizeCLUTRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
- // Try to optimize using prelinearization plus tetrahedral
- if (OptimizeCLUTRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+ // Try to optimize using prelinearization plus tetrahedral
+ if (OptimizeCLUTCMYKTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
- // Try to optimize using prelinearization plus tetrahedral
- if (OptimizeCLUTCMYKTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
+ // Try to optimize for Lab float as input
+ if (OptimizeCLUTLabTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE;
-
// Cannot optimize, use lcms normal process
return FALSE;
}
diff --git a/plugins/fast_float/src/fast_float_tethra.c b/plugins/fast_float/src/fast_float_tethra.c
index e2ece0e..4e54214 100644
--- a/plugins/fast_float/src/fast_float_tethra.c
+++ b/plugins/fast_float/src/fast_float_tethra.c
@@ -27,11 +27,9 @@ typedef struct {
cmsContext ContextID;
const cmsInterpParams* p; // Tetrahedrical interpolation parameters. This is a not-owned pointer.
- cmsBool isLab;
-
} FloatCLUTData;
-// Precomputes tables for 8-bit on input devicelink.
+// Allocates container
static
FloatCLUTData* FloatCLUTAlloc(cmsContext ContextID, const cmsInterpParams* p)
{
@@ -42,21 +40,16 @@ FloatCLUTData* FloatCLUTAlloc(cmsContext ContextID, const cmsInterpParams* p)
fd ->ContextID = ContextID;
fd ->p = p;
- fd->isLab = FALSE;
-
+
return fd;
}
-// Sampler implemented by another LUT. This is a clean way to precalculate the devicelink 3D CLUT for
-// almost any transform. We use floating point precision and then convert from floating point to 16 bits.
+// Sampler implemented by another LUT.
static
int XFormSampler(CMSREGISTER const cmsFloat32Number In[], CMSREGISTER cmsFloat32Number Out[], CMSREGISTER void* Cargo)
-{
- // Evaluate in 16 bits
+{
cmsPipelineEvalFloat(In, Out, (cmsPipeline*) Cargo);
-
- // Always succeed
return TRUE;
}
@@ -129,21 +122,9 @@ void FloatCLUTEval(struct _cmstransform_struct* CMMcargo,
for (ii = 0; ii < PixelsPerLine; ii++) {
-#if 0
- if (pfloat->isLab)
- {
- r = fclamp(*(cmsFloat32Number*)rin / 100.0f);
- g = fclamp((*(cmsFloat32Number*)gin + 128.0f) / 255.0f);
- b = fclamp((*(cmsFloat32Number*)bin + 128.0f) / 255.0f);
- }
- else
-#endif
-
- {
- r = fclamp(*(cmsFloat32Number*)rin);
- g = fclamp(*(cmsFloat32Number*)gin);
- b = fclamp(*(cmsFloat32Number*)bin);
- }
+ r = fclamp(*(cmsFloat32Number*)rin);
+ g = fclamp(*(cmsFloat32Number*)gin);
+ b = fclamp(*(cmsFloat32Number*)bin);
rin += SourceIncrements[0];
gin += SourceIncrements[1];
@@ -153,9 +134,9 @@ void FloatCLUTEval(struct _cmstransform_struct* CMMcargo,
py = g * p->Domain[1];
pz = b * p->Domain[2];
- x0 = (int)_cmsQuickFloor(px); rx = (px - (cmsFloat32Number)x0);
- y0 = (int)_cmsQuickFloor(py); ry = (py - (cmsFloat32Number)y0);
- z0 = (int)_cmsQuickFloor(pz); rz = (pz - (cmsFloat32Number)z0);
+ x0 = _cmsQuickFloor(px); rx = (px - (cmsFloat32Number)x0);
+ y0 = _cmsQuickFloor(py); ry = (py - (cmsFloat32Number)y0);
+ z0 = _cmsQuickFloor(pz); rz = (pz - (cmsFloat32Number)z0);
X0 = p->opta[2] * x0;
@@ -272,13 +253,8 @@ cmsBool OptimizeCLUTRGBTransform(_cmsTransform2Fn* TransformFn,
if (T_BYTES(*InputFormat) != sizeof(cmsFloat32Number) ||
T_BYTES(*OutputFormat) != sizeof(cmsFloat32Number)) return FALSE;
-#if 0
- if (T_COLORSPACE(*InputFormat) != PT_RGB &&
- T_COLORSPACE(*InputFormat) != PT_Lab) return FALSE;
-#else
// Input has to be RGB, Output may be any
if (T_COLORSPACE(*InputFormat) != PT_RGB) return FALSE;
-#endif
OriginalLut = *Lut;
@@ -312,14 +288,6 @@ cmsBool OptimizeCLUTRGBTransform(_cmsTransform2Fn* TransformFn,
pfloat = FloatCLUTAlloc(ContextID, data ->Params);
if (pfloat == NULL) return FALSE;
-#if 0
- // For Lab
- if (T_COLORSPACE(*InputFormat) == PT_Lab)
- {
- pfloat->isLab = TRUE;
- }
-#endif
-
// And return the obtained LUT
cmsPipelineFree(OriginalLut);
diff --git a/plugins/fast_float/testbed/fast_float_testbed.c b/plugins/fast_float/testbed/fast_float_testbed.c
index 6a46853..e827a4e 100644
--- a/plugins/fast_float/testbed/fast_float_testbed.c
+++ b/plugins/fast_float/testbed/fast_float_testbed.c
@@ -933,49 +933,84 @@ void CheckConversionFloat(void)
printf("Ok\n");
}
-#if 0
+
+// Non-zero when the two values differ by less than 0.007
+static
+cmsBool ValidFloat2(cmsFloat32Number a, cmsFloat32Number b)
+{
+    const cmsFloat32Number delta = fabsf(a - b);
+
+    return delta < 0.007;
+}
+
+
+// Squared Euclidean distance between two 3-component colors. No square root is taken here.
+static
+cmsFloat32Number distance(cmsFloat32Number rgb1[], cmsFloat32Number rgb2[])
+{
+    cmsFloat32Number sum = 0;
+    int c;
+
+    for (c = 0; c < 3; c++)
+    {
+        cmsFloat32Number delta = rgb2[c] - rgb1[c];
+
+        sum += delta * delta;
+    }
+
+    return sum;
+}
+
static
void CheckLab2RGB(void)
{
cmsHPROFILE hLab = cmsCreateLab4Profile(NULL);
- cmsHPROFILE hXYZ = cmsOpenProfileFromFile("test3.icc", "r");
+ cmsHPROFILE hRGB = cmsOpenProfileFromFile("test3.icc", "r");
cmsContext noPlugin = cmsCreateContext(0, 0);
- cmsHTRANSFORM hXformNoPlugin = cmsCreateTransformTHR(noPlugin, hLab, TYPE_Lab_FLT, hXYZ, TYPE_RGB_FLT, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOCACHE| cmsFLAGS_FORCE_CLUT| cmsFLAGS_GRIDPOINTS(63));
- cmsHTRANSFORM hXformPlugin = cmsCreateTransformTHR(0, hLab, TYPE_Lab_FLT, hXYZ, TYPE_RGB_FLT, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOCACHE | cmsFLAGS_FORCE_CLUT | cmsFLAGS_GRIDPOINTS(63));
+ cmsHTRANSFORM hXformNoPlugin = cmsCreateTransformTHR(noPlugin, hLab, TYPE_Lab_FLT, hRGB, TYPE_RGB_FLT, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOCACHE);
+ cmsHTRANSFORM hXformPlugin = cmsCreateTransformTHR(0, hLab, TYPE_Lab_FLT, hRGB, TYPE_RGB_FLT, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOCACHE);
- cmsFloat32Number Lab[3], XYZ[3], XYZ2[3];
+ cmsFloat32Number Lab[3], RGB[3], RGB2[3];
- cmsFloat32Number L, a, b;
+ cmsFloat32Number maxInside = 0, maxOutside = 0, L, a, b;
printf("Checking Lab -> RGB...");
- for (L = 0; L <= 100; L++)
+ for (L = 4; L <= 100; L++)
{
- for (a = -127; a < 128; a++)
- for (b = -127; b < 128; b++)
+ for (a = -30; a < +30; a++)
+ for (b = -30; b < +30; b++)
{
+ cmsFloat32Number d;
+
Lab[0] = L; Lab[1] = a; Lab[2] = b;
- cmsDoTransform(hXformNoPlugin, Lab, XYZ, 1);
- cmsDoTransform(hXformPlugin, Lab, XYZ2, 1);
+ cmsDoTransform(hXformNoPlugin, Lab, RGB, 1);
+ cmsDoTransform(hXformPlugin, Lab, RGB2, 1);
- if (!ValidFloatLab(XYZ[0], XYZ2[0]) ||
- !ValidFloatLab(XYZ[1], XYZ2[1]) ||
- !ValidFloatLab(XYZ[2], XYZ2[2]))
- {
- Fail("Lab to RGB Error!");
- }
+ d = distance(RGB, RGB2);
+ if (d > maxInside)
+ maxInside = d;
+ }
+ }
+
+
+ for (L = 1; L <= 100; L += 5)
+ {
+ for (a = -100; a < +100; a += 5)
+ for (b = -100; b < +100; b += 5)
+ {
+ cmsFloat32Number d;
+ Lab[0] = L; Lab[1] = a; Lab[2] = b;
+ cmsDoTransform(hXformNoPlugin, Lab, RGB, 1);
+ cmsDoTransform(hXformPlugin, Lab, RGB2, 1);
+
+ d = distance(RGB, RGB2);
+ if (d > maxOutside)
+ maxOutside = d;
}
}
+
+ printf("Max distance: Inside gamut %f, Outside gamut %f\n", sqrtf(maxInside), sqrtf(maxOutside));
+
cmsDeleteTransform(hXformNoPlugin);
cmsDeleteTransform(hXformPlugin);
- cmsDeleteContext(noPlugin);
- printf("Ok\n");
+ cmsDeleteContext(noPlugin);
}
-#endif
+
@@ -1639,11 +1674,83 @@ cmsFloat64Number SpeedTestFloatCMYK(cmsContext ct, cmsHPROFILE hlcmsProfileIn, c
static
+cmsFloat64Number SpeedTestFloatLab(cmsContext ct, cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut)
+{
+    // Measures the throughput of a Lab float -> RGB/Lab/XYZ float transform over a
+    // sweep of Lab values. Both profiles are closed here. The value returned is the
+    // number of pixels transformed, divided by 1024*1024 and by the elapsed seconds.
+    cmsInt32Number j;
+    clock_t atime;
+    cmsFloat64Number diff;
+    cmsHTRANSFORM hlcmsxform;
+    void* In;
+    cmsUInt32Number size, Mb;
+    cmsUInt32Number outFormatter = 0;
+    cmsFloat64Number seconds;
+    cmsFloat32Number L, a, b;
+    Scanline_LabFloat* fill;
+
+
+    if (hlcmsProfileIn == NULL || hlcmsProfileOut == NULL)
+        Fail("Unable to open profiles");
+
+
+    if (cmsGetColorSpace(hlcmsProfileIn) != cmsSigLabData)
+    {
+        Fail("Invalid colorspace");
+    }
+
+    switch (cmsGetColorSpace(hlcmsProfileOut))
+    {
+    case cmsSigRgbData: outFormatter = TYPE_RGB_FLT; break;
+    case cmsSigLabData: outFormatter = TYPE_Lab_FLT; break;
+    case cmsSigXYZData: outFormatter = TYPE_XYZ_FLT; break;
+
+    default:
+        Fail("Invalid colorspace");
+    }
+
+    hlcmsxform = cmsCreateTransformTHR(ct, hlcmsProfileIn, TYPE_Lab_FLT, hlcmsProfileOut, outFormatter, INTENT_PERCEPTUAL, cmsFLAGS_NOCACHE);
+    cmsCloseProfile(hlcmsProfileIn);
+    cmsCloseProfile(hlcmsProfileOut);
+
+    j = 0;
+
+    // 100 L* steps by 254 a* steps by 254 b* steps: exactly what the loops below write,
+    // so no uninitialized pixel is transformed or counted in the result.
+    size = 100 * 254 * 254;
+    Mb = size * sizeof(Scanline_LabFloat);
+    In = malloc(Mb);
+    if (In == NULL)
+    {
+        cmsDeleteTransform(hlcmsxform);
+        Fail("Unable to allocate the Lab test buffer");
+        return 0;
+    }
+
+    fill = (Scanline_LabFloat*)In;
+
+    for (L = 0; L < 100; L++)
+        for (a = -127.0; a < 127.0; a++)
+            for (b = -127.0; b < +127.0; b++) {
+
+                fill[j].L = L;
+                fill[j].a = a;
+                fill[j].b = b;
+
+                j++;
+            }
+
+
+    atime = clock();
+
+    cmsDoTransform(hlcmsxform, In, In, size);
+
+    diff = clock() - atime;
+    free(In);
+
+    cmsDeleteTransform(hlcmsxform);
+
+    seconds = (cmsFloat64Number)diff / (cmsFloat64Number)CLOCKS_PER_SEC;
+    return ((cmsFloat64Number)size) / (1024.0 * 1024.0 * seconds);
+}
+
+
+
+static
void SpeedTestFloat(void)
{
cmsContext noPlugin = cmsCreateContext(0, 0);
- cmsFloat64Number t[10];
+ cmsFloat64Number t[10] = { 0 };
printf("\n\n");
printf("P E R F O R M A N C E T E S T S F L O A T (D E F A U L T)\n");
@@ -1658,7 +1765,8 @@ void SpeedTestFloat(void)
t[4] = Performance("Floating point on RGB->Lab ", SpeedTestFloatRGB, noPlugin, "test5.icc", "*lab", sizeof(Scanline_rgbFloat), 0);
t[5] = Performance("Floating point on RGB->XYZ ", SpeedTestFloatRGB, noPlugin, "test3.icc", "*xyz", sizeof(Scanline_rgbFloat), 0);
t[6] = Performance("Floating point on CMYK->CMYK ", SpeedTestFloatCMYK, noPlugin, "test1.icc", "test2.icc",sizeof(Scanline_cmykFloat), 0);
-
+ t[7] = Performance("Floating point on Lab->RGB ", SpeedTestFloatLab, noPlugin, "*lab", "test3.icc", sizeof(Scanline_LabFloat), 0);
+
// Note that context 0 has the plug-in installed
@@ -1675,7 +1783,8 @@ void SpeedTestFloat(void)
Performance("Floating point on RGB->Lab ", SpeedTestFloatRGB, 0, "test5.icc", "*lab", sizeof(Scanline_rgbFloat), t[4]);
Performance("Floating point on RGB->XYZ ", SpeedTestFloatRGB, 0, "test3.icc", "*xyz", sizeof(Scanline_rgbFloat), t[5]);
Performance("Floating point on CMYK->CMYK ", SpeedTestFloatCMYK, 0, "test1.icc", "test2.icc", sizeof(Scanline_cmykFloat), t[6]);
-
+ Performance("Floating point on Lab->RGB ", SpeedTestFloatLab, 0, "*lab", "test3.icc", sizeof(Scanline_LabFloat), t[7]);
+
cmsDeleteContext(noPlugin);
}
@@ -2030,6 +2139,7 @@ int main()
cmsPlugin(cmsFastFloatExtensions());
printf("done.\n\n");
+
CheckComputeIncrements();
// 15 bit functionality
@@ -2039,6 +2149,9 @@ int main()
// 16 bits functionality
CheckAccuracy16Bits();
+ // Lab to whatever
+ CheckLab2RGB();
+
// Change format
CheckChangeFormat();