diff options
author | Marti Maria <marti.maria@littlecms.com> | 2020-12-02 22:03:40 +0100 |
---|---|---|
committer | Marti Maria <marti.maria@littlecms.com> | 2020-12-02 22:03:40 +0100 |
commit | fb843e69014751aa630188629099c04277135e2d (patch) | |
tree | 1d18674f81434e52b8e163ca9abf0a7b7b7b9025 /plugins | |
parent | e2b6e7e06520123a9821f0381877dfe94cefb2e5 (diff) | |
download | lcms2-fb843e69014751aa630188629099c04277135e2d.tar.gz |
Add Lab -> RGB/CMYK/Gray/Lab kernel to fast float plugin
First try, although it seems to work.
It is more accurate close to the neutral axis and less accurate on self-fluorescent hyper-saturated colors, which are rare.
configure has to be regenerated (it will be wiped out soon)
Diffstat (limited to 'plugins')
-rw-r--r-- | plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj | 1 | ||||
-rw-r--r-- | plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj.filters | 3 | ||||
-rw-r--r-- | plugins/fast_float/src/Makefile.am | 2 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_float_internal.h | 46 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_float_lab.c | 428 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_float_sup.c | 46 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_float_tethra.c | 52 | ||||
-rw-r--r-- | plugins/fast_float/testbed/fast_float_testbed.c | 159 |
8 files changed, 631 insertions, 106 deletions
diff --git a/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj b/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj index 40e42b9..a1b6540 100644 --- a/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj +++ b/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj @@ -32,6 +32,7 @@ <ClCompile Include="..\..\src\fast_float_15mats.c" /> <ClCompile Include="..\..\src\fast_float_cmyk.c" /> <ClCompile Include="..\..\src\fast_float_curves.c" /> + <ClCompile Include="..\..\src\fast_float_lab.c" /> <ClCompile Include="..\..\src\fast_float_matsh.c" /> <ClCompile Include="..\..\src\fast_float_separate.c" /> <ClCompile Include="..\..\src\fast_float_sup.c" /> diff --git a/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj.filters b/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj.filters index c2761d1..58fcf43 100644 --- a/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj.filters +++ b/plugins/fast_float/Projects/VC2019/lcms2_fast_float_plugin.vcxproj.filters @@ -60,6 +60,9 @@ <ClCompile Include="..\..\src\fast_8_matsh.c"> <Filter>Source Files</Filter> </ClCompile> + <ClCompile Include="..\..\src\fast_float_lab.c"> + <Filter>Source Files</Filter> + </ClCompile> </ItemGroup> <ItemGroup> <None Include="..\..\COPYING.GPL3"> diff --git a/plugins/fast_float/src/Makefile.am b/plugins/fast_float/src/Makefile.am index 0ef9728..723dfea 100644 --- a/plugins/fast_float/src/Makefile.am +++ b/plugins/fast_float/src/Makefile.am @@ -23,5 +23,5 @@ liblcms2_fast_float_la_LIBADD = $(LCMS_LIB_DEPLIBS) $(top_builddir)/src/liblcms2 liblcms2_fast_float_la_SOURCES = fast_8_curves.c fast_8_matsh_sse.c fast_8_matsh.c fast_8_tethra.c \ fast_16_tethra.c fast_float_15bits.c fast_float_15mats.c fast_float_cmyk.c fast_float_curves.c fast_float_matsh.c \ - fast_float_separate.c fast_float_sup.c fast_float_tethra.c fast_float_internal.h + fast_float_separate.c fast_float_sup.c fast_float_tethra.c 
fast_float_lab.c fast_float_internal.h diff --git a/plugins/fast_float/src/fast_float_internal.h b/plugins/fast_float/src/fast_float_internal.h index c43e8f9..43de02d 100644 --- a/plugins/fast_float/src/fast_float_internal.h +++ b/plugins/fast_float/src/fast_float_internal.h @@ -200,20 +200,20 @@ cmsBool Optimize8ByJoiningCurves(_cmsTransform2Fn* TransformFn, cmsUInt32Number* dwFlags); cmsBool OptimizeFloatByJoiningCurves(_cmsTransform2Fn* TransformFn, - void** UserData, - _cmsFreeUserDataFn* FreeUserData, - cmsPipeline** Lut, - cmsUInt32Number* InputFormat, - cmsUInt32Number* OutputFormat, - cmsUInt32Number* dwFlags); + void** UserData, + _cmsFreeUserDataFn* FreeUserData, + cmsPipeline** Lut, + cmsUInt32Number* InputFormat, + cmsUInt32Number* OutputFormat, + cmsUInt32Number* dwFlags); cmsBool OptimizeFloatMatrixShaper(_cmsTransform2Fn* TransformFn, - void** UserData, - _cmsFreeUserDataFn* FreeUserData, - cmsPipeline** Lut, - cmsUInt32Number* InputFormat, - cmsUInt32Number* OutputFormat, - cmsUInt32Number* dwFlags); + void** UserData, + _cmsFreeUserDataFn* FreeUserData, + cmsPipeline** Lut, + cmsUInt32Number* InputFormat, + cmsUInt32Number* OutputFormat, + cmsUInt32Number* dwFlags); cmsBool Optimize8BitRGBTransform(_cmsTransform2Fn* TransformFn, void** UserData, @@ -240,11 +240,21 @@ cmsBool OptimizeCLUTRGBTransform(_cmsTransform2Fn* TransformFn, cmsUInt32Number* dwFlags); cmsBool OptimizeCLUTCMYKTransform(_cmsTransform2Fn* TransformFn, - void** UserData, - _cmsFreeUserDataFn* FreeDataFn, - cmsPipeline** Lut, - cmsUInt32Number* InputFormat, - cmsUInt32Number* OutputFormat, - cmsUInt32Number* dwFlags); + void** UserData, + _cmsFreeUserDataFn* FreeDataFn, + cmsPipeline** Lut, + cmsUInt32Number* InputFormat, + cmsUInt32Number* OutputFormat, + cmsUInt32Number* dwFlags); + + +cmsBool OptimizeCLUTLabTransform(_cmsTransform2Fn* TransformFn, + void** UserData, + _cmsFreeUserDataFn* FreeDataFn, + cmsPipeline** Lut, + cmsUInt32Number* InputFormat, + cmsUInt32Number* 
OutputFormat, + cmsUInt32Number* dwFlags); + #endif diff --git a/plugins/fast_float/src/fast_float_lab.c b/plugins/fast_float/src/fast_float_lab.c new file mode 100644 index 0000000..da2ddb7 --- /dev/null +++ b/plugins/fast_float/src/fast_float_lab.c @@ -0,0 +1,428 @@ +//--------------------------------------------------------------------------------- +// +// Little Color Management System, fast floating point extensions +// Copyright (c) 1998-2020 Marti Maria Saguer, all rights reserved +// +// +// This program is free software: you can redistribute it and/or modify +// it under the terms of the GNU General Public License as published by +// the Free Software Foundation, either version 3 of the License, or +// (at your option) any later version. +// +// This program is distributed in the hope that it will be useful, +// but WITHOUT ANY WARRANTY; without even the implied warranty of +// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +// GNU General Public License for more details. +// +// You should have received a copy of the GNU General Public License +// along with this program. If not, see <http://www.gnu.org/licenses/>. +// +//--------------------------------------------------------------------------------- + +#include "fast_float_internal.h" + + +#define SIGMOID_POINTS 1024 + +// Optimization for floating point tetrahedral interpolation using Lab as indexing space +typedef struct { + + cmsContext ContextID; + const cmsInterpParams* p; // Tetrahedrical interpolation parameters. This is a not-owned pointer. + + cmsFloat32Number sigmoidIn[SIGMOID_POINTS]; // to apply to a*/b* axis on indexing + cmsFloat32Number sigmoidOut[SIGMOID_POINTS]; // the curve above, inverted. 
+ +} LabCLUTdata; + + +typedef struct { + + LabCLUTdata* data; + cmsPipeline* original; + +} ResamplingContainer; + +/** +* Predefined tone curve +*/ +#define TYPE_SIGMOID 109 + + +// Floating-point version of 1D interpolation +cmsINLINE cmsFloat32Number LinLerp1D(cmsFloat32Number Value, const cmsFloat32Number* LutTable) +{ + if (Value >= 1.0f) + { + return LutTable[SIGMOID_POINTS - 1]; + } + else + if (Value <= 0) + { + return LutTable[0]; + } + else + { + cmsFloat32Number y1, y0; + cmsFloat32Number rest; + int cell0, cell1; + + Value *= (SIGMOID_POINTS - 1); + + cell0 = _cmsQuickFloor(Value); + cell1 = cell0 + 1; + + rest = Value - cell0; + + y0 = LutTable[cell0]; + y1 = LutTable[cell1]; + + return y0 + (y1 - y0) * rest; + } +} + +static +void tabulateSigmoid(cmsContext ContextID, cmsInt32Number type, cmsFloat32Number table[], cmsInt32Number tablePoints) +{ + const cmsFloat64Number sigmoidal_slope = 2.5; + cmsToneCurve* original; + cmsInt32Number i; + + memset(table, 0, sizeof(cmsFloat32Number) * tablePoints); + original = cmsBuildParametricToneCurve(ContextID, type, &sigmoidal_slope); + if (original != NULL) + { + for (i = 0; i < tablePoints; i++) + { + cmsFloat32Number v = (cmsFloat32Number)i / (cmsFloat32Number)(tablePoints - 1); + + table[i] = fclamp(cmsEvalToneCurveFloat(original, v)); + } + + cmsFreeToneCurve(original); + } +} + + +// Allocates container and curves +static +LabCLUTdata* LabCLUTAlloc(cmsContext ContextID, const cmsInterpParams* p) +{ + LabCLUTdata* fd; + + fd = (LabCLUTdata*) _cmsMallocZero(ContextID, sizeof(LabCLUTdata)); + if (fd == NULL) return NULL; + + fd ->ContextID = ContextID; + fd ->p = p; + + tabulateSigmoid(ContextID, +TYPE_SIGMOID, fd->sigmoidIn, SIGMOID_POINTS); + tabulateSigmoid(ContextID, -TYPE_SIGMOID, fd->sigmoidOut, SIGMOID_POINTS); + + return fd; +} + +static +void LabCLUTFree(cmsContext ContextID, void* v) +{ + _cmsFree(ContextID, v); +} + +// Sampler implemented by another LUT. 
+static +int XFormSampler(CMSREGISTER const cmsFloat32Number In[], CMSREGISTER cmsFloat32Number Out[], CMSREGISTER void* Cargo) +{ + ResamplingContainer* container = (ResamplingContainer*)Cargo; + cmsFloat32Number linearized[3]; + + // Apply inverse sigmoid + linearized[0] = In[0]; + linearized[1] = LinLerp1D(In[1], container->data->sigmoidOut); + linearized[2] = LinLerp1D(In[2], container->data->sigmoidOut); + + cmsPipelineEvalFloat(linearized, Out, container->original); + return TRUE; +} + +// A optimized interpolation for Lab. +#define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan]) + +static +void LabCLUTEval(struct _cmstransform_struct* CMMcargo, + const void* Input, + void* Output, + cmsUInt32Number PixelsPerLine, + cmsUInt32Number LineCount, + const cmsStride* Stride) + +{ + + LabCLUTdata* pfloat = (LabCLUTdata*)_cmsGetTransformUserData(CMMcargo); + + cmsFloat32Number l, a, b; + cmsFloat32Number px, py, pz; + int x0, y0, z0; + int X0, Y0, Z0, X1, Y1, Z1; + cmsFloat32Number rx, ry, rz; + cmsFloat32Number c0, c1 = 0, c2 = 0, c3 = 0; + cmsUInt32Number OutChan; + + const cmsInterpParams* p = pfloat->p; + cmsUInt32Number TotalOut = p->nOutputs; + cmsUInt32Number TotalPlusAlpha; + const cmsFloat32Number* LutTable = (const cmsFloat32Number*)p->Table; + + cmsUInt32Number i, ii; + const cmsUInt8Number* lin; + const cmsUInt8Number* ain; + const cmsUInt8Number* bin; + const cmsUInt8Number* xin = NULL; + + cmsUInt8Number* out[cmsMAXCHANNELS]; + cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS]; + cmsUInt32Number SourceIncrements[cmsMAXCHANNELS]; + cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS]; + cmsUInt32Number DestIncrements[cmsMAXCHANNELS]; + + cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo); + cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo); + + cmsUInt32Number nchans, nalpha; + cmsUInt32Number strideIn, strideOut; + + _cmsComputeComponentIncrements(InputFormat, Stride->BytesPerPlaneIn, &nchans, 
&nalpha, SourceStartingOrder, SourceIncrements); + _cmsComputeComponentIncrements(OutputFormat, Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements); + + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; + + strideIn = strideOut = 0; + for (i = 0; i < LineCount; i++) { + + lin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn; + ain = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn; + bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2] + strideIn; + + if (nalpha) + xin = (const cmsUInt8Number*)Input + SourceStartingOrder[3] + strideIn; + + TotalPlusAlpha = TotalOut; + if (xin) TotalPlusAlpha++; + + for (ii = 0; ii < TotalPlusAlpha; ii++) + out[ii] = (cmsUInt8Number*)Output + DestStartingOrder[ii] + strideOut; + + for (ii = 0; ii < PixelsPerLine; ii++) { + + // Decode Lab and go across sigmoids on a*/b* + l = fclamp((*(cmsFloat32Number*)lin) / 100.0f); + a = LinLerp1D(((*(cmsFloat32Number*)ain) + 128.0f) / 255.0f, pfloat->sigmoidIn); + b = LinLerp1D(((*(cmsFloat32Number*)bin) + 128.0f) / 255.0f, pfloat->sigmoidIn); + + lin += SourceIncrements[0]; + ain += SourceIncrements[1]; + bin += SourceIncrements[2]; + + px = l * p->Domain[0]; + py = a * p->Domain[1]; + pz = b * p->Domain[2]; + + x0 = _cmsQuickFloor(px); rx = (px - (cmsFloat32Number)x0); + y0 = _cmsQuickFloor(py); ry = (py - (cmsFloat32Number)y0); + z0 = _cmsQuickFloor(pz); rz = (pz - (cmsFloat32Number)z0); + + X0 = p->opta[2] * x0; + X1 = X0 + (l >= 1.0f ? 0 : p->opta[2]); + + Y0 = p->opta[1] * y0; + Y1 = Y0 + (a >= 1.0f ? 0 : p->opta[1]); + + Z0 = p->opta[0] * z0; + Z1 = Z0 + (b >= 1.0f ? 
0 : p->opta[0]); + + for (OutChan = 0; OutChan < TotalOut; OutChan++) { + + // These are the 6 Tetrahedral + + c0 = DENS(X0, Y0, Z0); + + if (rx >= ry && ry >= rz) { + + c1 = DENS(X1, Y0, Z0) - c0; + c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0); + c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); + + } + else + if (rx >= rz && rz >= ry) { + + c1 = DENS(X1, Y0, Z0) - c0; + c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); + c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0); + + } + else + if (rz >= rx && rx >= ry) { + + c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1); + c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); + c3 = DENS(X0, Y0, Z1) - c0; + + } + else + if (ry >= rx && rx >= rz) { + + c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0); + c2 = DENS(X0, Y1, Z0) - c0; + c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); + + } + else + if (ry >= rz && rz >= rx) { + + c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); + c2 = DENS(X0, Y1, Z0) - c0; + c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0); + + } + else + if (rz >= ry && ry >= rx) { + + c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); + c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1); + c3 = DENS(X0, Y0, Z1) - c0; + + } + else { + c1 = c2 = c3 = 0; + } + + *(cmsFloat32Number*)(out[OutChan]) = c0 + c1 * rx + c2 * ry + c3 * rz; + + out[OutChan] += DestIncrements[OutChan]; + } + + if (xin) + *out[TotalOut] = *xin; + } + + strideIn += Stride->BytesPerLineIn; + strideOut += Stride->BytesPerLineOut; + } +} + +#undef DENS + + +/** +* Get from flags +*/ +static +int GetGridpoints(cmsUInt32Number dwFlags) +{ + // Already specified? 
+ if (dwFlags & 0x00FF0000) { + return (dwFlags >> 16) & 0xFF; + } + + // HighResPrecalc is maximum resolution + if (dwFlags & cmsFLAGS_HIGHRESPRECALC) { + return 66; + } + else + // LowResPrecal is lower resolution + if (dwFlags & cmsFLAGS_LOWRESPRECALC) { + return 33; + } + else + return 51; + +} + +// -------------------------------------------------------------------------------------------------------------- + +cmsBool OptimizeCLUTLabTransform(_cmsTransform2Fn* TransformFn, + void** UserData, + _cmsFreeUserDataFn* FreeDataFn, + cmsPipeline** Lut, + cmsUInt32Number* InputFormat, + cmsUInt32Number* OutputFormat, + cmsUInt32Number* dwFlags) +{ + cmsPipeline* OriginalLut; + int nGridPoints; + cmsPipeline* OptimizedLUT = NULL; + cmsStage* OptimizedCLUTmpe; + cmsColorSpaceSignature OutputColorSpace; + cmsStage* mpe; + LabCLUTdata* pfloat; + cmsContext ContextID; + _cmsStageCLutData* data; + ResamplingContainer container; + + + // For empty transforms, do nothing + if (*Lut == NULL) return FALSE; + + // Check for floating point only + if (!T_FLOAT(*InputFormat) || !T_FLOAT(*OutputFormat)) return FALSE; + + // Only on floats + if (T_BYTES(*InputFormat) != sizeof(cmsFloat32Number) || + T_BYTES(*OutputFormat) != sizeof(cmsFloat32Number)) return FALSE; + + if (T_COLORSPACE(*InputFormat) != PT_Lab) return FALSE; + + OriginalLut = *Lut; + + // Named color pipelines cannot be optimized either + for (mpe = cmsPipelineGetPtrToFirstStage(OriginalLut); + mpe != NULL; + mpe = cmsStageNext(mpe)) { + if (cmsStageType(mpe) == cmsSigNamedColorElemType) return FALSE; + } + + ContextID = cmsGetPipelineContextID(OriginalLut); + OutputColorSpace = _cmsICCcolorSpace(T_COLORSPACE(*OutputFormat)); + nGridPoints = GetGridpoints(*dwFlags); + + // Create the result LUT + OptimizedLUT = cmsPipelineAlloc(cmsGetPipelineContextID(OriginalLut), 3, cmsPipelineOutputChannels(OriginalLut)); + if (OptimizedLUT == NULL) goto Error; + + // Allocate the CLUT for result + OptimizedCLUTmpe = 
cmsStageAllocCLutFloat(ContextID, nGridPoints, 3, cmsPipelineOutputChannels(OriginalLut), NULL); + + // Add the CLUT to the destination LUT + cmsPipelineInsertStage(OptimizedLUT, cmsAT_BEGIN, OptimizedCLUTmpe); + + // Set the evaluator, copy parameters + data = (_cmsStageCLutData*) cmsStageData(OptimizedCLUTmpe); + + // Allocate data + pfloat = LabCLUTAlloc(ContextID, data ->Params); + if (pfloat == NULL) return FALSE; + + container.data = pfloat; + container.original = OriginalLut; + + // Resample the LUT + if (!cmsStageSampleCLutFloat(OptimizedCLUTmpe, XFormSampler, (void*)&container, 0)) goto Error; + + // And return the obtained LUT + cmsPipelineFree(OriginalLut); + + *Lut = OptimizedLUT; + *TransformFn = LabCLUTEval; + *UserData = pfloat; + *FreeDataFn = LabCLUTFree; + *dwFlags &= ~cmsFLAGS_CAN_CHANGE_FORMATTER; + return TRUE; + +Error: + + if (OptimizedLUT != NULL) cmsPipelineFree(OptimizedLUT); + + return FALSE; +} + diff --git a/plugins/fast_float/src/fast_float_sup.c b/plugins/fast_float/src/fast_float_sup.c index d83a7ff..89edf41 100644 --- a/plugins/fast_float/src/fast_float_sup.c +++ b/plugins/fast_float/src/fast_float_sup.c @@ -33,39 +33,41 @@ cmsBool Floating_Point_Transforms_Dispatcher(_cmsTransform2Fn* TransformFn, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags) { - - // Try to optimize as a set of curves plus a matrix plus a set of curves - if (OptimizeMatrixShaper15(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; - // Try to optimize by joining curves - if (Optimize8ByJoiningCurves(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; + // Try to optimize as a set of curves plus a matrix plus a set of curves + if (OptimizeMatrixShaper15(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; + + // Try to optimize by joining curves + if (Optimize8ByJoiningCurves(TransformFn, UserData, FreeUserData, Lut, InputFormat, 
OutputFormat, dwFlags)) return TRUE; #ifndef CMS_DONT_USE_SSE2 - // Try to use SSE2 to optimize as a set of curves plus a matrix plus a set of curves - if (Optimize8MatrixShaperSSE(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; + // Try to use SSE2 to optimize as a set of curves plus a matrix plus a set of curves + if (Optimize8MatrixShaperSSE(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; #endif - // Try to optimize as a set of curves plus a matrix plus a set of curves - if (Optimize8MatrixShaper(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; + // Try to optimize as a set of curves plus a matrix plus a set of curves + if (Optimize8MatrixShaper(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; + + // Try to optimize by joining curves + if (OptimizeFloatByJoiningCurves(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; - // Try to optimize by joining curves - if (OptimizeFloatByJoiningCurves(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; + // Try to optimize as a set of curves plus a matrix plus a set of curves + if (OptimizeFloatMatrixShaper(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; - // Try to optimize as a set of curves plus a matrix plus a set of curves - if (OptimizeFloatMatrixShaper(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; + // Try to optimize using prelinearization plus tetrahedral + if (Optimize8BitRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; - // Try to optimize using prelinearization plus tetrahedral - if (Optimize8BitRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; + // Try to 
optimize using prelinearization plus tetrahedral + if (Optimize16BitRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; - // Try to optimize using prelinearization plus tetrahedral - if (Optimize16BitRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; + // Try to optimize using prelinearization plus tetrahedral + if (OptimizeCLUTRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; - // Try to optimize using prelinearization plus tetrahedral - if (OptimizeCLUTRGBTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; + // Try to optimize using prelinearization plus tetrahedral + if (OptimizeCLUTCMYKTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; - // Try to optimize using prelinearization plus tetrahedral - if (OptimizeCLUTCMYKTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; + // Try to optimize for Lab float as input + if (OptimizeCLUTLabTransform(TransformFn, UserData, FreeUserData, Lut, InputFormat, OutputFormat, dwFlags)) return TRUE; - // Cannot optimize, use lcms normal process return FALSE; } diff --git a/plugins/fast_float/src/fast_float_tethra.c b/plugins/fast_float/src/fast_float_tethra.c index e2ece0e..4e54214 100644 --- a/plugins/fast_float/src/fast_float_tethra.c +++ b/plugins/fast_float/src/fast_float_tethra.c @@ -27,11 +27,9 @@ typedef struct { cmsContext ContextID; const cmsInterpParams* p; // Tetrahedrical interpolation parameters. This is a not-owned pointer. - cmsBool isLab; - } FloatCLUTData; -// Precomputes tables for 8-bit on input devicelink. 
+// Allocates container static FloatCLUTData* FloatCLUTAlloc(cmsContext ContextID, const cmsInterpParams* p) { @@ -42,21 +40,16 @@ FloatCLUTData* FloatCLUTAlloc(cmsContext ContextID, const cmsInterpParams* p) fd ->ContextID = ContextID; fd ->p = p; - fd->isLab = FALSE; - + return fd; } -// Sampler implemented by another LUT. This is a clean way to precalculate the devicelink 3D CLUT for -// almost any transform. We use floating point precision and then convert from floating point to 16 bits. +// Sampler implemented by another LUT. static int XFormSampler(CMSREGISTER const cmsFloat32Number In[], CMSREGISTER cmsFloat32Number Out[], CMSREGISTER void* Cargo) -{ - // Evaluate in 16 bits +{ cmsPipelineEvalFloat(In, Out, (cmsPipeline*) Cargo); - - // Always succeed return TRUE; } @@ -129,21 +122,9 @@ void FloatCLUTEval(struct _cmstransform_struct* CMMcargo, for (ii = 0; ii < PixelsPerLine; ii++) { -#if 0 - if (pfloat->isLab) - { - r = fclamp(*(cmsFloat32Number*)rin / 100.0f); - g = fclamp((*(cmsFloat32Number*)gin + 128.0f) / 255.0f); - b = fclamp((*(cmsFloat32Number*)bin + 128.0f) / 255.0f); - } - else -#endif - - { - r = fclamp(*(cmsFloat32Number*)rin); - g = fclamp(*(cmsFloat32Number*)gin); - b = fclamp(*(cmsFloat32Number*)bin); - } + r = fclamp(*(cmsFloat32Number*)rin); + g = fclamp(*(cmsFloat32Number*)gin); + b = fclamp(*(cmsFloat32Number*)bin); rin += SourceIncrements[0]; gin += SourceIncrements[1]; @@ -153,9 +134,9 @@ void FloatCLUTEval(struct _cmstransform_struct* CMMcargo, py = g * p->Domain[1]; pz = b * p->Domain[2]; - x0 = (int)_cmsQuickFloor(px); rx = (px - (cmsFloat32Number)x0); - y0 = (int)_cmsQuickFloor(py); ry = (py - (cmsFloat32Number)y0); - z0 = (int)_cmsQuickFloor(pz); rz = (pz - (cmsFloat32Number)z0); + x0 = _cmsQuickFloor(px); rx = (px - (cmsFloat32Number)x0); + y0 = _cmsQuickFloor(py); ry = (py - (cmsFloat32Number)y0); + z0 = _cmsQuickFloor(pz); rz = (pz - (cmsFloat32Number)z0); X0 = p->opta[2] * x0; @@ -272,13 +253,8 @@ cmsBool 
OptimizeCLUTRGBTransform(_cmsTransform2Fn* TransformFn, if (T_BYTES(*InputFormat) != sizeof(cmsFloat32Number) || T_BYTES(*OutputFormat) != sizeof(cmsFloat32Number)) return FALSE; -#if 0 - if (T_COLORSPACE(*InputFormat) != PT_RGB && - T_COLORSPACE(*InputFormat) != PT_Lab) return FALSE; -#else // Input has to be RGB, Output may be any if (T_COLORSPACE(*InputFormat) != PT_RGB) return FALSE; -#endif OriginalLut = *Lut; @@ -312,14 +288,6 @@ cmsBool OptimizeCLUTRGBTransform(_cmsTransform2Fn* TransformFn, pfloat = FloatCLUTAlloc(ContextID, data ->Params); if (pfloat == NULL) return FALSE; -#if 0 - // For Lab - if (T_COLORSPACE(*InputFormat) == PT_Lab) - { - pfloat->isLab = TRUE; - } -#endif - // And return the obtained LUT cmsPipelineFree(OriginalLut); diff --git a/plugins/fast_float/testbed/fast_float_testbed.c b/plugins/fast_float/testbed/fast_float_testbed.c index 6a46853..e827a4e 100644 --- a/plugins/fast_float/testbed/fast_float_testbed.c +++ b/plugins/fast_float/testbed/fast_float_testbed.c @@ -933,49 +933,84 @@ void CheckConversionFloat(void) printf("Ok\n"); } -#if 0 + +static +cmsBool ValidFloat2(cmsFloat32Number a, cmsFloat32Number b) +{ + return fabsf(a - b) < 0.007; +} + + +static +cmsFloat32Number distance(cmsFloat32Number rgb1[], cmsFloat32Number rgb2[]) +{ + cmsFloat32Number dr = rgb2[0] - rgb1[0]; + cmsFloat32Number dg = rgb2[1] - rgb1[1]; + cmsFloat32Number db = rgb2[2] - rgb1[2]; + + return dr * dr + dg * dg + db * db; +} + static void CheckLab2RGB(void) { cmsHPROFILE hLab = cmsCreateLab4Profile(NULL); - cmsHPROFILE hXYZ = cmsOpenProfileFromFile("test3.icc", "r"); + cmsHPROFILE hRGB = cmsOpenProfileFromFile("test3.icc", "r"); cmsContext noPlugin = cmsCreateContext(0, 0); - cmsHTRANSFORM hXformNoPlugin = cmsCreateTransformTHR(noPlugin, hLab, TYPE_Lab_FLT, hXYZ, TYPE_RGB_FLT, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOCACHE| cmsFLAGS_FORCE_CLUT| cmsFLAGS_GRIDPOINTS(63)); - cmsHTRANSFORM hXformPlugin = cmsCreateTransformTHR(0, hLab, TYPE_Lab_FLT, hXYZ, 
TYPE_RGB_FLT, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOCACHE | cmsFLAGS_FORCE_CLUT | cmsFLAGS_GRIDPOINTS(63)); + cmsHTRANSFORM hXformNoPlugin = cmsCreateTransformTHR(noPlugin, hLab, TYPE_Lab_FLT, hRGB, TYPE_RGB_FLT, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOCACHE); + cmsHTRANSFORM hXformPlugin = cmsCreateTransformTHR(0, hLab, TYPE_Lab_FLT, hRGB, TYPE_RGB_FLT, INTENT_RELATIVE_COLORIMETRIC, cmsFLAGS_NOCACHE); - cmsFloat32Number Lab[3], XYZ[3], XYZ2[3]; + cmsFloat32Number Lab[3], RGB[3], RGB2[3]; - cmsFloat32Number L, a, b; + cmsFloat32Number maxInside = 0, maxOutside = 0, L, a, b; printf("Checking Lab -> RGB..."); - for (L = 0; L <= 100; L++) + for (L = 4; L <= 100; L++) { - for (a = -127; a < 128; a++) - for (b = -127; b < 128; b++) + for (a = -30; a < +30; a++) + for (b = -30; b < +30; b++) { + cmsFloat32Number d; + Lab[0] = L; Lab[1] = a; Lab[2] = b; - cmsDoTransform(hXformNoPlugin, Lab, XYZ, 1); - cmsDoTransform(hXformPlugin, Lab, XYZ2, 1); + cmsDoTransform(hXformNoPlugin, Lab, RGB, 1); + cmsDoTransform(hXformPlugin, Lab, RGB2, 1); - if (!ValidFloatLab(XYZ[0], XYZ2[0]) || - !ValidFloatLab(XYZ[1], XYZ2[1]) || - !ValidFloatLab(XYZ[2], XYZ2[2])) - { - Fail("Lab to RGB Error!"); - } + d = distance(RGB, RGB2); + if (d > maxInside) + maxInside = d; + } + } + + + for (L = 1; L <= 100; L += 5) + { + for (a = -100; a < +100; a += 5) + for (b = -100; b < +100; b += 5) + { + cmsFloat32Number d; + Lab[0] = L; Lab[1] = a; Lab[2] = b; + cmsDoTransform(hXformNoPlugin, Lab, RGB, 1); + cmsDoTransform(hXformPlugin, Lab, RGB2, 1); + + d = distance(RGB, RGB2); + if (d > maxOutside) + maxOutside = d; } } + + printf("Max distance: Inside gamut %f, Outside gamut %f\n", sqrtf(maxInside), sqrtf(maxOutside)); + cmsDeleteTransform(hXformNoPlugin); cmsDeleteTransform(hXformPlugin); - cmsDeleteContext(noPlugin); - printf("Ok\n"); + cmsDeleteContext(noPlugin); } -#endif + @@ -1639,11 +1674,83 @@ cmsFloat64Number SpeedTestFloatCMYK(cmsContext ct, cmsHPROFILE hlcmsProfileIn, c static 
+cmsFloat64Number SpeedTestFloatLab(cmsContext ct, cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut) +{ + cmsInt32Number j; + clock_t atime; + cmsFloat64Number diff; + cmsHTRANSFORM hlcmsxform; + void* In; + cmsUInt32Number size, Mb; + cmsUInt32Number outFormatter = 0; + cmsFloat64Number seconds; + cmsFloat32Number L, a, b; + Scanline_LabFloat* fill; + + + if (hlcmsProfileIn == NULL || hlcmsProfileOut == NULL) + Fail("Unable to open profiles"); + + + if (cmsGetColorSpace(hlcmsProfileIn) != cmsSigLabData) + { + Fail("Invalid colorspace"); + } + + switch (cmsGetColorSpace(hlcmsProfileOut)) + { + case cmsSigRgbData: outFormatter = TYPE_RGB_FLT; break; + case cmsSigLabData: outFormatter = TYPE_Lab_FLT; break; + case cmsSigXYZData: outFormatter = TYPE_XYZ_FLT; break; + + default: + Fail("Invalid colorspace"); + } + + hlcmsxform = cmsCreateTransformTHR(ct, hlcmsProfileIn, TYPE_Lab_FLT, hlcmsProfileOut, outFormatter, INTENT_PERCEPTUAL, cmsFLAGS_NOCACHE); + cmsCloseProfile(hlcmsProfileIn); + cmsCloseProfile(hlcmsProfileOut); + + j = 0; + + size = 100 * 256 * 256; + Mb = size * sizeof(Scanline_LabFloat); + In = malloc(Mb); + fill = (Scanline_LabFloat*)In; + + for (L = 0; L < 100; L++) + for (a = -127.0; a < 127.0; a++) + for (b = -127.0; b < +127.0; b++) { + + fill[j].L = L; + fill[j].a = a; + fill[j].b = b; + + j++; + } + + + atime = clock(); + + cmsDoTransform(hlcmsxform, In, In, size); + + diff = clock() - atime; + free(In); + + cmsDeleteTransform(hlcmsxform); + + seconds = (cmsFloat64Number)diff / (cmsFloat64Number)CLOCKS_PER_SEC; + return ((cmsFloat64Number)size) / (1024.0 * 1024.0 * seconds); +} + + + +static void SpeedTestFloat(void) { cmsContext noPlugin = cmsCreateContext(0, 0); - cmsFloat64Number t[10]; + cmsFloat64Number t[10] = { 0 }; printf("\n\n"); printf("P E R F O R M A N C E T E S T S F L O A T (D E F A U L T)\n"); @@ -1658,7 +1765,8 @@ void SpeedTestFloat(void) t[4] = Performance("Floating point on RGB->Lab ", SpeedTestFloatRGB, noPlugin, 
"test5.icc", "*lab", sizeof(Scanline_rgbFloat), 0); t[5] = Performance("Floating point on RGB->XYZ ", SpeedTestFloatRGB, noPlugin, "test3.icc", "*xyz", sizeof(Scanline_rgbFloat), 0); t[6] = Performance("Floating point on CMYK->CMYK ", SpeedTestFloatCMYK, noPlugin, "test1.icc", "test2.icc",sizeof(Scanline_cmykFloat), 0); - + t[7] = Performance("Floating point on Lab->RGB ", SpeedTestFloatLab, noPlugin, "*lab", "test3.icc", sizeof(Scanline_LabFloat), 0); + // Note that context 0 has the plug-in installed @@ -1675,7 +1783,8 @@ void SpeedTestFloat(void) Performance("Floating point on RGB->Lab ", SpeedTestFloatRGB, 0, "test5.icc", "*lab", sizeof(Scanline_rgbFloat), t[4]); Performance("Floating point on RGB->XYZ ", SpeedTestFloatRGB, 0, "test3.icc", "*xyz", sizeof(Scanline_rgbFloat), t[5]); Performance("Floating point on CMYK->CMYK ", SpeedTestFloatCMYK, 0, "test1.icc", "test2.icc", sizeof(Scanline_cmykFloat), t[6]); - + Performance("Floating point on Lab->RGB ", SpeedTestFloatLab, 0, "*lab", "test3.icc", sizeof(Scanline_LabFloat), t[7]); + cmsDeleteContext(noPlugin); } @@ -2030,6 +2139,7 @@ int main() cmsPlugin(cmsFastFloatExtensions()); printf("done.\n\n"); + CheckComputeIncrements(); // 15 bit functionality @@ -2039,6 +2149,9 @@ int main() // 16 bits functionality CheckAccuracy16Bits(); + // Lab to whatever + CheckLab2RGB(); + // Change format CheckChangeFormat(); |