diff options
author | Marti Maria <info@littlecms.com> | 2020-10-02 10:57:06 +0200 |
---|---|---|
committer | Marti Maria <info@littlecms.com> | 2020-10-02 10:57:06 +0200 |
commit | b78a296b3e28a2e6b1998caebeb370c5b511dd3a (patch) | |
tree | ac7a18bc4c73fb0651288168a327bd7c8cff0f02 | |
parent | 887057fb25ae585959dade9c8fa3881c63c2b610 (diff) | |
download | lcms2-b78a296b3e28a2e6b1998caebeb370c5b511dd3a.tar.gz |
fast plugin update to development sources
Fixes several issues in C++ builds (functions not found due to name mangling)
Converts all optimizers to version 2
Adds complete alpha handling
Improves speed in certain cases
-rw-r--r-- | include/lcms2_plugin.h | 3 | ||||
-rw-r--r-- | plugins/fast_float/include/lcms2_fast_float.h | 2 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_16_tethra.c | 9 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_8_curves.c | 18 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_8_matsh.c | 9 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_8_matsh_sse.c | 7 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_8_tethra.c | 8 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_float_15mats.c | 7 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_float_cmyk.c | 289 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_float_curves.c | 289 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_float_internal.h | 20 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_float_matsh.c | 104 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_float_sup.c | 7 | ||||
-rw-r--r-- | plugins/fast_float/src/fast_float_tethra.c | 223 | ||||
-rw-r--r-- | plugins/fast_float/testbed/fast_float_testbed.c | 29 | ||||
-rw-r--r-- | src/cmsxform.c | 6 |
16 files changed, 620 insertions, 410 deletions
diff --git a/include/lcms2_plugin.h b/include/lcms2_plugin.h index 507acd6..3b4975a 100644 --- a/include/lcms2_plugin.h +++ b/include/lcms2_plugin.h @@ -626,6 +626,9 @@ CMSAPI void * CMSEXPORT _cmsGetTransformUserData(struct _cmstransform_struct *CM CMSAPI void CMSEXPORT _cmsGetTransformFormatters16 (struct _cmstransform_struct *CMMcargo, cmsFormatter16* FromInput, cmsFormatter16* ToOutput); CMSAPI void CMSEXPORT _cmsGetTransformFormattersFloat(struct _cmstransform_struct *CMMcargo, cmsFormatterFloat* FromInput, cmsFormatterFloat* ToOutput); +// Retrieve original flags +CMSAPI cmsUInt32Number CMSEXPORT _cmsGetTransformFlags(struct _cmstransform_struct* CMMcargo); + typedef struct { cmsPluginBase base; diff --git a/plugins/fast_float/include/lcms2_fast_float.h b/plugins/fast_float/include/lcms2_fast_float.h index 4b96800..e47554c 100644 --- a/plugins/fast_float/include/lcms2_fast_float.h +++ b/plugins/fast_float/include/lcms2_fast_float.h @@ -30,7 +30,7 @@ extern "C" { # endif #endif -#define LCMS2_FAST_FLOAT_VERSION 1200 +#define LCMS2_FAST_FLOAT_VERSION 1300 // Configuration toggles diff --git a/plugins/fast_float/src/fast_16_tethra.c b/plugins/fast_float/src/fast_16_tethra.c index ea07029..cc1f6b8 100644 --- a/plugins/fast_float/src/fast_16_tethra.c +++ b/plugins/fast_float/src/fast_16_tethra.c @@ -116,13 +116,16 @@ void PerformanceEval16(struct _cmstransform_struct *CMMcargo, cmsUInt32Number dwInFormat = cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo); cmsUInt32Number dwOutFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo); - + _cmsComputeComponentIncrements(dwInFormat, Stride->BytesPerPlaneIn, NULL, &nalpha, SourceStartingOrder, SourceIncrements); _cmsComputeComponentIncrements(dwOutFormat, Stride->BytesPerPlaneOut, NULL, &nalpha, DestStartingOrder, DestIncrements); in16 = (T_BYTES(dwInFormat) == 2); out16 = (T_BYTES(dwOutFormat) == 2); + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; + strideIn = 
strideOut = 0; for (i = 0; i < LineCount; i++) { @@ -306,7 +309,7 @@ void PerformanceEval16(struct _cmstransform_struct *CMMcargo, // -------------------------------------------------------------------------------------------------------------- -cmsBool Optimize16BitRGBTransform(_cmsTransformFn* TransformFn, +cmsBool Optimize16BitRGBTransform(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeDataFn, cmsPipeline** Lut, @@ -376,7 +379,7 @@ cmsBool Optimize16BitRGBTransform(_cmsTransformFn* TransformFn, p16 = Performance16alloc(ContextID, data->Params); if (p16 == NULL) return FALSE; - *TransformFn = (_cmsTransformFn) PerformanceEval16; + *TransformFn = PerformanceEval16; *UserData = p16; *FreeDataFn = Performance16free; *InputFormat |= 0x02000000; diff --git a/plugins/fast_float/src/fast_8_curves.c b/plugins/fast_float/src/fast_8_curves.c index 8a7508d..e234441 100644 --- a/plugins/fast_float/src/fast_8_curves.c +++ b/plugins/fast_float/src/fast_8_curves.c @@ -63,6 +63,9 @@ static void FastEvaluateRGBCurves8(struct _cmstransform_struct *CMMcargo, _cmsComputeComponentIncrements(cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneIn, NULL, &nalpha, SourceStartingOrder, SourceIncrements); _cmsComputeComponentIncrements(cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneOut, NULL, &nalpha, DestStartingOrder, DestIncrements); + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; + strideIn = strideOut = 0; for (i = 0; i < LineCount; i++) { @@ -137,6 +140,9 @@ static void FastRGBIdentity8(struct _cmstransform_struct *CMMcargo, _cmsComputeComponentIncrements(cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneIn, NULL, &nalpha, SourceStartingOrder, SourceIncrements); _cmsComputeComponentIncrements(cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneOut, NULL, &nalpha, DestStartingOrder, DestIncrements); + if 
(!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; + strideIn = strideOut = 0; for (i = 0; i < LineCount; i++) { @@ -210,6 +216,9 @@ static void FastEvaluateGrayCurves8(struct _cmstransform_struct *CMMcargo, _cmsComputeComponentIncrements(cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneIn, NULL, &nalpha, SourceStartingOrder, SourceIncrements); _cmsComputeComponentIncrements(cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneOut, NULL, &nalpha, DestStartingOrder, DestIncrements); + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; + strideIn = strideOut = 0; for (i = 0; i < LineCount; i++) { @@ -270,6 +279,9 @@ static void FastGrayIdentity8(struct _cmstransform_struct *CMMcargo, _cmsComputeComponentIncrements(cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneIn, NULL, &nalpha, SourceStartingOrder, SourceIncrements); _cmsComputeComponentIncrements(cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneOut, NULL, &nalpha, DestStartingOrder, DestIncrements); + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; + strideIn = strideOut = 0; for (i = 0; i < LineCount; i++) { @@ -352,7 +364,7 @@ Curves8Data* ComputeCompositeCurves(cmsUInt32Number nChan, cmsPipeline* Src) // If the target LUT holds only curves, the optimization procedure is to join all those // curves together. That only works on curves and does not work on matrices. // Any number of channels up to 16 -cmsBool Optimize8ByJoiningCurves(_cmsTransformFn* TransformFn, +cmsBool Optimize8ByJoiningCurves(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeUserData, cmsPipeline** Lut, @@ -396,9 +408,9 @@ cmsBool Optimize8ByJoiningCurves(_cmsTransformFn* TransformFn, // Maybe the curves are linear at the end if (nChans == 1) - *TransformFn = (_cmsTransformFn) (AllCurvesAreLinear(Data) ? 
FastGrayIdentity8 : FastEvaluateGrayCurves8); + *TransformFn = (AllCurvesAreLinear(Data) ? FastGrayIdentity8 : FastEvaluateGrayCurves8); else - *TransformFn = (_cmsTransformFn) (AllCurvesAreLinear(Data) ? FastRGBIdentity8 : FastEvaluateRGBCurves8); + *TransformFn = (AllCurvesAreLinear(Data) ? FastRGBIdentity8 : FastEvaluateRGBCurves8); return TRUE; diff --git a/plugins/fast_float/src/fast_8_matsh.c b/plugins/fast_float/src/fast_8_matsh.c index 6a126f3..855e6a4 100644 --- a/plugins/fast_float/src/fast_8_matsh.c +++ b/plugins/fast_float/src/fast_8_matsh.c @@ -174,7 +174,7 @@ void MatShaperXform8(struct _cmstransform_struct *CMMcargo, { XMatShaper8Data* p = (XMatShaper8Data*) _cmsGetTransformUserData(CMMcargo); - register cmsS1Fixed14Number l1, l2, l3; + cmsS1Fixed14Number l1, l2, l3; cmsS1Fixed14Number r, g, b; cmsUInt32Number ri, gi, bi; cmsUInt32Number i, ii; @@ -199,6 +199,9 @@ void MatShaperXform8(struct _cmstransform_struct *CMMcargo, _cmsComputeComponentIncrements(cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneIn, NULL, &nalpha, SourceStartingOrder, SourceIncrements); _cmsComputeComponentIncrements(cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneOut, NULL, &nalpha, DestStartingOrder, DestIncrements); + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; + strideIn = strideOut = 0; for (i = 0; i < LineCount; i++) { @@ -262,7 +265,7 @@ void MatShaperXform8(struct _cmstransform_struct *CMMcargo, // 8 bits on input allows matrix-shaper boost up a little bit -cmsBool Optimize8MatrixShaper(_cmsTransformFn* TransformFn, +cmsBool Optimize8MatrixShaper(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeUserData, cmsPipeline** Lut, @@ -368,7 +371,7 @@ cmsBool Optimize8MatrixShaper(_cmsTransformFn* TransformFn, *UserData = SetMatShaper(ContextID, mpeC1 ->TheCurves, &res, (cmsVEC3*) Data2 ->Offset, mpeC2->TheCurves); *FreeUserData = FreeMatShaper; - 
*TransformFn = (_cmsTransformFn) MatShaperXform8; + *TransformFn = MatShaperXform8; } *dwFlags &= ~cmsFLAGS_CAN_CHANGE_FORMATTER; diff --git a/plugins/fast_float/src/fast_8_matsh_sse.c b/plugins/fast_float/src/fast_8_matsh_sse.c index ddffd0d..8eb238b 100644 --- a/plugins/fast_float/src/fast_8_matsh_sse.c +++ b/plugins/fast_float/src/fast_8_matsh_sse.c @@ -214,6 +214,9 @@ void MatShaperXform8SSE(struct _cmstransform_struct *CMMcargo, _cmsComputeComponentIncrements(cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneIn, NULL, &nalpha, SourceStartingOrder, SourceIncrements); _cmsComputeComponentIncrements(cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneOut, NULL, &nalpha, DestStartingOrder, DestIncrements); + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; + strideIn = strideOut = 0; for (i = 0; i < LineCount; i++) { @@ -317,7 +320,7 @@ cmsBool IsSSE2Available(void) // 8 bits on input allows matrix-shaper boost up a little bit -cmsBool Optimize8MatrixShaperSSE(_cmsTransformFn* TransformFn, +cmsBool Optimize8MatrixShaperSSE(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeUserData, cmsPipeline** Lut, @@ -407,7 +410,7 @@ cmsBool Optimize8MatrixShaperSSE(_cmsTransformFn* TransformFn, *UserData = SetMatShaper(ContextID, mpeC1 ->TheCurves, &res, (cmsVEC3*) Data2 ->Offset, mpeC2->TheCurves); *FreeUserData = FreeMatShaper; - *TransformFn = (_cmsTransformFn) MatShaperXform8SSE; + *TransformFn = MatShaperXform8SSE; } *dwFlags &= ~cmsFLAGS_CAN_CHANGE_FORMATTER; diff --git a/plugins/fast_float/src/fast_8_tethra.c b/plugins/fast_float/src/fast_8_tethra.c index 36b492b..a1a35a3 100644 --- a/plugins/fast_float/src/fast_8_tethra.c +++ b/plugins/fast_float/src/fast_8_tethra.c @@ -148,10 +148,12 @@ void PerformanceEval8(struct _cmstransform_struct *CMMcargo, cmsUInt32Number nalpha, strideIn, strideOut; - 
_cmsComputeComponentIncrements(cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneIn, NULL, &nalpha, SourceStartingOrder, SourceIncrements); _cmsComputeComponentIncrements(cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneOut, NULL, &nalpha, DestStartingOrder, DestIncrements); + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; + strideIn = strideOut = 0; for (i = 0; i < LineCount; i++) { @@ -326,7 +328,7 @@ void SlopeLimiting(cmsUInt16Number* Table16, int nEntries) // -------------------------------------------------------------------------------------------------------------- -cmsBool Optimize8BitRGBTransform(_cmsTransformFn* TransformFn, +cmsBool Optimize8BitRGBTransform(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeDataFn, cmsPipeline** Lut, @@ -483,7 +485,7 @@ cmsBool Optimize8BitRGBTransform(_cmsTransformFn* TransformFn, *dwFlags &= ~cmsFLAGS_CAN_CHANGE_FORMATTER; *Lut = OptimizedLUT; - *TransformFn = (_cmsTransformFn) PerformanceEval8; + *TransformFn = PerformanceEval8; *UserData = p8; *FreeDataFn = Performance8free; diff --git a/plugins/fast_float/src/fast_float_15mats.c b/plugins/fast_float/src/fast_float_15mats.c index 56414b4..c2f4721 100644 --- a/plugins/fast_float/src/fast_float_15mats.c +++ b/plugins/fast_float/src/fast_float_15mats.c @@ -186,6 +186,9 @@ void MatShaperXform(struct _cmstransform_struct *CMMcargo, _cmsComputeComponentIncrements(cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneIn, NULL, &nalpha, SourceStartingOrder, SourceIncrements); _cmsComputeComponentIncrements(cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneOut, NULL, &nalpha, DestStartingOrder, DestIncrements); + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; + strideIn = strideOut = 0; for (i = 0; i < LineCount; i++) { @@ -258,7 +261,7 @@ void MatShaperXform(struct 
_cmstransform_struct *CMMcargo, // 15 bits on input allows matrix-shaper boost up a little bit -cmsBool OptimizeMatrixShaper15(_cmsTransformFn* TransformFn, +cmsBool OptimizeMatrixShaper15(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeUserData, cmsPipeline** Lut, @@ -341,7 +344,7 @@ cmsBool OptimizeMatrixShaper15(_cmsTransformFn* TransformFn, *UserData = SetMatShaper(ContextID, mpeC1->TheCurves, &res, (cmsVEC3*)Data2->Offset, mpeC2->TheCurves, IdentityMat); *FreeUserData = FreeMatShaper; - *TransformFn = (_cmsTransformFn)MatShaperXform; + *TransformFn = MatShaperXform; } diff --git a/plugins/fast_float/src/fast_float_cmyk.c b/plugins/fast_float/src/fast_float_cmyk.c index 1b15f4f..a175f45 100644 --- a/plugins/fast_float/src/fast_float_cmyk.c +++ b/plugins/fast_float/src/fast_float_cmyk.c @@ -73,10 +73,11 @@ cmsINLINE cmsFloat32Number fclamp100(cmsFloat32Number v) static void FloatCMYKCLUTEval(struct _cmstransform_struct *CMMcargo, - const cmsFloat32Number* Input, - cmsFloat32Number* Output, - cmsUInt32Number len, - cmsUInt32Number Stride) + const void* Input, + void* Output, + cmsUInt32Number PixelsPerLine, + cmsUInt32Number LineCount, + const cmsStride* Stride) { cmsFloat32Number c, m, y, k; @@ -91,12 +92,14 @@ void FloatCMYKCLUTEval(struct _cmstransform_struct *CMMcargo, const cmsInterpParams* p = p8 ->p; cmsUInt32Number TotalOut = p -> nOutputs; + cmsUInt32Number TotalPlusAlpha; const cmsFloat32Number* LutTable = (const cmsFloat32Number*)p->Table; - cmsUInt32Number ii; + cmsUInt32Number i, ii; const cmsUInt8Number* cin; const cmsUInt8Number* min; const cmsUInt8Number* yin; const cmsUInt8Number* kin; + const cmsUInt8Number* ain = NULL; cmsFloat32Number Tmp1[cmsMAXCHANNELS], Tmp2[cmsMAXCHANNELS]; @@ -110,187 +113,203 @@ void FloatCMYKCLUTEval(struct _cmstransform_struct *CMMcargo, cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM) CMMcargo); cmsUInt32Number nchans, nalpha; + cmsUInt32Number strideIn, strideOut; - 
_cmsComputeComponentIncrements(InputFormat, Stride, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); - _cmsComputeComponentIncrements(OutputFormat, Stride, &nchans, &nalpha, DestStartingOrder, DestIncrements); + _cmsComputeComponentIncrements(InputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); + _cmsComputeComponentIncrements(OutputFormat, Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements); - // SeparateCMYK(InputFormat, Stride, SourceStartingOrder, SourceIncrements); - // SeparateCMYK(OutputFormat, Stride, DestStartingOrder, DestIncrements); + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; - cin = (const cmsUInt8Number*)Input + SourceStartingOrder[0]; - min = (const cmsUInt8Number*)Input + SourceStartingOrder[1]; - yin = (const cmsUInt8Number*)Input + SourceStartingOrder[2]; - kin = (const cmsUInt8Number*)Input + SourceStartingOrder[3]; + strideIn = strideOut = 0; + for (i = 0; i < LineCount; i++) { - for (ii=0; ii < TotalOut; ii++) - out[ii] = (cmsUInt8Number*)Output + DestStartingOrder[ii]; + cin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn; + min = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn; + yin = (const cmsUInt8Number*)Input + SourceStartingOrder[2] + strideIn; + kin = (const cmsUInt8Number*)Input + SourceStartingOrder[3] + strideIn; - for (ii=0; ii < len; ii++) { - - c = fclamp100(*(cmsFloat32Number*)cin) / 100.0f; - m = fclamp100(*(cmsFloat32Number*)min) / 100.0f; - y = fclamp100(*(cmsFloat32Number*)yin) / 100.0f; - k = fclamp100(*(cmsFloat32Number*)kin) / 100.0f; + if (nalpha) + ain = (const cmsUInt8Number*)Input + SourceStartingOrder[4] + strideIn; - cin += SourceIncrements[0]; - min += SourceIncrements[1]; - yin += SourceIncrements[2]; - kin += SourceIncrements[3]; + TotalPlusAlpha = TotalOut; + if (ain) TotalPlusAlpha++; - pk = c * p->Domain[0]; // C - px = m * p->Domain[1]; // M - py = 
y * p->Domain[2]; // Y - pz = k * p->Domain[3]; // K + for (ii = 0; ii < TotalPlusAlpha; ii++) + out[ii] = (cmsUInt8Number*)Output + DestStartingOrder[ii] + strideOut; + for (ii = 0; ii < PixelsPerLine; ii++) { - k0 = (int)_cmsQuickFloor(pk); rk = (pk - (cmsFloat32Number)k0); - x0 = (int)_cmsQuickFloor(px); rx = (px - (cmsFloat32Number)x0); - y0 = (int)_cmsQuickFloor(py); ry = (py - (cmsFloat32Number)y0); - z0 = (int)_cmsQuickFloor(pz); rz = (pz - (cmsFloat32Number)z0); + c = fclamp100(*(cmsFloat32Number*)cin) / 100.0f; + m = fclamp100(*(cmsFloat32Number*)min) / 100.0f; + y = fclamp100(*(cmsFloat32Number*)yin) / 100.0f; + k = fclamp100(*(cmsFloat32Number*)kin) / 100.0f; + cin += SourceIncrements[0]; + min += SourceIncrements[1]; + yin += SourceIncrements[2]; + kin += SourceIncrements[3]; - K0 = p->opta[3] * k0; - K1 = K0 + (c >= 1.0 ? 0 : p->opta[3]); + pk = c * p->Domain[0]; // C + px = m * p->Domain[1]; // M + py = y * p->Domain[2]; // Y + pz = k * p->Domain[3]; // K - X0 = p->opta[2] * x0; - X1 = X0 + (m >= 1.0 ? 0 : p->opta[2]); - Y0 = p->opta[1] * y0; - Y1 = Y0 + (y >= 1.0 ? 0 : p->opta[1]); + k0 = (int)_cmsQuickFloor(pk); rk = (pk - (cmsFloat32Number)k0); + x0 = (int)_cmsQuickFloor(px); rx = (px - (cmsFloat32Number)x0); + y0 = (int)_cmsQuickFloor(py); ry = (py - (cmsFloat32Number)y0); + z0 = (int)_cmsQuickFloor(pz); rz = (pz - (cmsFloat32Number)z0); - Z0 = p->opta[0] * z0; - Z1 = Z0 + (k >= 1.0 ? 0 : p->opta[0]); - for (OutChan = 0; OutChan < TotalOut; OutChan++) { - - c0 = DENS(X0, Y0, Z0); + K0 = p->opta[3] * k0; + K1 = K0 + (c >= 1.0 ? 0 : p->opta[3]); - if (rx >= ry && ry >= rz) { + X0 = p->opta[2] * x0; + X1 = X0 + (m >= 1.0 ? 0 : p->opta[2]); - c1 = DENS(X1, Y0, Z0) - c0; - c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0); - c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); + Y0 = p->opta[1] * y0; + Y1 = Y0 + (y >= 1.0 ? 0 : p->opta[1]); - } - else - if (rx >= rz && rz >= ry) { + Z0 = p->opta[0] * z0; + Z1 = Z0 + (k >= 1.0 ? 
0 : p->opta[0]); - c1 = DENS(X1, Y0, Z0) - c0; - c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); - c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0); + for (OutChan = 0; OutChan < TotalOut; OutChan++) { - } - else - if (rz >= rx && rx >= ry) { + c0 = DENS(X0, Y0, Z0); - c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1); - c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); - c3 = DENS(X0, Y0, Z1) - c0; + if (rx >= ry && ry >= rz) { - } - else - if (ry >= rx && rx >= rz) { + c1 = DENS(X1, Y0, Z0) - c0; + c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0); + c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); - c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0); - c2 = DENS(X0, Y1, Z0) - c0; - c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); + } + else + if (rx >= rz && rz >= ry) { - } - else - if (ry >= rz && rz >= rx) { + c1 = DENS(X1, Y0, Z0) - c0; + c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); + c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0); - c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); - c2 = DENS(X0, Y1, Z0) - c0; - c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0); + } + else + if (rz >= rx && rx >= ry) { - } - else - if (rz >= ry && ry >= rx) { + c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1); + c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); + c3 = DENS(X0, Y0, Z1) - c0; - c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); - c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1); - c3 = DENS(X0, Y0, Z1) - c0; + } + else + if (ry >= rx && rx >= rz) { - } - else { - c1 = c2 = c3 = 0; - } + c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0); + c2 = DENS(X0, Y1, Z0) - c0; + c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); + } + else + if (ry >= rz && rz >= rx) { - Tmp1[OutChan] = c0 + c1 * rx + c2 * ry + c3 * rz; - - } + c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); + c2 = DENS(X0, Y1, Z0) - c0; + c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0); + } + else + if (rz >= ry && ry >= rx) { - LutTable = (cmsFloat32Number*)p->Table; - LutTable += K1; + c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); + c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1); + c3 = DENS(X0, Y0, Z1) - c0; - for (OutChan = 0; OutChan < p->nOutputs; 
OutChan++) { + } + else { + c1 = c2 = c3 = 0; + } - c0 = DENS(X0, Y0, Z0); - if (rx >= ry && ry >= rz) { + Tmp1[OutChan] = c0 + c1 * rx + c2 * ry + c3 * rz; - c1 = DENS(X1, Y0, Z0) - c0; - c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0); - c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); + } - } - else - if (rx >= rz && rz >= ry) { - c1 = DENS(X1, Y0, Z0) - c0; - c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); - c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0); + LutTable = (cmsFloat32Number*)p->Table; + LutTable += K1; - } - else - if (rz >= rx && rx >= ry) { + for (OutChan = 0; OutChan < p->nOutputs; OutChan++) { - c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1); - c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); - c3 = DENS(X0, Y0, Z1) - c0; + c0 = DENS(X0, Y0, Z0); - } - else - if (ry >= rx && rx >= rz) { + if (rx >= ry && ry >= rz) { - c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0); - c2 = DENS(X0, Y1, Z0) - c0; - c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); + c1 = DENS(X1, Y0, Z0) - c0; + c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0); + c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); - } - else - if (ry >= rz && rz >= rx) { + } + else + if (rx >= rz && rz >= ry) { - c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); - c2 = DENS(X0, Y1, Z0) - c0; - c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0); + c1 = DENS(X1, Y0, Z0) - c0; + c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); + c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0); - } - else - if (rz >= ry && ry >= rx) { + } + else + if (rz >= rx && rx >= ry) { - c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); - c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1); - c3 = DENS(X0, Y0, Z1) - c0; + c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1); + c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); + c3 = DENS(X0, Y0, Z1) - c0; - } - else { - c1 = c2 = c3 = 0; - } + } + else + if (ry >= rx && rx >= rz) { - Tmp2[OutChan] = c0 + c1 * rx + c2 * ry + c3 * rz; - } + c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0); + c2 = DENS(X0, Y1, Z0) - c0; + c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); + } + else + if (ry >= rz && rz >= rx) { - for (OutChan = 0; 
OutChan < p->nOutputs; OutChan++) { + c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); + c2 = DENS(X0, Y1, Z0) - c0; + c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0); - *(cmsFloat32Number*)(out[OutChan]) = LinearInterpInt(rk, Tmp1[OutChan], Tmp2[OutChan]); - out[OutChan] += DestIncrements[OutChan]; - } + } + else + if (rz >= ry && ry >= rx) { + c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); + c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1); + c3 = DENS(X0, Y0, Z1) - c0; + + } + else { + c1 = c2 = c3 = 0; + } + + Tmp2[OutChan] = c0 + c1 * rx + c2 * ry + c3 * rz; + } + + + for (OutChan = 0; OutChan < p->nOutputs; OutChan++) { + + *(cmsFloat32Number*)(out[OutChan]) = LinearInterpInt(rk, Tmp1[OutChan], Tmp2[OutChan]); + out[OutChan] += DestIncrements[OutChan]; + } + + if (ain) + *out[TotalOut] = *ain; + + } + strideIn += Stride->BytesPerLineIn; + strideOut += Stride->BytesPerLineOut; } } @@ -300,7 +319,7 @@ void FloatCMYKCLUTEval(struct _cmstransform_struct *CMMcargo, // -------------------------------------------------------------------------------------------------------------- -cmsBool OptimizeCLUTCMYKTransform(_cmsTransformFn* TransformFn, +cmsBool OptimizeCLUTCMYKTransform(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeDataFn, cmsPipeline** Lut, @@ -367,7 +386,7 @@ cmsBool OptimizeCLUTCMYKTransform(_cmsTransformFn* TransformFn, cmsPipelineFree(OriginalLut); *Lut = OptimizedLUT; - *TransformFn = (_cmsTransformFn) FloatCMYKCLUTEval; + *TransformFn = FloatCMYKCLUTEval; *UserData = p8; *FreeDataFn = _cmsFree; *dwFlags &= ~cmsFLAGS_CAN_CHANGE_FORMATTER; diff --git a/plugins/fast_float/src/fast_float_curves.c b/plugins/fast_float/src/fast_float_curves.c index d43b596..2d598d2 100644 --- a/plugins/fast_float/src/fast_float_curves.c +++ b/plugins/fast_float/src/fast_float_curves.c @@ -58,12 +58,13 @@ static void free_aligned(cmsContext ContextID, void* Data) // Evaluator for float curves. 
This are just 1D tables static void FastEvaluateFloatRGBCurves(struct _cmstransform_struct *CMMcargo, - const cmsFloat32Number* Input, - cmsFloat32Number* Output, - cmsUInt32Number len, - cmsUInt32Number Stride) + const void* Input, + void* Output, + cmsUInt32Number PixelsPerLine, + cmsUInt32Number LineCount, + const cmsStride* Stride) { - cmsUInt32Number ii; + cmsUInt32Number i, ii; cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number SourceIncrements[cmsMAXCHANNELS]; cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS]; @@ -72,10 +73,12 @@ static void FastEvaluateFloatRGBCurves(struct _cmstransform_struct *CMMcargo, const cmsUInt8Number* rin; const cmsUInt8Number* gin; const cmsUInt8Number* bin; - + const cmsUInt8Number* ain = NULL; + cmsUInt8Number* rout; cmsUInt8Number* gout; cmsUInt8Number* bout; + cmsUInt8Number* aout = NULL; cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM) CMMcargo); cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM) CMMcargo); @@ -83,45 +86,69 @@ static void FastEvaluateFloatRGBCurves(struct _cmstransform_struct *CMMcargo, CurvesFloatData* Data = (CurvesFloatData*) _cmsGetTransformUserData(CMMcargo); cmsUInt32Number nchans, nalpha; + cmsUInt32Number strideIn, strideOut; + + _cmsComputeComponentIncrements(InputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); + _cmsComputeComponentIncrements(OutputFormat, Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements); + + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; + + + strideIn = strideOut = 0; + for (i = 0; i < LineCount; i++) { + + rin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn; + gin = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn; + bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2] + strideIn; - _cmsComputeComponentIncrements(InputFormat, Stride, &nchans, &nalpha, 
SourceStartingOrder, SourceIncrements); - _cmsComputeComponentIncrements(OutputFormat, Stride, &nchans, &nalpha, DestStartingOrder, DestIncrements); + if (nalpha) + ain = (const cmsUInt8Number*)Input + SourceStartingOrder[3] + strideIn; - // SeparateRGB(InputFormat, Stride, SourceStartingOrder, SourceIncrements); - // SeparateRGB(OutputFormat, Stride, DestStartingOrder, DestIncrements); + rout = (cmsUInt8Number*)Output + DestStartingOrder[0] + strideOut; + gout = (cmsUInt8Number*)Output + DestStartingOrder[1] + strideOut; + bout = (cmsUInt8Number*)Output + DestStartingOrder[2] + strideOut; - rin = (const cmsUInt8Number*)Input + SourceStartingOrder[0]; - gin = (const cmsUInt8Number*)Input + SourceStartingOrder[1]; - bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2]; + if (nalpha) + aout = (cmsUInt8Number*)Output + DestStartingOrder[3] + strideOut; - rout = (cmsUInt8Number*)Output + DestStartingOrder[0]; - gout = (cmsUInt8Number*)Output + DestStartingOrder[1]; - bout = (cmsUInt8Number*)Output + DestStartingOrder[2]; - for (ii = 0; ii < len; ii++) { + for (ii = 0; ii < PixelsPerLine; ii++) { - *(cmsFloat32Number*)rout = flerp(Data->CurveR, *(cmsFloat32Number*)rin); - *(cmsFloat32Number*)gout = flerp(Data->CurveG, *(cmsFloat32Number*)gin); - *(cmsFloat32Number*)bout = flerp(Data->CurveB, *(cmsFloat32Number*)bin); + *(cmsFloat32Number*)rout = flerp(Data->CurveR, *(cmsFloat32Number*)rin); + *(cmsFloat32Number*)gout = flerp(Data->CurveG, *(cmsFloat32Number*)gin); + *(cmsFloat32Number*)bout = flerp(Data->CurveB, *(cmsFloat32Number*)bin); - rin += SourceIncrements[0]; - gin += SourceIncrements[1]; - bin += SourceIncrements[2]; + rin += SourceIncrements[0]; + gin += SourceIncrements[1]; + bin += SourceIncrements[2]; - rout += DestIncrements[0]; - gout += DestIncrements[1]; - bout += DestIncrements[2]; + rout += DestIncrements[0]; + gout += DestIncrements[1]; + bout += DestIncrements[2]; + + if (ain) + { + *(cmsFloat32Number*)aout = *(cmsFloat32Number*)ain; + ain += 
SourceIncrements[3]; + aout += DestIncrements[3]; + } + } + + strideIn += Stride->BytesPerLineIn; + strideOut += Stride->BytesPerLineOut; } } // Do nothing but arrange the RGB format. static void FastFloatRGBIdentity(struct _cmstransform_struct *CMMcargo, - const cmsFloat32Number* Input, - cmsFloat32Number* Output, - cmsUInt32Number len, - cmsUInt32Number Stride) + const void* Input, + void* Output, + cmsUInt32Number PixelsPerLine, + cmsUInt32Number LineCount, + const cmsStride* Stride) { - cmsUInt32Number ii; + cmsUInt32Number i, ii; cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number SourceIncrements[cmsMAXCHANNELS]; cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS]; @@ -129,37 +156,49 @@ static void FastFloatRGBIdentity(struct _cmstransform_struct *CMMcargo, const cmsUInt8Number* rin; const cmsUInt8Number* gin; const cmsUInt8Number* bin; + const cmsUInt8Number* ain = NULL; cmsUInt8Number* rout; cmsUInt8Number* gout; cmsUInt8Number* bout; - + cmsUInt8Number* aout = NULL; cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM) CMMcargo); cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM) CMMcargo); - cmsUInt32Number nchans, nalpha; + cmsUInt32Number strideIn, strideOut; + + _cmsComputeComponentIncrements(InputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); + _cmsComputeComponentIncrements(OutputFormat, Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements); + + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; - _cmsComputeComponentIncrements(InputFormat, Stride, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); - _cmsComputeComponentIncrements(OutputFormat, Stride, &nchans, &nalpha, DestStartingOrder, DestIncrements); + strideIn = strideOut = 0; + for (i = 0; i < LineCount; i++) { - // SeparateRGB(InputFormat, Stride, SourceStartingOrder, SourceIncrements); - // SeparateRGB(OutputFormat, 
Stride, DestStartingOrder, DestIncrements); + rin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn; + gin = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn; + bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2] + strideIn; - rin = (const cmsUInt8Number*)Input + SourceStartingOrder[0]; - gin = (const cmsUInt8Number*)Input + SourceStartingOrder[1]; - bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2]; + if (nalpha) + ain = (const cmsUInt8Number*)Input + SourceStartingOrder[3] + strideIn; - rout = (cmsUInt8Number*)Output + DestStartingOrder[0]; - gout = (cmsUInt8Number*)Output + DestStartingOrder[1]; - bout = (cmsUInt8Number*)Output + DestStartingOrder[2]; - for (ii=0; ii < len; ii++) { + rout = (cmsUInt8Number*)Output + DestStartingOrder[0] + strideOut; + gout = (cmsUInt8Number*)Output + DestStartingOrder[1] + strideOut; + bout = (cmsUInt8Number*)Output + DestStartingOrder[2] + strideOut; - memmove(rout, rin, 4); - memmove(gout, gin, 4); - memmove(bout, bin, 4); + if (nalpha) + aout = (cmsUInt8Number*)Output + DestStartingOrder[3] + strideOut; + + for (ii=0; ii < PixelsPerLine; ii++) { + + *(cmsFloat32Number*)rout = *(cmsFloat32Number*)rin; + *(cmsFloat32Number*)gout = *(cmsFloat32Number*)gin; + *(cmsFloat32Number*)bout = *(cmsFloat32Number*)bin; + rin += SourceIncrements[0]; gin += SourceIncrements[1]; bin += SourceIncrements[2]; @@ -167,84 +206,146 @@ static void FastFloatRGBIdentity(struct _cmstransform_struct *CMMcargo, rout += DestIncrements[0]; gout += DestIncrements[1]; bout += DestIncrements[2]; + + + if (ain) + { + *(cmsFloat32Number*)aout = *(cmsFloat32Number*)ain; + ain += SourceIncrements[3]; + aout += DestIncrements[3]; + } + } + + strideIn += Stride->BytesPerLineIn; + strideOut += Stride->BytesPerLineOut; } } // Evaluate 1 channel only -static void FastEvaluateFloatGrayCurves(struct _cmstransform_struct *CMMcargo, - const cmsFloat32Number* Input, - cmsFloat32Number* Output, - cmsUInt32Number len, - 
cmsUInt32Number Stride) -{ - cmsUInt32Number ii; +static void FastEvaluateFloatGrayCurves(struct _cmstransform_struct* CMMcargo, + const void* Input, + void* Output, + cmsUInt32Number PixelsPerLine, + cmsUInt32Number LineCount, + const cmsStride* Stride) +{ + cmsUInt32Number i, ii; cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number SourceIncrements[cmsMAXCHANNELS]; cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number DestIncrements[cmsMAXCHANNELS]; const cmsUInt8Number* kin; + const cmsUInt8Number* ain = NULL; cmsUInt8Number* kout; - - cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM) CMMcargo); - cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM) CMMcargo); + cmsUInt8Number* aout = NULL; - CurvesFloatData* Data = (CurvesFloatData*) _cmsGetTransformUserData(CMMcargo); + cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo); + cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo); + + CurvesFloatData* Data = (CurvesFloatData*)_cmsGetTransformUserData(CMMcargo); cmsUInt32Number nchans, nalpha; + cmsUInt32Number strideIn, strideOut; - _cmsComputeComponentIncrements(InputFormat, Stride, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); - _cmsComputeComponentIncrements(OutputFormat, Stride, &nchans, &nalpha, DestStartingOrder, DestIncrements); + _cmsComputeComponentIncrements(InputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); + _cmsComputeComponentIncrements(OutputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, DestStartingOrder, DestIncrements); - // SeparateGray(InputFormat, Stride, &SourceStartingOrder, &SourceIncrement); - // SeparateGray(OutputFormat, Stride, &DestStartingOrder, &DestIncrement); + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; - kin = (const cmsUInt8Number*)Input + SourceStartingOrder[0]; - kout = 
(cmsUInt8Number*)Output + DestStartingOrder[0]; - - for (ii = 0; ii < len; ii++) { + strideIn = strideOut = 0; + for (i = 0; i < LineCount; i++) { + + kin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn; + kout = (cmsUInt8Number*)Output + DestStartingOrder[0] + strideOut; + + if (nalpha) + { + ain = (const cmsUInt8Number*)Input + SourceStartingOrder[1]; + aout = (cmsUInt8Number*)Output + DestStartingOrder[1]; + } + + for (ii = 0; ii < PixelsPerLine; ii++) { + + *(cmsFloat32Number*)kout = flerp(Data->CurveR, *(cmsFloat32Number*)kin); - *(cmsFloat32Number*)kout = flerp(Data->CurveR, *(cmsFloat32Number*)kin); + kin += SourceIncrements[0]; + kout += DestIncrements[0]; - kin += SourceIncrements[0]; - kout += DestIncrements[0]; + if (ain) + { + *(cmsFloat32Number*)aout = *(cmsFloat32Number*)ain; + ain += SourceIncrements[1]; + aout += DestIncrements[1]; + } + } + + strideIn += Stride->BytesPerLineIn; + strideOut += Stride->BytesPerLineOut; } } -static void FastFloatGrayIdentity(struct _cmstransform_struct *CMMcargo, - const cmsFloat32Number* Input, - cmsFloat32Number* Output, - cmsUInt32Number len, - cmsUInt32Number Stride) +static void FastFloatGrayIdentity(struct _cmstransform_struct* CMMcargo, + const void* Input, + void* Output, + cmsUInt32Number PixelsPerLine, + cmsUInt32Number LineCount, + const cmsStride* Stride) { - cmsUInt32Number ii; + cmsUInt32Number i, ii; cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number SourceIncrements[cmsMAXCHANNELS]; cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number DestIncrements[cmsMAXCHANNELS]; + const cmsUInt8Number* kin; + const cmsUInt8Number* ain = NULL; cmsUInt8Number* kout; - - cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM) CMMcargo); - cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM) CMMcargo); - + cmsUInt8Number* aout = NULL; + + cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo); + 
cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo); + cmsUInt32Number nchans, nalpha; + cmsUInt32Number strideIn, strideOut; - _cmsComputeComponentIncrements(InputFormat, Stride, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); - _cmsComputeComponentIncrements(OutputFormat, Stride, &nchans, &nalpha, DestStartingOrder, DestIncrements); + _cmsComputeComponentIncrements(InputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); + _cmsComputeComponentIncrements(OutputFormat, Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements); - // SeparateGray(InputFormat, Stride, &SourceStartingOrder, &SourceIncrement); - // SeparateGray(OutputFormat, Stride, &DestStartingOrder, &DestIncrement); + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; - kin = (const cmsUInt8Number*) Input + SourceStartingOrder[0]; - kout = (cmsUInt8Number*)Output + DestStartingOrder[0]; - - for (ii=0; ii < len; ii++) { + strideIn = strideOut = 0; + for (i = 0; i < LineCount; i++) { - memmove(kout, kin, 4); - - kin += SourceIncrements[0]; - kout += DestIncrements[0]; + + kin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn; + kout = (cmsUInt8Number*)Output + DestStartingOrder[0] + strideOut; + + if (nalpha) + { + ain = (const cmsUInt8Number*)Input + SourceStartingOrder[1]; + aout = (cmsUInt8Number*)Output + DestStartingOrder[1]; + } + + + for (ii = 0; ii < PixelsPerLine; ii++) { + + *(cmsFloat32Number*)kout = *(cmsFloat32Number*)kin; + + kin += SourceIncrements[0]; + kout += DestIncrements[0]; + + if (ain) + { + *(cmsFloat32Number*)aout = *(cmsFloat32Number*)ain; + ain += SourceIncrements[1]; + aout += DestIncrements[1]; + } + } + strideIn += Stride->BytesPerLineIn; + strideOut += Stride->BytesPerLineOut; } } @@ -325,7 +426,7 @@ CurvesFloatData* ComputeCompositeCurves(cmsUInt32Number nChan, cmsPipeline* Src // If the target LUT holds only 
curves, the optimization procedure is to join all those // curves together. That only works on curves and does not work on matrices. -cmsBool OptimizeFloatByJoiningCurves(_cmsTransformFn* TransformFn, +cmsBool OptimizeFloatByJoiningCurves(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeUserData, cmsPipeline** Lut, @@ -369,9 +470,9 @@ cmsBool OptimizeFloatByJoiningCurves(_cmsTransformFn* TransformFn, // Maybe the curves are linear at the end if (nChans == 1) - *TransformFn = (_cmsTransformFn) (KCurveIsLinear(Data) ? FastFloatGrayIdentity : FastEvaluateFloatGrayCurves); + *TransformFn = (KCurveIsLinear(Data) ? FastFloatGrayIdentity : FastEvaluateFloatGrayCurves); else - *TransformFn = (_cmsTransformFn) (AllRGBCurvesAreLinear(Data) ? FastFloatRGBIdentity : FastEvaluateFloatRGBCurves); + *TransformFn = (AllRGBCurvesAreLinear(Data) ? FastFloatRGBIdentity : FastEvaluateFloatRGBCurves); return TRUE; diff --git a/plugins/fast_float/src/fast_float_internal.h b/plugins/fast_float/src/fast_float_internal.h index 92b377a..94f1a4a 100644 --- a/plugins/fast_float/src/fast_float_internal.h +++ b/plugins/fast_float/src/fast_float_internal.h @@ -163,7 +163,7 @@ cmsFormatter Formatter_15Bit_Factory(cmsUInt32Number Type, // Optimizers // 8 bits on input allows matrix-shaper boost up a little bit -cmsBool Optimize8MatrixShaper(_cmsTransformFn* TransformFn, +cmsBool Optimize8MatrixShaper(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeUserData, cmsPipeline** Lut, @@ -172,7 +172,7 @@ cmsBool Optimize8MatrixShaper(_cmsTransformFn* TransformFn, cmsUInt32Number* dwFlags); // 8 bits using SSE -cmsBool Optimize8MatrixShaperSSE(_cmsTransformFn* TransformFn, +cmsBool Optimize8MatrixShaperSSE(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeUserData, cmsPipeline** Lut, @@ -180,7 +180,7 @@ cmsBool Optimize8MatrixShaperSSE(_cmsTransformFn* TransformFn, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags); -cmsBool 
OptimizeMatrixShaper15(_cmsTransformFn* TransformFn, +cmsBool OptimizeMatrixShaper15(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeUserData, cmsPipeline** Lut, @@ -189,7 +189,7 @@ cmsBool OptimizeMatrixShaper15(_cmsTransformFn* TransformFn, cmsUInt32Number* dwFlags); -cmsBool Optimize8ByJoiningCurves(_cmsTransformFn* TransformFn, +cmsBool Optimize8ByJoiningCurves(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeUserData, cmsPipeline** Lut, @@ -197,7 +197,7 @@ cmsBool Optimize8ByJoiningCurves(_cmsTransformFn* TransformFn, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags); -cmsBool OptimizeFloatByJoiningCurves(_cmsTransformFn* TransformFn, +cmsBool OptimizeFloatByJoiningCurves(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeUserData, cmsPipeline** Lut, @@ -205,7 +205,7 @@ cmsBool OptimizeFloatByJoiningCurves(_cmsTransformFn* TransformFn, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags); -cmsBool OptimizeFloatMatrixShaper(_cmsTransformFn* TransformFn, +cmsBool OptimizeFloatMatrixShaper(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeUserData, cmsPipeline** Lut, @@ -213,7 +213,7 @@ cmsBool OptimizeFloatMatrixShaper(_cmsTransformFn* TransformFn, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags); -cmsBool Optimize8BitRGBTransform(_cmsTransformFn* TransformFn, +cmsBool Optimize8BitRGBTransform(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeDataFn, cmsPipeline** Lut, @@ -221,7 +221,7 @@ cmsBool Optimize8BitRGBTransform(_cmsTransformFn* TransformFn, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags); -cmsBool Optimize16BitRGBTransform(_cmsTransformFn* TransformFn, +cmsBool Optimize16BitRGBTransform(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeDataFn, cmsPipeline** Lut, @@ -229,7 +229,7 @@ cmsBool Optimize16BitRGBTransform(_cmsTransformFn* TransformFn, cmsUInt32Number* OutputFormat, cmsUInt32Number* 
dwFlags); -cmsBool OptimizeCLUTRGBTransform(_cmsTransformFn* TransformFn, +cmsBool OptimizeCLUTRGBTransform(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeDataFn, cmsPipeline** Lut, @@ -237,7 +237,7 @@ cmsBool OptimizeCLUTRGBTransform(_cmsTransformFn* TransformFn, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags); -cmsBool OptimizeCLUTCMYKTransform(_cmsTransformFn* TransformFn, +cmsBool OptimizeCLUTCMYKTransform(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeDataFn, cmsPipeline** Lut, diff --git a/plugins/fast_float/src/fast_float_matsh.c b/plugins/fast_float/src/fast_float_matsh.c index 9704873..9424b62 100644 --- a/plugins/fast_float/src/fast_float_matsh.c +++ b/plugins/fast_float/src/fast_float_matsh.c @@ -136,16 +136,17 @@ VXMatShaperFloatData* SetMatShaper(cmsContext ContextID, cmsToneCurve* Curve1[3] // A fast matrix-shaper evaluator for floating point static -void MatShaperFloat(struct _cmstransform_struct *CMMcargo, - const cmsFloat32Number* Input, - cmsFloat32Number* Output, - cmsUInt32Number len, - cmsUInt32Number Stride) +void MatShaperFloat(struct _cmstransform_struct* CMMcargo, + const void* Input, + void* Output, + cmsUInt32Number PixelsPerLine, + cmsUInt32Number LineCount, + const cmsStride* Stride) { VXMatShaperFloatData* p = (VXMatShaperFloatData*) _cmsGetTransformUserData(CMMcargo); cmsFloat32Number l1, l2, l3; cmsFloat32Number r, g, b; - cmsUInt32Number ii; + cmsUInt32Number i, ii; cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number SourceIncrements[cmsMAXCHANNELS]; cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS]; @@ -154,59 +155,84 @@ void MatShaperFloat(struct _cmstransform_struct *CMMcargo, const cmsUInt8Number* rin; const cmsUInt8Number* gin; const cmsUInt8Number* bin; + const cmsUInt8Number* ain = NULL; cmsUInt8Number* rout; cmsUInt8Number* gout; cmsUInt8Number* bout; + cmsUInt8Number* aout = NULL; cmsUInt32Number nchans, nalpha; + cmsUInt32Number strideIn, 
strideOut; - _cmsComputeComponentIncrements(cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo), Stride, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); - _cmsComputeComponentIncrements(cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo), Stride, &nchans, &nalpha, DestStartingOrder, DestIncrements); + _cmsComputeComponentIncrements(cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); + _cmsComputeComponentIncrements(cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo), Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements); - rin = (const cmsUInt8Number*)Input + SourceStartingOrder[0]; - gin = (const cmsUInt8Number*)Input + SourceStartingOrder[1]; - bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2]; + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; - rout = (cmsUInt8Number*)Output + DestStartingOrder[0]; - gout = (cmsUInt8Number*)Output + DestStartingOrder[1]; - bout = (cmsUInt8Number*)Output + DestStartingOrder[2]; - - for (ii=0; ii < len; ii++) { + strideIn = strideOut = 0; + for (i = 0; i < LineCount; i++) { - r = flerp(p->Shaper1R, *(cmsFloat32Number*)rin); - g = flerp(p->Shaper1G, *(cmsFloat32Number*)gin); - b = flerp(p->Shaper1B, *(cmsFloat32Number*)bin); + rin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn; + gin = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn; + bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2] + strideIn; - l1 = p->Mat[0][0] * r + p->Mat[0][1] * g + p->Mat[0][2] * b ; - l2 = p->Mat[1][0] * r + p->Mat[1][1] * g + p->Mat[1][2] * b ; - l3 = p->Mat[2][0] * r + p->Mat[2][1] * g + p->Mat[2][2] * b ; + if (nalpha) + ain = (const cmsUInt8Number*)Input + SourceStartingOrder[3] + strideIn; - if (p->UseOff) { + rout = (cmsUInt8Number*)Output + DestStartingOrder[0] + strideOut; + gout = (cmsUInt8Number*)Output + DestStartingOrder[1] + 
strideOut; + bout = (cmsUInt8Number*)Output + DestStartingOrder[2] + strideOut; - l1 += p->Off[0]; - l2 += p->Off[1]; - l3 += p->Off[2]; - } + if (nalpha) + aout = (cmsUInt8Number*)Output + DestStartingOrder[3] + strideOut; - *(cmsFloat32Number*)rout = flerp(p->Shaper2R, l1); - *(cmsFloat32Number*)gout = flerp(p->Shaper2G, l2); - *(cmsFloat32Number*)bout = flerp(p->Shaper2B, l3); + for (ii = 0; ii < PixelsPerLine; ii++) { - rin += SourceIncrements[0]; - gin += SourceIncrements[1]; - bin += SourceIncrements[2]; + r = flerp(p->Shaper1R, *(cmsFloat32Number*)rin); + g = flerp(p->Shaper1G, *(cmsFloat32Number*)gin); + b = flerp(p->Shaper1B, *(cmsFloat32Number*)bin); - rout += DestIncrements[0]; - gout += DestIncrements[1]; - bout += DestIncrements[2]; - } + l1 = p->Mat[0][0] * r + p->Mat[0][1] * g + p->Mat[0][2] * b; + l2 = p->Mat[1][0] * r + p->Mat[1][1] * g + p->Mat[1][2] * b; + l3 = p->Mat[2][0] * r + p->Mat[2][1] * g + p->Mat[2][2] * b; + + if (p->UseOff) { + + l1 += p->Off[0]; + l2 += p->Off[1]; + l3 += p->Off[2]; + } + + *(cmsFloat32Number*)rout = flerp(p->Shaper2R, l1); + *(cmsFloat32Number*)gout = flerp(p->Shaper2G, l2); + *(cmsFloat32Number*)bout = flerp(p->Shaper2B, l3); + rin += SourceIncrements[0]; + gin += SourceIncrements[1]; + bin += SourceIncrements[2]; + + rout += DestIncrements[0]; + gout += DestIncrements[1]; + bout += DestIncrements[2]; + + if (ain) + { + *(cmsFloat32Number*)aout = *(cmsFloat32Number*)ain; + ain += SourceIncrements[3]; + aout += DestIncrements[3]; + } + } + + strideIn += Stride->BytesPerLineIn; + strideOut += Stride->BytesPerLineOut; + } } -cmsBool OptimizeFloatMatrixShaper(_cmsTransformFn* TransformFn, +cmsBool OptimizeFloatMatrixShaper(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeUserData, cmsPipeline** Lut, @@ -314,7 +340,7 @@ cmsBool OptimizeFloatMatrixShaper(_cmsTransformFn* TransformFn, *UserData = SetMatShaper(ContextID, mpeC1 ->TheCurves, &res, (cmsVEC3*) Data2 ->Offset, mpeC2->TheCurves); 
*FreeUserData = FreeMatShaper; - *TransformFn = (_cmsTransformFn) MatShaperFloat; + *TransformFn = MatShaperFloat; } *dwFlags &= ~cmsFLAGS_CAN_CHANGE_FORMATTER; diff --git a/plugins/fast_float/src/fast_float_sup.c b/plugins/fast_float/src/fast_float_sup.c index e4bf794..c691d15 100644 --- a/plugins/fast_float/src/fast_float_sup.c +++ b/plugins/fast_float/src/fast_float_sup.c @@ -25,7 +25,7 @@ // This is the main dispatcher static -cmsBool Floating_Point_Transforms_Dispatcher(_cmsTransformFn* TransformFn, +cmsBool Floating_Point_Transforms_Dispatcher(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeUserData, cmsPipeline** Lut, @@ -81,7 +81,10 @@ static cmsPluginTransform PluginList = { { cmsPluginMagicNumber, REQUIRED_LCMS_VERSION, cmsPluginTransformSig, (cmsPluginBase *) &PluginFastFloat }, - Floating_Point_Transforms_Dispatcher + // When initializing a union, the initializer list must have only one member, which initializes the first member of + // the union unless a designated initializer is used (C99) + + (_cmsTransformFactory) Floating_Point_Transforms_Dispatcher }; // This is the main plug-in installer. 
diff --git a/plugins/fast_float/src/fast_float_tethra.c b/plugins/fast_float/src/fast_float_tethra.c index 2052d28..3531f08 100644 --- a/plugins/fast_float/src/fast_float_tethra.c +++ b/plugins/fast_float/src/fast_float_tethra.c @@ -62,13 +62,17 @@ int XFormSampler(CMSREGISTER const cmsFloat32Number In[], CMSREGISTER cmsFloat32 #define DENS(i,j,k) (LutTable[(i)+(j)+(k)+OutChan]) static -void FloatCLUTEval(struct _cmstransform_struct *CMMcargo, - const cmsFloat32Number* Input, - cmsFloat32Number* Output, - cmsUInt32Number len, - cmsUInt32Number Stride) +void FloatCLUTEval(struct _cmstransform_struct* CMMcargo, + const void* Input, + void* Output, + cmsUInt32Number PixelsPerLine, + cmsUInt32Number LineCount, + const cmsStride* Stride) + { - + + FloatCLUTData* p8 = (FloatCLUTData*)_cmsGetTransformUserData(CMMcargo); + cmsFloat32Number r, g, b; cmsFloat32Number px, py, pz; int x0, y0, z0; @@ -77,15 +81,16 @@ void FloatCLUTEval(struct _cmstransform_struct *CMMcargo, cmsFloat32Number c0, c1 = 0, c2 = 0, c3 = 0; cmsUInt32Number OutChan; - FloatCLUTData* p8 = (FloatCLUTData*) _cmsGetTransformUserData(CMMcargo); - const cmsInterpParams* p = p8 ->p; - cmsUInt32Number TotalOut = p -> nOutputs; + const cmsInterpParams* p = p8->p; + cmsUInt32Number TotalOut = p->nOutputs; + cmsUInt32Number TotalPlusAlpha; const cmsFloat32Number* LutTable = (const cmsFloat32Number*)p->Table; - cmsUInt32Number ii; - const cmsUInt8Number* rin; - const cmsUInt8Number* gin; - const cmsUInt8Number* bin; + cmsUInt32Number i, ii; + const cmsUInt8Number* rin; + const cmsUInt8Number* gin; + const cmsUInt8Number* bin; + const cmsUInt8Number* ain = NULL; cmsUInt8Number* out[cmsMAXCHANNELS]; cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS]; @@ -93,116 +98,130 @@ void FloatCLUTEval(struct _cmstransform_struct *CMMcargo, cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number DestIncrements[cmsMAXCHANNELS]; - cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM) CMMcargo); - 
cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM) CMMcargo); + cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo); + cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo); cmsUInt32Number nchans, nalpha; + cmsUInt32Number strideIn, strideOut; + + _cmsComputeComponentIncrements(InputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); + _cmsComputeComponentIncrements(OutputFormat, Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements); - _cmsComputeComponentIncrements(InputFormat, Stride, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); - _cmsComputeComponentIncrements(OutputFormat, Stride, &nchans, &nalpha, DestStartingOrder, DestIncrements); - - // SeparateRGB(InputFormat, Stride, SourceStartingOrder, SourceIncrements); - // SeparateRGB(OutputFormat, Stride, DestStartingOrder, DestIncrements); - - rin = (const cmsUInt8Number*)Input + SourceStartingOrder[0]; - gin = (const cmsUInt8Number*)Input + SourceStartingOrder[1]; - bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2]; - - for (ii=0; ii < TotalOut; ii++) - out[ii] = (cmsUInt8Number*) Output + DestStartingOrder[ii]; - - for (ii=0; ii < len; ii++) { - - r = fclamp(*(cmsFloat32Number*)rin); - g = fclamp(*(cmsFloat32Number*)gin); - b = fclamp(*(cmsFloat32Number*)bin); - - rin += SourceIncrements[0]; - gin += SourceIncrements[1]; - bin += SourceIncrements[2]; - - px = r * p->Domain[0]; - py = g * p->Domain[1]; - pz = b * p->Domain[2]; - - - x0 = (int)_cmsQuickFloor(px); rx = (px - (cmsFloat32Number)x0); - y0 = (int)_cmsQuickFloor(py); ry = (py - (cmsFloat32Number)y0); - z0 = (int)_cmsQuickFloor(pz); rz = (pz - (cmsFloat32Number)z0); - - X0 = p->opta[2] * x0; - X1 = X0 + (r >= 1.0 ? 0 : p->opta[2]); - - Y0 = p->opta[1] * y0; - Y1 = Y0 + (g >= 1.0 ? 0 : p->opta[1]); - - Z0 = p->opta[0] * z0; - Z1 = Z0 + (b >= 1.0 ? 
0 : p->opta[0]); - - for (OutChan = 0; OutChan < TotalOut; OutChan++) { + if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) + nalpha = 0; - // These are the 6 Tetrahedral + strideIn = strideOut = 0; + for (i = 0; i < LineCount; i++) { - c0 = DENS(X0, Y0, Z0); + rin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn; + gin = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn; + bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2] + strideIn; + if (nalpha) + ain = (const cmsUInt8Number*)Input + SourceStartingOrder[3] + strideIn; - if (rx >= ry && ry >= rz) { + TotalPlusAlpha = TotalOut; + if (ain) TotalPlusAlpha++; - c1 = DENS(X1, Y0, Z0) - c0; - c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0); - c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); + for (ii = 0; ii < TotalPlusAlpha; ii++) + out[ii] = (cmsUInt8Number*)Output + DestStartingOrder[ii] + strideOut; - } - else - if (rx >= rz && rz >= ry) { + for (ii = 0; ii < PixelsPerLine; ii++) { - c1 = DENS(X1, Y0, Z0) - c0; - c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); - c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0); + r = fclamp(*(cmsFloat32Number*)rin); + g = fclamp(*(cmsFloat32Number*)gin); + b = fclamp(*(cmsFloat32Number*)bin); - } - else - if (rz >= rx && rx >= ry) { + rin += SourceIncrements[0]; + gin += SourceIncrements[1]; + bin += SourceIncrements[2]; - c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1); - c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); - c3 = DENS(X0, Y0, Z1) - c0; + px = r * p->Domain[0]; + py = g * p->Domain[1]; + pz = b * p->Domain[2]; - } - else - if (ry >= rx && rx >= rz) { - c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0); - c2 = DENS(X0, Y1, Z0) - c0; - c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); + x0 = (int)_cmsQuickFloor(px); rx = (px - (cmsFloat32Number)x0); + y0 = (int)_cmsQuickFloor(py); ry = (py - (cmsFloat32Number)y0); + z0 = (int)_cmsQuickFloor(pz); rz = (pz - (cmsFloat32Number)z0); - } - else - if (ry >= rz && rz >= rx) { + X0 = p->opta[2] * x0; + X1 = X0 + (r 
>= 1.0 ? 0 : p->opta[2]); + + Y0 = p->opta[1] * y0; + Y1 = Y0 + (g >= 1.0 ? 0 : p->opta[1]); + + Z0 = p->opta[0] * z0; + Z1 = Z0 + (b >= 1.0 ? 0 : p->opta[0]); + + for (OutChan = 0; OutChan < TotalOut; OutChan++) { + + // These are the 6 Tetrahedral + + c0 = DENS(X0, Y0, Z0); + + if (rx >= ry && ry >= rz) { - c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); - c2 = DENS(X0, Y1, Z0) - c0; - c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0); + c1 = DENS(X1, Y0, Z0) - c0; + c2 = DENS(X1, Y1, Z0) - DENS(X1, Y0, Z0); + c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); - } - else - if (rz >= ry && ry >= rx) { + } + else + if (rx >= rz && rz >= ry) { - c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); - c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1); - c3 = DENS(X0, Y0, Z1) - c0; + c1 = DENS(X1, Y0, Z0) - c0; + c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); + c3 = DENS(X1, Y0, Z1) - DENS(X1, Y0, Z0); + + } + else + if (rz >= rx && rx >= ry) { + + c1 = DENS(X1, Y0, Z1) - DENS(X0, Y0, Z1); + c2 = DENS(X1, Y1, Z1) - DENS(X1, Y0, Z1); + c3 = DENS(X0, Y0, Z1) - c0; + + } + else + if (ry >= rx && rx >= rz) { + + c1 = DENS(X1, Y1, Z0) - DENS(X0, Y1, Z0); + c2 = DENS(X0, Y1, Z0) - c0; + c3 = DENS(X1, Y1, Z1) - DENS(X1, Y1, Z0); + + } + else + if (ry >= rz && rz >= rx) { + + c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); + c2 = DENS(X0, Y1, Z0) - c0; + c3 = DENS(X0, Y1, Z1) - DENS(X0, Y1, Z0); + + } + else + if (rz >= ry && ry >= rx) { + + c1 = DENS(X1, Y1, Z1) - DENS(X0, Y1, Z1); + c2 = DENS(X0, Y1, Z1) - DENS(X0, Y0, Z1); + c3 = DENS(X0, Y0, Z1) - c0; + + } + else { + c1 = c2 = c3 = 0; + } - } - else { - c1 = c2 = c3 = 0; - } + *(cmsFloat32Number*)(out[OutChan]) = c0 + c1 * rx + c2 * ry + c3 * rz; - *(cmsFloat32Number*) (out[OutChan]) = c0 + c1 * rx + c2 * ry + c3 * rz; - - out[OutChan] += DestIncrements[OutChan]; + out[OutChan] += DestIncrements[OutChan]; + } + if (ain) + *out[TotalOut] = *ain; } + strideIn += Stride->BytesPerLineIn; + strideOut += Stride->BytesPerLineOut; } } @@ -212,7 +231,7 @@ void FloatCLUTEval(struct 
_cmstransform_struct *CMMcargo, // -------------------------------------------------------------------------------------------------------------- -cmsBool OptimizeCLUTRGBTransform(_cmsTransformFn* TransformFn, +cmsBool OptimizeCLUTRGBTransform(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeDataFn, cmsPipeline** Lut, @@ -280,7 +299,7 @@ cmsBool OptimizeCLUTRGBTransform(_cmsTransformFn* TransformFn, cmsPipelineFree(OriginalLut); *Lut = OptimizedLUT; - *TransformFn = (_cmsTransformFn) FloatCLUTEval; + *TransformFn = FloatCLUTEval; *UserData = p8; *FreeDataFn = _cmsFree; *dwFlags &= ~cmsFLAGS_CAN_CHANGE_FORMATTER; diff --git a/plugins/fast_float/testbed/fast_float_testbed.c b/plugins/fast_float/testbed/fast_float_testbed.c index 71f05bc..6cf3340 100644 --- a/plugins/fast_float/testbed/fast_float_testbed.c +++ b/plugins/fast_float/testbed/fast_float_testbed.c @@ -624,7 +624,7 @@ void TryAllValuesFloat(cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut, } static -void TryAllValuesFloatAlpha(cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut, cmsInt32Number Intent) +void TryAllValuesFloatAlpha(cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfileOut, cmsInt32Number Intent, cmsBool copyAlpha) { cmsContext Raw = cmsCreateContext(NULL, NULL); cmsContext Plugin = cmsCreateContext(cmsFastFloatExtensions(), NULL); @@ -637,8 +637,10 @@ void TryAllValuesFloatAlpha(cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfile int j; cmsUInt32Number npixels = 256 * 256 * 256; - cmsHTRANSFORM xformRaw = cmsCreateTransformTHR(Raw, hlcmsProfileIn, TYPE_RGBA_FLT, hlcmsProfileOut, TYPE_RGBA_FLT, Intent, cmsFLAGS_NOCACHE); - cmsHTRANSFORM xformPlugin = cmsCreateTransformTHR(Plugin, hlcmsProfileIn, TYPE_RGBA_FLT, hlcmsProfileOut, TYPE_RGBA_FLT, Intent, cmsFLAGS_NOCACHE); + cmsUInt32Number flags = cmsFLAGS_NOCACHE | ( copyAlpha? 
cmsFLAGS_COPY_ALPHA : 0); + + cmsHTRANSFORM xformRaw = cmsCreateTransformTHR(Raw, hlcmsProfileIn, TYPE_RGBA_FLT, hlcmsProfileOut, TYPE_RGBA_FLT, Intent, flags); + cmsHTRANSFORM xformPlugin = cmsCreateTransformTHR(Plugin, hlcmsProfileIn, TYPE_RGBA_FLT, hlcmsProfileOut, TYPE_RGBA_FLT, Intent, flags); cmsCloseProfile(hlcmsProfileIn); cmsCloseProfile(hlcmsProfileOut); @@ -653,6 +655,9 @@ void TryAllValuesFloatAlpha(cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfile bufferRawOut = (Scanline_rgbaFloat*)malloc(npixels * sizeof(Scanline_rgbaFloat)); bufferPluginOut = (Scanline_rgbaFloat*)malloc(npixels * sizeof(Scanline_rgbaFloat)); + memset(bufferRawOut, 0, npixels * sizeof(Scanline_rgbaFloat)); + memset(bufferPluginOut, 0, npixels * sizeof(Scanline_rgbaFloat)); + // Same input to both transforms j = 0; for (r = 0; r < 256; r++) @@ -671,7 +676,6 @@ void TryAllValuesFloatAlpha(cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfile cmsDoTransform(xformRaw, bufferIn, bufferRawOut, npixels); cmsDoTransform(xformPlugin, bufferIn, bufferPluginOut, npixels); -#if 1 // Lets compare results j = 0; for (r = 0; r < 256; r++) @@ -687,7 +691,6 @@ void TryAllValuesFloatAlpha(cmsHPROFILE hlcmsProfileIn, cmsHPROFILE hlcmsProfile j++; } -#endif free(bufferIn); free(bufferRawOut); free(bufferPluginOut); @@ -902,12 +905,17 @@ void CheckLab2Roundtrip(void) static void CheckConversionFloat(void) { - printf("Crash test..."); - TryAllValuesFloatAlpha(cmsOpenProfileFromFile("test5.icc", "r"), cmsOpenProfileFromFile("test0.icc", "r"), INTENT_PERCEPTUAL); + printf("Crash test."); + TryAllValuesFloatAlpha(cmsOpenProfileFromFile("test5.icc", "r"), cmsOpenProfileFromFile("test0.icc", "r"), INTENT_PERCEPTUAL, FALSE); + printf(".."); + TryAllValuesFloatAlpha(cmsOpenProfileFromFile("test5.icc", "r"), cmsOpenProfileFromFile("test0.icc", "r"), INTENT_PERCEPTUAL, TRUE); printf("Ok\n"); - printf("Crash (II) test..."); - TryAllValuesFloatAlpha(cmsOpenProfileFromFile("test0.icc", "r"), 
cmsOpenProfileFromFile("test0.icc", "r"), INTENT_PERCEPTUAL); + + printf("Crash (II) test."); + TryAllValuesFloatAlpha(cmsOpenProfileFromFile("test0.icc", "r"), cmsOpenProfileFromFile("test0.icc", "r"), INTENT_PERCEPTUAL, FALSE); + printf(".."); + TryAllValuesFloatAlpha(cmsOpenProfileFromFile("test0.icc", "r"), cmsOpenProfileFromFile("test0.icc", "r"), INTENT_PERCEPTUAL, TRUE); printf("Ok\n"); // Matrix-shaper should be accurate @@ -1857,8 +1865,7 @@ int main() printf("Installing plug-in ... "); cmsPlugin(cmsFastFloatExtensions()); printf("done.\n\n"); - - + CheckComputeIncrements(); // 15 bit functionality diff --git a/src/cmsxform.c b/src/cmsxform.c index 080fc2b..4f55e2f 100644 --- a/src/cmsxform.c +++ b/src/cmsxform.c @@ -774,6 +774,12 @@ void CMSEXPORT _cmsGetTransformFormattersFloat(struct _cmstransform_struct *CMMc if (ToOutput) *ToOutput = CMMcargo ->ToOutputFloat; } +// returns original flags +cmsUInt32Number CMSEXPORT _cmsGetTransformFlags(struct _cmstransform_struct* CMMcargo) +{ + _cmsAssert(CMMcargo != NULL); + return CMMcargo->dwOriginalFlags; +} // Allocate transform struct and set it to defaults. Ask the optimization plug-in about if those formats are proper // for separated transforms. If this is the case, |