//--------------------------------------------------------------------------------- // // Little Color Management System, fast floating point extensions // Copyright (c) 1998-2022 Marti Maria Saguer, all rights reserved // // // This program is free software: you can redistribute it and/or modify // it under the terms of the GNU General Public License as published by // the Free Software Foundation, either version 3 of the License, or // (at your option) any later version. // // This program is distributed in the hope that it will be useful, // but WITHOUT ANY WARRANTY; without even the implied warranty of // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the // GNU General Public License for more details. // // You should have received a copy of the GNU General Public License // along with this program. If not, see . // //--------------------------------------------------------------------------------- #include "fast_float_internal.h" // Curves, optimization is valid for floating point curves typedef struct { cmsFloat32Number CurveR[MAX_NODES_IN_CURVE]; cmsFloat32Number CurveG[MAX_NODES_IN_CURVE]; cmsFloat32Number CurveB[MAX_NODES_IN_CURVE]; void* real_ptr; } CurvesFloatData; // A special malloc that returns memory aligned to DWORD boundary. Aligned memory access is way faster than unaligned // reference to the real block is kept for later free static CurvesFloatData* malloc_aligned(cmsContext ContextID) { cmsUInt8Number* real_ptr = (cmsUInt8Number*)_cmsMallocZero(ContextID, sizeof(CurvesFloatData) + 32); cmsUInt8Number* aligned = (cmsUInt8Number*)(((uintptr_t)real_ptr + 16) & ~0xf); CurvesFloatData* p = (CurvesFloatData*)aligned; p->real_ptr = real_ptr; return p; } // Free the private data container static void free_aligned(cmsContext ContextID, void* Data) { CurvesFloatData* p = (CurvesFloatData*)Data; if (p != NULL) _cmsFree(ContextID, p->real_ptr); } // Evaluator for float curves. This are just 1D tables static void FastEvaluateFloatRGBCurves(struct _cmstransform_struct *CMMcargo, const void* Input, void* Output, cmsUInt32Number PixelsPerLine, cmsUInt32Number LineCount, const cmsStride* Stride) { cmsUInt32Number i, ii; cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number SourceIncrements[cmsMAXCHANNELS]; cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number DestIncrements[cmsMAXCHANNELS]; const cmsUInt8Number* rin; const cmsUInt8Number* gin; const cmsUInt8Number* bin; const cmsUInt8Number* ain = NULL; cmsUInt8Number* rout; cmsUInt8Number* gout; cmsUInt8Number* bout; cmsUInt8Number* aout = NULL; cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM) CMMcargo); cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM) CMMcargo); CurvesFloatData* Data = (CurvesFloatData*) _cmsGetTransformUserData(CMMcargo); cmsUInt32Number nchans, nalpha; cmsUInt32Number strideIn, strideOut; _cmsComputeComponentIncrements(InputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); _cmsComputeComponentIncrements(OutputFormat, Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements); if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) nalpha = 0; strideIn = strideOut = 0; for (i = 0; i < LineCount; i++) { rin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn; gin = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn; bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2] + strideIn; if (nalpha) ain = (const cmsUInt8Number*)Input + SourceStartingOrder[3] + strideIn; rout = (cmsUInt8Number*)Output + DestStartingOrder[0] + strideOut; gout = (cmsUInt8Number*)Output + DestStartingOrder[1] + strideOut; bout = (cmsUInt8Number*)Output + DestStartingOrder[2] + strideOut; if (nalpha) aout = (cmsUInt8Number*)Output + DestStartingOrder[3] + strideOut; for (ii = 0; ii < PixelsPerLine; ii++) { *(cmsFloat32Number*)rout = flerp(Data->CurveR, *(cmsFloat32Number*)rin); *(cmsFloat32Number*)gout = flerp(Data->CurveG, *(cmsFloat32Number*)gin); *(cmsFloat32Number*)bout = flerp(Data->CurveB, *(cmsFloat32Number*)bin); rin += SourceIncrements[0]; gin += SourceIncrements[1]; bin += SourceIncrements[2]; rout += DestIncrements[0]; gout += DestIncrements[1]; bout += DestIncrements[2]; if (ain) { *(cmsFloat32Number*)aout = *(cmsFloat32Number*)ain; ain += SourceIncrements[3]; aout += DestIncrements[3]; } } strideIn += Stride->BytesPerLineIn; strideOut += Stride->BytesPerLineOut; } } // Do nothing but arrange the RGB format. static void FastFloatRGBIdentity(struct _cmstransform_struct *CMMcargo, const void* Input, void* Output, cmsUInt32Number PixelsPerLine, cmsUInt32Number LineCount, const cmsStride* Stride) { cmsUInt32Number i, ii; cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number SourceIncrements[cmsMAXCHANNELS]; cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number DestIncrements[cmsMAXCHANNELS]; const cmsUInt8Number* rin; const cmsUInt8Number* gin; const cmsUInt8Number* bin; const cmsUInt8Number* ain = NULL; cmsUInt8Number* rout; cmsUInt8Number* gout; cmsUInt8Number* bout; cmsUInt8Number* aout = NULL; cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM) CMMcargo); cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM) CMMcargo); cmsUInt32Number nchans, nalpha; cmsUInt32Number strideIn, strideOut; _cmsComputeComponentIncrements(InputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); _cmsComputeComponentIncrements(OutputFormat, Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements); if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) nalpha = 0; strideIn = strideOut = 0; for (i = 0; i < LineCount; i++) { rin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn; gin = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn; bin = (const cmsUInt8Number*)Input + SourceStartingOrder[2] + strideIn; if (nalpha) ain = (const cmsUInt8Number*)Input + SourceStartingOrder[3] + strideIn; rout = (cmsUInt8Number*)Output + DestStartingOrder[0] + strideOut; gout = (cmsUInt8Number*)Output + DestStartingOrder[1] + strideOut; bout = (cmsUInt8Number*)Output + DestStartingOrder[2] + strideOut; if (nalpha) aout = (cmsUInt8Number*)Output + DestStartingOrder[3] + strideOut; for (ii=0; ii < PixelsPerLine; ii++) { *(cmsFloat32Number*)rout = *(cmsFloat32Number*)rin; *(cmsFloat32Number*)gout = *(cmsFloat32Number*)gin; *(cmsFloat32Number*)bout = *(cmsFloat32Number*)bin; rin += SourceIncrements[0]; gin += SourceIncrements[1]; bin += SourceIncrements[2]; rout += DestIncrements[0]; gout += DestIncrements[1]; bout += DestIncrements[2]; if (ain) { *(cmsFloat32Number*)aout = *(cmsFloat32Number*)ain; ain += SourceIncrements[3]; aout += DestIncrements[3]; } } strideIn += Stride->BytesPerLineIn; strideOut += Stride->BytesPerLineOut; } } // Evaluate 1 channel only static void FastEvaluateFloatGrayCurves(struct _cmstransform_struct* CMMcargo, const void* Input, void* Output, cmsUInt32Number PixelsPerLine, cmsUInt32Number LineCount, const cmsStride* Stride) { cmsUInt32Number i, ii; cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number SourceIncrements[cmsMAXCHANNELS]; cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number DestIncrements[cmsMAXCHANNELS]; const cmsUInt8Number* kin; const cmsUInt8Number* ain = NULL; cmsUInt8Number* kout; cmsUInt8Number* aout = NULL; cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo); cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo); CurvesFloatData* Data = (CurvesFloatData*)_cmsGetTransformUserData(CMMcargo); cmsUInt32Number nchans, nalpha; cmsUInt32Number strideIn, strideOut; _cmsComputeComponentIncrements(InputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); _cmsComputeComponentIncrements(OutputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, DestStartingOrder, DestIncrements); if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) nalpha = 0; strideIn = strideOut = 0; for (i = 0; i < LineCount; i++) { kin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn; kout = (cmsUInt8Number*)Output + DestStartingOrder[0] + strideOut; if (nalpha) { ain = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn; aout = (cmsUInt8Number*)Output + DestStartingOrder[1] + strideOut; } for (ii = 0; ii < PixelsPerLine; ii++) { *(cmsFloat32Number*)kout = flerp(Data->CurveR, *(cmsFloat32Number*)kin); kin += SourceIncrements[0]; kout += DestIncrements[0]; if (ain) { *(cmsFloat32Number*)aout = *(cmsFloat32Number*)ain; ain += SourceIncrements[1]; aout += DestIncrements[1]; } } strideIn += Stride->BytesPerLineIn; strideOut += Stride->BytesPerLineOut; } } static void FastFloatGrayIdentity(struct _cmstransform_struct* CMMcargo, const void* Input, void* Output, cmsUInt32Number PixelsPerLine, cmsUInt32Number LineCount, const cmsStride* Stride) { cmsUInt32Number i, ii; cmsUInt32Number SourceStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number SourceIncrements[cmsMAXCHANNELS]; cmsUInt32Number DestStartingOrder[cmsMAXCHANNELS]; cmsUInt32Number DestIncrements[cmsMAXCHANNELS]; const cmsUInt8Number* kin; const cmsUInt8Number* ain = NULL; cmsUInt8Number* kout; cmsUInt8Number* aout = NULL; cmsUInt32Number InputFormat = cmsGetTransformInputFormat((cmsHTRANSFORM)CMMcargo); cmsUInt32Number OutputFormat = cmsGetTransformOutputFormat((cmsHTRANSFORM)CMMcargo); cmsUInt32Number nchans, nalpha; cmsUInt32Number strideIn, strideOut; _cmsComputeComponentIncrements(InputFormat, Stride->BytesPerPlaneIn, &nchans, &nalpha, SourceStartingOrder, SourceIncrements); _cmsComputeComponentIncrements(OutputFormat, Stride->BytesPerPlaneOut, &nchans, &nalpha, DestStartingOrder, DestIncrements); if (!(_cmsGetTransformFlags((cmsHTRANSFORM)CMMcargo) & cmsFLAGS_COPY_ALPHA)) nalpha = 0; strideIn = strideOut = 0; for (i = 0; i < LineCount; i++) { kin = (const cmsUInt8Number*)Input + SourceStartingOrder[0] + strideIn; kout = (cmsUInt8Number*)Output + DestStartingOrder[0] + strideOut; if (nalpha) { ain = (const cmsUInt8Number*)Input + SourceStartingOrder[1] + strideIn; aout = (cmsUInt8Number*)Output + DestStartingOrder[1] + strideOut; } for (ii = 0; ii < PixelsPerLine; ii++) { *(cmsFloat32Number*)kout = *(cmsFloat32Number*)kin; kin += SourceIncrements[0]; kout += DestIncrements[0]; if (ain) { *(cmsFloat32Number*)aout = *(cmsFloat32Number*)ain; ain += SourceIncrements[1]; aout += DestIncrements[1]; } } strideIn += Stride->BytesPerLineIn; strideOut += Stride->BytesPerLineOut; } } #define LINEAR_CURVES_EPSILON 0.00001 // Try to see if the curves are linear static cmsBool AllRGBCurvesAreLinear(CurvesFloatData* data) { int j; cmsFloat32Number expected; for (j = 0; j < MAX_NODES_IN_CURVE; j++) { expected = (cmsFloat32Number)j / (cmsFloat32Number)(MAX_NODES_IN_CURVE - 1); if (fabsf(data->CurveR[j] - expected) > LINEAR_CURVES_EPSILON || fabsf(data->CurveG[j] - expected) > LINEAR_CURVES_EPSILON || fabsf(data->CurveB[j] - expected) > LINEAR_CURVES_EPSILON) { return FALSE; } } return TRUE; } static cmsBool KCurveIsLinear(CurvesFloatData* data) { int j; cmsFloat32Number expected; for (j = 0; j < MAX_NODES_IN_CURVE; j++) { expected = (cmsFloat32Number)j / (cmsFloat32Number)(MAX_NODES_IN_CURVE - 1); if (fabs(data->CurveR[j] - expected) > LINEAR_CURVES_EPSILON) return FALSE; } return TRUE; } // Create linearization tables with a reasonable number of entries. Precision is about 32 bits. static CurvesFloatData* ComputeCompositeCurves(cmsUInt32Number nChan, cmsPipeline* Src) { cmsUInt32Number i, j; cmsFloat32Number InFloat[3], OutFloat[3]; CurvesFloatData* Data = malloc_aligned(cmsGetPipelineContextID(Src)); if (Data == NULL) return NULL; // Create target curves for (i = 0; i < MAX_NODES_IN_CURVE; i++) { for (j=0; j CurveR[i] = OutFloat[0]; } else { Data->CurveR[i] = OutFloat[0]; Data->CurveG[i] = OutFloat[1]; Data->CurveB[i] = OutFloat[2]; } } return Data; } // If the target LUT holds only curves, the optimization procedure is to join all those // curves together. That only works on curves and does not work on matrices. cmsBool OptimizeFloatByJoiningCurves(_cmsTransform2Fn* TransformFn, void** UserData, _cmsFreeUserDataFn* FreeUserData, cmsPipeline** Lut, cmsUInt32Number* InputFormat, cmsUInt32Number* OutputFormat, cmsUInt32Number* dwFlags) { cmsPipeline* Src = *Lut; cmsStage* mpe; CurvesFloatData* Data; cmsUInt32Number nChans; // Apply only to floating-point cases if (!T_FLOAT(*InputFormat) || !T_FLOAT(*OutputFormat)) return FALSE; // Only on 8-bit if (T_BYTES(*InputFormat) != 4 || T_BYTES(*OutputFormat) != 4) return FALSE; // Curves need same channels on input and output (despite extra channels may differ) nChans = T_CHANNELS(*InputFormat); if (nChans != T_CHANNELS(*OutputFormat)) return FALSE; // gray and RGB if (nChans != 1 && nChans != 3) return FALSE; // Only curves in this LUT? for (mpe = cmsPipelineGetPtrToFirstStage(Src); mpe != NULL; mpe = cmsStageNext(mpe)) { if (cmsStageType(mpe) != cmsSigCurveSetElemType) return FALSE; } Data = ComputeCompositeCurves(nChans, Src); *dwFlags |= cmsFLAGS_NOCACHE; *dwFlags &= ~cmsFLAGS_CAN_CHANGE_FORMATTER; *UserData = Data; *FreeUserData = free_aligned; // Maybe the curves are linear at the end if (nChans == 1) *TransformFn = (KCurveIsLinear(Data) ? FastFloatGrayIdentity : FastEvaluateFloatGrayCurves); else *TransformFn = (AllRGBCurvesAreLinear(Data) ? FastFloatRGBIdentity : FastEvaluateFloatRGBCurves); return TRUE; }