From 0a5dd3c700f1873be217707aa89a805d009bac3e Mon Sep 17 00:00:00 2001
From: Sebastian Holtermann <sebholt@xwmw.org>
Date: Sat, 6 Aug 2016 13:09:59 +0200
Subject: cmFilePathUuid: Add class to generate deterministic unique file names

The class generates a semi-unique (checksum based) pathless file name
from a full source file path.
---
 Source/CMakeLists.txt     |   2 +
 Source/cmFilePathUuid.cxx | 147 ++++++++++++++++++++++++++++++++++++++++++++++
 Source/cmFilePathUuid.h   |  80 +++++++++++++++++++++++++++
 3 files changed, 229 insertions(+)
 create mode 100644 Source/cmFilePathUuid.cxx
 create mode 100644 Source/cmFilePathUuid.h

diff --git a/Source/CMakeLists.txt b/Source/CMakeLists.txt
index a79099497a..cdc8fb1325 100644
--- a/Source/CMakeLists.txt
+++ b/Source/CMakeLists.txt
@@ -238,6 +238,8 @@ set(SRCS
   cmFileLockPool.h
   cmFileLockResult.cxx
   cmFileLockResult.h
+  cmFilePathUuid.cxx
+  cmFilePathUuid.h
   cmFileTimeComparison.cxx
   cmFileTimeComparison.h
   cmFortranLexer.cxx
diff --git a/Source/cmFilePathUuid.cxx b/Source/cmFilePathUuid.cxx
new file mode 100644
index 0000000000..2839b63da0
--- /dev/null
+++ b/Source/cmFilePathUuid.cxx
@@ -0,0 +1,147 @@
+/*============================================================================
+  CMake - Cross Platform Makefile Generator
+  Copyright 2016 Sebastian Holtermann (sebholt@xwmw.org)
+
+  Distributed under the OSI-approved BSD License (the "License");
+  see accompanying file Copyright.txt for details.
+
+  This software is distributed WITHOUT ANY WARRANTY; without even the
+  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the License for more information.
+============================================================================*/
+
+#include "cmFilePathUuid.h"
+
+#include "cmCryptoHash.h"
+#include "cmMakefile.h"
+#include "cmSystemTools.h"
+#include "cmsys/Base64.h"
+
+// Initialize the known parent directories from the current and the top level
+// (home) source and binary directories of the given makefile.
+cmFilePathUuid::cmFilePathUuid(cmMakefile* makefile)
+{
+  initParentDirs(makefile->GetCurrentSourceDirectory(),
+                 makefile->GetCurrentBinaryDirectory(),
+                 makefile->GetHomeDirectory(),
+                 makefile->GetHomeOutputDirectory());
+}
+
+// Initialize the known parent directories from explicitly given paths.
+cmFilePathUuid::cmFilePathUuid(const std::string& currentSrcDir,
+                               const std::string& currentBinDir,
+                               const std::string& projectSrcDir,
+                               const std::string& projectBinDir)
+{
+  initParentDirs(currentSrcDir, currentBinDir, projectSrcDir, projectBinDir);
+}
+
+// Store the real path of every known parent directory together with the seed
+// name that represents it in the checksum input. The array order is the order
+// in which GetRelPathSeed tests the directories.
+void cmFilePathUuid::initParentDirs(const std::string& currentSrcDir,
+                                    const std::string& currentBinDir,
+                                    const std::string& projectSrcDir,
+                                    const std::string& projectBinDir)
+{
+  parentDirs[0].first = cmsys::SystemTools::GetRealPath(currentSrcDir);
+  parentDirs[1].first = cmsys::SystemTools::GetRealPath(currentBinDir);
+  parentDirs[2].first = cmsys::SystemTools::GetRealPath(projectSrcDir);
+  parentDirs[3].first = cmsys::SystemTools::GetRealPath(projectBinDir);
+
+  parentDirs[0].second = "CurrentSource";
+  parentDirs[1].second = "CurrentBinary";
+  parentDirs[2].second = "ProjectSource";
+  parentDirs[3].second = "ProjectBinary";
+}
+
+// Compose "<prefix><name part>_<checksum part><suffix>" for the given file
+// path. The name part is at most partLengthName characters of the file name
+// without its last extension, the checksum part is at most
+// partLengthCheckSum characters of the checksum string.
+std::string cmFilePathUuid::get(const std::string& filePath,
+                                const char* outputPrefix,
+                                const char* outputSuffix)
+{
+  std::string sourceFilename = cmsys::SystemTools::GetFilenameName(filePath);
+  std::string sourceBasename =
+    cmsys::SystemTools::GetFilenameWithoutLastExtension(sourceFilename);
+
+  // Acquire checksum string
+  std::string checksum;
+  {
+    std::string sourceRelPath;
+    std::string sourceRelSeed;
+    GetRelPathSeed(filePath, sourceRelPath, sourceRelSeed);
+    checksum = GetChecksumString(sourceFilename, sourceRelPath, sourceRelSeed);
+  }
+
+  // Compose the file name
+  std::string uuid;
+  if (outputPrefix) {
+    uuid += outputPrefix;
+  }
+  uuid += sourceBasename.substr(0, partLengthName);
+  uuid += "_";
+  uuid += checksum.substr(0, partLengthCheckSum);
+  if (outputSuffix) {
+    uuid += outputSuffix;
+  }
+  return uuid;
+}
+
+// Determine the seed name of the first known parent directory the file is
+// located below and the path of the file's directory relative to that parent
+// directory.
+void cmFilePathUuid::GetRelPathSeed(const std::string& filePath,
+                                    std::string& sourceRelPath,
+                                    std::string& sourceRelSeed)
+{
+  const std::string sourceNameReal = cmsys::SystemTools::GetRealPath(filePath);
+  std::string parentDirectory;
+  // Find the first known parent directory that contains the file
+  for (size_t ii = 0; ii != numParentDirs; ++ii) {
+    const std::string& pDir = parentDirs[ii].first;
+    if (!pDir.empty() &&
+        cmsys::SystemTools::IsSubDirectory(sourceNameReal, pDir)) {
+      sourceRelSeed = parentDirs[ii].second;
+      parentDirectory = pDir;
+      break;
+    }
+  }
+  // Check if the file path is below a known project directory
+  if (parentDirectory.empty()) {
+    // Use file system root as fallback parent directory
+    sourceRelSeed = "FileSystemRoot";
+    cmsys::SystemTools::SplitPathRootComponent(sourceNameReal,
+                                               &parentDirectory);
+  }
+  sourceRelPath = cmsys::SystemTools::RelativePath(
+    parentDirectory, cmsys::SystemTools::GetParentDirectory(sourceNameReal));
+}
+
+// Hash the concatenation of seed, relative path and file name with SHA256 and
+// return the hash as Base64 text that is safe to use in file names.
+std::string cmFilePathUuid::GetChecksumString(
+  const std::string& sourceFilename, const std::string& sourceRelPath,
+  const std::string& sourceRelSeed)
+{
+  std::string checksumBase64;
+  {
+    // Calculate the file ( seed + relative path + name ) checksum
+    std::vector<unsigned char> hashBytes =
+      cmCryptoHash::New("SHA256")->ByteHashString(
+        (sourceRelSeed + sourceRelPath + sourceFilename).c_str());
+    // Convert hash bytes to Base64 text string
+    std::vector<unsigned char> base64Bytes(hashBytes.size() * 2, 0);
+    cmsysBase64_Encode(&hashBytes[0], hashBytes.size(), &base64Bytes[0], 0);
+    checksumBase64 = reinterpret_cast<const char*>(&base64Bytes[0]);
+  }
+  // Base64 allows '/', '+' and '=' characters which are problematic
+  // when used in file names. Replace them with safer alternatives.
+  std::replace(checksumBase64.begin(), checksumBase64.end(), '/', '-');
+  std::replace(checksumBase64.begin(), checksumBase64.end(), '+', '_');
+  std::replace(checksumBase64.begin(), checksumBase64.end(), '=', '_');
+
+  return checksumBase64;
+}
diff --git a/Source/cmFilePathUuid.h b/Source/cmFilePathUuid.h
new file mode 100644
index 0000000000..42e89b1065
--- /dev/null
+++ b/Source/cmFilePathUuid.h
@@ -0,0 +1,80 @@
+/*============================================================================
+  CMake - Cross Platform Makefile Generator
+  Copyright 2016 Sebastian Holtermann (sebholt@xwmw.org)
+
+  Distributed under the OSI-approved BSD License (the "License");
+  see accompanying file Copyright.txt for details.
+
+  This software is distributed WITHOUT ANY WARRANTY; without even the
+  implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
+  See the License for more information.
+============================================================================*/
+
+#ifndef cmFilePathUuid_h
+#define cmFilePathUuid_h
+
+#include "cmStandardIncludes.h"
+
+#include <string>
+#include <utility>
+
+class cmMakefile;
+
+/** \class cmFilePathUuid
+ * @brief Generates a unique pathless file name with a checksum component
+ *        calculated from the file path.
+ *
+ * The checksum is calculated from the relative file path to the
+ * closest known project directory. This guarantees reproducibility
+ * when source and build directory differ e.g. for different project
+ * build directories.
+ */
+class cmFilePathUuid
+{
+public:
+  /// Maximum number of characters to use from the file name
+  static const size_t partLengthName = 14;
+  /// Maximum number of characters to use from the path checksum
+  static const size_t partLengthCheckSum = 14;
+
+  /// @brief Initializes the parent directories from a makefile
+  cmFilePathUuid(cmMakefile* makefile);
+
+  /// @brief Initializes the parent directories manually
+  cmFilePathUuid(const std::string& currentSrcDir,
+                 const std::string& currentBinDir,
+                 const std::string& projectSrcDir,
+                 const std::string& projectBinDir);
+
+  /** @brief Calculates and returns the uuid for a file path
+   *
+   * @param outputPrefix optional string to prepend to the result
+   * @param outputSuffix optional string to append to the result
+   */
+  std::string get(const std::string& filePath, const char* outputPrefix = NULL,
+                  const char* outputSuffix = NULL);
+
+private:
+  /// Stores the real paths and seed names of the known parent directories
+  void initParentDirs(const std::string& currentSrcDir,
+                      const std::string& currentBinDir,
+                      const std::string& projectSrcDir,
+                      const std::string& projectBinDir);
+
+  /// Returns the relative path and the parent directory key string (seed)
+  void GetRelPathSeed(const std::string& filePath, std::string& sourceRelPath,
+                      std::string& sourceRelSeed);
+
+  /// Returns the SHA256 checksum of ( seed + relative path + file name )
+  /// as Base64 text that is safe to use in file names
+  std::string GetChecksumString(const std::string& sourceFilename,
+                                const std::string& sourceRelPath,
+                                const std::string& sourceRelSeed);
+
+  /// Size of the parent directory list
+  static const size_t numParentDirs = 4;
+  /// List of (directory name, seed name) pairs
+  std::pair<std::string, std::string> parentDirs[numParentDirs];
+};
+
+#endif
-- 
cgit v1.2.1