diff options
author | Joel Rosdahl <joel@rosdahl.net> | 2022-08-15 07:39:24 +0200 |
---|---|---|
committer | Joel Rosdahl <joel@rosdahl.net> | 2022-08-15 22:13:31 +0200 |
commit | 6f227afd935aee61e6ad32c85b570a16c3709957 (patch) | |
tree | 4ab330df07dd0c98ccc4d85ab9aa4b5842376b5b | |
parent | 94508882ca6a9b715b02de5d9f2d14de69496af7 (diff) | |
download | ccache-6f227afd935aee61e6ad32c85b570a16c3709957.tar.gz |
enhance: Add util::{read_fd,read_file,write_file} functions
-rw-r--r-- | cmake/config.h.in | 3 | ||||
-rw-r--r-- | src/util/file.cpp | 172 | ||||
-rw-r--r-- | src/util/file.hpp | 29 | ||||
-rw-r--r-- | src/util/types.hpp | 31 | ||||
-rw-r--r-- | unittest/CMakeLists.txt | 1 | ||||
-rw-r--r-- | unittest/test_util_file.cpp | 119 |
6 files changed, 350 insertions, 5 deletions
diff --git a/cmake/config.h.in b/cmake/config.h.in
index 83a3cf52..9210147e 100644
--- a/cmake/config.h.in
+++ b/cmake/config.h.in
@@ -210,6 +210,9 @@ typedef int pid_t;
 #if !defined(_WIN32) && !defined(O_BINARY)
 #  define O_BINARY 0
 #endif
+#if !defined(_WIN32) && !defined(O_TEXT)
+#  define O_TEXT 0
+#endif
 
 #ifndef ESTALE
 #  define ESTALE -1
diff --git a/src/util/file.cpp b/src/util/file.cpp
index 44ca5cbd..99f7998a 100644
--- a/src/util/file.cpp
+++ b/src/util/file.cpp
@@ -18,11 +18,15 @@
 
 #include "file.hpp"
 
+#include <Fd.hpp>
 #include <Logging.hpp>
 #include <Util.hpp>
-#include <core/exceptions.hpp>
 #include <fmtmacros.hpp>
 
+#ifdef HAVE_UNISTD_H
+#  include <unistd.h>
+#endif
+
 #ifdef HAVE_UTIMENSAT
 #  include <fcntl.h>
 #  include <sys/stat.h>
@@ -37,6 +41,13 @@
 #  endif
 #endif
 
+#include <cerrno>
+#include <codecvt>
+#include <cstring>
+#include <fstream>
+#include <locale>
+#include <type_traits>
+
 namespace util {
 
 void
@@ -53,13 +64,139 @@ create_cachedir_tag(const std::string& dir)
   if (stat) {
     return;
   }
-  try {
-    Util::write_file(path, cachedir_tag);
-  } catch (const core::Error& e) {
-    LOG("Failed to create {}: {}", path, e.what());
+  const auto result = util::write_file(path, cachedir_tag);
+  if (!result) {
+    LOG("Failed to create {}: {}", path, result.error());
+  }
+}
+
+nonstd::expected<void, std::string>
+read_fd(int fd, DataReceiver data_receiver)
+{
+  int64_t n;
+  char buffer[CCACHE_READ_BUFFER_SIZE];
+  while ((n = read(fd, buffer, sizeof(buffer))) != 0) {
+    // Only EINTR is retried; any other read(2) failure ends the loop.
+    if (n == -1 && errno != EINTR) {
+      break;
+    }
+    if (n > 0) {
+      data_receiver(buffer, n);
+    }
+  }
+  if (n == -1) {
+    return nonstd::make_unexpected(strerror(errno));
+  }
+  return {};
+}
+
+#ifdef _WIN32
+static bool
+has_utf16_le_bom(std::string_view text)
+{
+  return text.size() > 1
+         && ((static_cast<uint8_t>(text[0]) == 0xff
+              && static_cast<uint8_t>(text[1]) == 0xfe));
+}
+#endif
+
+template<typename T>
+nonstd::expected<T, std::string>
+read_file(const std::string& path, size_t size_hint)
+{
+  if (size_hint == 0) {
+    const auto stat = Stat::stat(path);
+    if (!stat) {
+      // Build the message before logging so that the returned error does not
+      // depend on whether LOG() leaves errno untouched.
+      const std::string error = strerror(errno);
+      LOG("Failed to stat {}: {}", path, error);
+      return nonstd::make_unexpected(error);
+    }
+    size_hint = stat.size();
+  }
+
+  // +1 to be able to detect EOF in the first read call
+  size_hint = (size_hint < 1024) ? 1024 : size_hint + 1;
+
+  const int open_flags = [] {
+    if constexpr (std::is_same<T, std::string>::value) {
+      return O_RDONLY | O_TEXT;
+    } else {
+      return O_RDONLY | O_BINARY;
+    }
+  }();
+  Fd fd(open(path.c_str(), open_flags));
+  if (!fd) {
+    const std::string error = strerror(errno);
+    LOG("Failed to open {}: {}", path, error);
+    return nonstd::make_unexpected(error);
+  }
+
+  int64_t ret = 0;
+  size_t pos = 0;
+  T result;
+  result.resize(size_hint);
+
+  while (true) {
+    if (pos == result.size()) {
+      result.resize(2 * result.size());
+    }
+    const size_t max_read = result.size() - pos;
+    ret = read(*fd, &result[pos], max_read);
+    if (ret == 0 || (ret == -1 && errno != EINTR)) {
+      break;
+    }
+    if (ret > 0) {
+      pos += ret;
+      // A short read is treated as EOF, which avoids one extra read call.
+      // NOTE(review): this presumes `path` is a regular file -- confirm.
+      if (static_cast<size_t>(ret) < max_read) {
+        break;
+      }
+    }
+  }
+
+  if (ret == -1) {
+    const std::string error = strerror(errno);
+    LOG("Failed to read {}: {}", path, error);
+    return nonstd::make_unexpected(error);
+  }
+
+  result.resize(pos);
+
+#ifdef _WIN32
+  if constexpr (std::is_same<T, std::string>::value) {
+    // Convert to UTF-8 if the content starts with a UTF-16 little-endian BOM.
+    //
+    // Note that this code assumes a little-endian machine, which is why it's
+    // #ifdef-ed to only run on Windows (which is always little-endian) where
+    // it's actually needed.
+    if (has_utf16_le_bom(result)) {
+      result.erase(0, 2); // Remove BOM.
+      // Copy complete 16-bit code units by size instead of treating the bytes
+      // as a NUL-terminated char16_t string: the latter reads past the single
+      // terminating byte and stops at the first embedded U+0000.
+      std::u16string result_as_u16(result.size() / 2, u'\0');
+      std::memcpy(result_as_u16.data(),
+                  result.data(),
+                  result_as_u16.size() * sizeof(char16_t));
+      std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>
+        converter;
+      result = converter.to_bytes(result_as_u16);
+    }
   }
+#endif
+
+  return result;
 }
 
+template nonstd::expected<std::string, std::string>
+read_file(const std::string& path, size_t size_hint);
+
+template nonstd::expected<util::Blob, std::string>
+read_file(const std::string& path, size_t size_hint);
+
 void
 set_timestamps(const std::string& path,
                std::optional<timespec> mtime,
@@ -93,4 +230,43 @@ set_timestamps(const std::string& path,
 #endif
 }
 
+nonstd::expected<void, std::string>
+write_fd(int fd, const void* data, size_t size)
+{
+  int64_t written = 0;
+  do {
+    const auto count =
+      write(fd, static_cast<const uint8_t*>(data) + written, size - written);
+    if (count == -1) {
+      // EAGAIN and EINTR are retried; any other error aborts the write.
+      if (errno != EAGAIN && errno != EINTR) {
+        return nonstd::make_unexpected(strerror(errno));
+      }
+    } else {
+      written += count;
+    }
+  } while (static_cast<size_t>(written) < size);
+  return {};
+}
+
+nonstd::expected<void, std::string>
+write_file(const std::string& path, const std::string& data)
+{
+  Fd fd(open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_TEXT, 0666));
+  if (!fd) {
+    return nonstd::make_unexpected(strerror(errno));
+  }
+  return write_fd(*fd, data.data(), data.size());
+}
+
+nonstd::expected<void, std::string>
+write_file(const std::string& path, const util::Blob& data)
+{
+  Fd fd(open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666));
+  if (!fd) {
+    return nonstd::make_unexpected(strerror(errno));
+  }
+  return write_fd(*fd, data.data(), data.size());
+}
+
 } // namespace util
diff --git a/src/util/file.hpp b/src/util/file.hpp
index 61a66bd1..886740ff 100644
--- a/src/util/file.hpp
+++ b/src/util/file.hpp
@@ -18,6 +18,10 @@
 
 #pragma once
 
+#include <util/types.hpp>
+
+#include <third_party/nonstd/expected.hpp>
+
 #include <ctime>
 #include <optional>
 #include <string>
@@ -28,10 +32,35 @@ namespace util {
 
 void create_cachedir_tag(const std::string& dir);
 
+// Read data from `fd` until end of file and call `data_receiver` with the read
+// data. Returns an error if the underlying read(2) call returned -1.
+nonstd::expected<void, std::string> read_fd(int fd, DataReceiver data_receiver);
+
+// Return data from `path`, where `T` is `std::string` for text data and
+// `util::Blob` for binary data. If `T` is `std::string` and the content starts
+// with a UTF-16 little-endian BOM on Windows then it will be converted to
+// UTF-8. If `size_hint` is not 0 then it is assumed that `path` has this size
+// (this saves system calls).
+template<typename T>
+nonstd::expected<T, std::string> read_file(const std::string& path,
+                                           size_t size_hint = 0);
+
 // Set atime/mtime of `path`. If `mtime` is std::nullopt, set to the current
 // time. If `atime` is std::nullopt, set to what `mtime` specifies.
 void set_timestamps(const std::string& path,
                     std::optional<timespec> mtime = std::nullopt,
                     std::optional<timespec> atime = std::nullopt);
 
+// Write `size` bytes from `data` to `fd`. Returns an error message on failure.
+nonstd::expected<void, std::string>
+write_fd(int fd, const void* data, size_t size);
+
+// Write text `data` to `path`.
+nonstd::expected<void, std::string> write_file(const std::string& path,
+                                               const std::string& data);
+
+// Write binary `data` to `path`.
+nonstd::expected<void, std::string> write_file(const std::string& path,
+                                               const util::Blob& data);
+
 } // namespace util
diff --git a/src/util/types.hpp b/src/util/types.hpp
new file mode 100644
index 00000000..8c9d1966
--- /dev/null
+++ b/src/util/types.hpp
@@ -0,0 +1,31 @@
+// Copyright (C) 2022 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <vector>
+
+namespace util {
+// Blob: a vector of raw bytes. DataReceiver: a callback given (data, size).
+using Blob = std::vector<uint8_t>;
+using DataReceiver = std::function<void(const void* data, size_t size)>;
+
+} // namespace util
diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt
index 4f52bf9c..9b852250 100644
--- a/unittest/CMakeLists.txt
+++ b/unittest/CMakeLists.txt
@@ -27,6 +27,7 @@ set(
   test_util_XXH3_128.cpp
   test_util_XXH3_64.cpp
   test_util_expected.cpp
+  test_util_file.cpp
   test_util_path.cpp
   test_util_string.cpp
 )
diff --git a/unittest/test_util_file.cpp b/unittest/test_util_file.cpp
new file mode 100644
index 00000000..dbbf6667
--- /dev/null
+++ b/unittest/test_util_file.cpp
@@ -0,0 +1,119 @@
+// Copyright (C) 2022 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "TestUtil.hpp"
+
+#include <util/file.hpp>
+
+#include <third_party/doctest.h>
+
+#include <cstring>
+
+using TestUtil::TestContext;
+
+TEST_CASE("util::read_file and util::write_file, text data")
+{
+  TestContext test_context;
+
+  REQUIRE(util::write_file("test", "foo\nbar\n"));
+  auto data = util::read_file<std::string>("test");
+  REQUIRE(data);
+  CHECK(*data == "foo\nbar\n");
+
+  REQUIRE(util::write_file("test", "foo\r\nbar\r\n"));
+  data = util::read_file<std::string>("test");
+  REQUIRE(data);
+  CHECK(*data == "foo\r\nbar\r\n");
+
+  // Newline handling
+  REQUIRE(util::write_file("test", "foo\r\nbar\n"));
+  auto bin_data = util::read_file<util::Blob>("test");
+  REQUIRE(bin_data);
+#ifdef _WIN32 // The text-mode write above expanded each "\n" to "\r\n".
+  const std::string expected_bin_data = "foo\r\r\nbar\r\n";
+#else
+  const std::string expected_bin_data = "foo\r\nbar\n";
+#endif
+  CHECK(*bin_data
+        == util::Blob(expected_bin_data.begin(), expected_bin_data.end()));
+
+  REQUIRE(util::write_file("size_hint_test", std::string(8192, '\0')));
+  data = util::read_file<std::string>("size_hint_test", 8191 /*size_hint*/);
+  REQUIRE(data);
+  CHECK(data->size() == 8192);
+  data = util::read_file<std::string>("size_hint_test", 8193 /*size_hint*/);
+  REQUIRE(data);
+  CHECK(data->size() == 8192);
+
+  data = util::read_file<std::string>("does/not/exist");
+  REQUIRE(!data);
+  CHECK(data.error() == "No such file or directory");
+
+  auto result = util::write_file("", "does/not/exist");
+  REQUIRE(!result);
+  CHECK(result.error() == "No such file or directory");
+
+  result = util::write_file("does/not/exist", "does/not/exist");
+  REQUIRE(!result);
+  CHECK(result.error() == "No such file or directory");
+}
+
+TEST_CASE("util::read_file and util::write_file, binary data")
+{
+  TestContext test_context;
+
+  util::Blob expected;
+  for (size_t i = 0; i < 512; ++i) { // Covers every byte value twice.
+    expected.push_back(static_cast<uint8_t>((32 + i) % 256));
+  }
+
+  REQUIRE(util::write_file("test", expected)); // Setup must succeed.
+  auto actual = util::read_file<util::Blob>("test");
+  REQUIRE(actual);
+  CHECK(*actual == expected);
+
+  REQUIRE(util::write_file("size_hint_test", util::Blob(8192, 0)));
+  auto data = util::read_file<util::Blob>("size_hint_test", 8191 /*size_hint*/);
+  REQUIRE(data);
+  CHECK(data->size() == 8192);
+  data = util::read_file<util::Blob>("size_hint_test", 8193 /*size_hint*/);
+  REQUIRE(data);
+  CHECK(data->size() == 8192);
+}
+
+#ifdef _WIN32
+TEST_CASE("util::read_file<std::string> with UTF-16 little endian encoding")
+{
+  TestContext test_context;
+
+  std::string data;
+  data.push_back(static_cast<unsigned char>(0xff));
+  data.push_back(static_cast<unsigned char>(0xfe));
+  data.push_back('a');
+  data.push_back('\0');
+  data.push_back('b');
+  data.push_back('\0');
+  data.push_back('c');
+  data.push_back('\0');
+
+  REQUIRE(util::write_file("test", data)); // Setup must succeed.
+  auto read_data = util::read_file<std::string>("test");
+  REQUIRE(read_data);
+  CHECK(*read_data == "abc");
+}
+#endif |