summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJoel Rosdahl <joel@rosdahl.net>2022-08-15 07:39:24 +0200
committerJoel Rosdahl <joel@rosdahl.net>2022-08-15 22:13:31 +0200
commit6f227afd935aee61e6ad32c85b570a16c3709957 (patch)
tree4ab330df07dd0c98ccc4d85ab9aa4b5842376b5b
parent94508882ca6a9b715b02de5d9f2d14de69496af7 (diff)
downloadccache-6f227afd935aee61e6ad32c85b570a16c3709957.tar.gz
enhance: Add util::{read_fd,read_file,write_file} functions
-rw-r--r--cmake/config.h.in3
-rw-r--r--src/util/file.cpp172
-rw-r--r--src/util/file.hpp29
-rw-r--r--src/util/types.hpp31
-rw-r--r--unittest/CMakeLists.txt1
-rw-r--r--unittest/test_util_file.cpp119
6 files changed, 350 insertions, 5 deletions
diff --git a/cmake/config.h.in b/cmake/config.h.in
index 83a3cf52..9210147e 100644
--- a/cmake/config.h.in
+++ b/cmake/config.h.in
@@ -210,6 +210,9 @@ typedef int pid_t;
#if !defined(_WIN32) && !defined(O_BINARY)
# define O_BINARY 0
#endif
+#if !defined(_WIN32) && !defined(O_TEXT)
+# define O_TEXT 0
+#endif
#ifndef ESTALE
# define ESTALE -1
diff --git a/src/util/file.cpp b/src/util/file.cpp
index 44ca5cbd..99f7998a 100644
--- a/src/util/file.cpp
+++ b/src/util/file.cpp
@@ -18,11 +18,15 @@
#include "file.hpp"
+#include <Fd.hpp>
#include <Logging.hpp>
#include <Util.hpp>
-#include <core/exceptions.hpp>
#include <fmtmacros.hpp>
+#ifdef HAVE_UNISTD_H
+# include <unistd.h>
+#endif
+
#ifdef HAVE_UTIMENSAT
# include <fcntl.h>
# include <sys/stat.h>
@@ -37,6 +41,13 @@
# endif
#endif
+#include <cerrno>
+#include <codecvt>
+#include <cstring>
+#include <fstream>
+#include <locale>
+#include <type_traits>
+
namespace util {
void
@@ -53,13 +64,126 @@ create_cachedir_tag(const std::string& dir)
if (stat) {
return;
}
- try {
- Util::write_file(path, cachedir_tag);
- } catch (const core::Error& e) {
- LOG("Failed to create {}: {}", path, e.what());
+ const auto result = util::write_file(path, cachedir_tag);
+ if (!result) {
+ LOG("Failed to create {}: {}", path, result.error());
+ }
+}
+
+nonstd::expected<void, std::string>
+read_fd(int fd, DataReceiver data_receiver) // Feed everything readable from `fd` to `data_receiver`, chunk by chunk.
+{
+ int64_t n;
+ char buffer[CCACHE_READ_BUFFER_SIZE];
+ while ((n = read(fd, buffer, sizeof(buffer))) != 0) { // read() returning 0 means end of file
+ if (n == -1 && errno != EINTR) { // real failure: stop; an EINTR failure falls through and the read is retried
+ break;
+ }
+ if (n > 0) {
+ data_receiver(buffer, n); // pass on only the bytes that were actually read
+ }
+ }
+ if (n == -1) { // only reachable via the break above, so errno still belongs to the failed read()
+ return nonstd::make_unexpected(strerror(errno));
+ }
+ return {}; // success: reached end of file
+}
+
+#ifdef _WIN32
+static bool
+has_utf16_le_bom(std::string_view text) // True if `text` begins with the two bytes 0xFF 0xFE.
+{
+ return text.size() > 1 // need at least two bytes before indexing text[1]
+ && ((static_cast<uint8_t>(text[0]) == 0xff // cast so the comparison does not depend on char signedness
+ && static_cast<uint8_t>(text[1]) == 0xfe));
+}
+#endif
+
+// Read the whole content of `path` and return it as `T`: `std::string` opens
+// the file with O_TEXT, any other `T` (in practice `util::Blob`) opens it with
+// O_BINARY.
+//
+// If `size_hint` is 0 the file is stat-ed to pick the initial buffer size;
+// otherwise the hint is used as is. The buffer is doubled whenever it fills
+// up, so a too small hint only costs extra read calls. On failure the
+// strerror(3) text of the failing call is returned as the error.
+template<typename T>
+nonstd::expected<T, std::string>
+read_file(const std::string& path, size_t size_hint)
+{
+  if (size_hint == 0) {
+    const auto stat = Stat::stat(path);
+    if (!stat) {
+      // Capture errno before logging: LOG may perform I/O that overwrites it,
+      // which would make the returned message differ from the logged one.
+      const int saved_errno = errno;
+      LOG("Failed to stat {}: {}", path, strerror(saved_errno));
+      return nonstd::make_unexpected(strerror(saved_errno));
+    }
+    size_hint = stat.size();
+  }
+
+  // +1 to be able to detect EOF in the first read call
+  size_hint = (size_hint < 1024) ? 1024 : size_hint + 1;
+
+  const int open_flags = [] {
+    if constexpr (std::is_same<T, std::string>::value) {
+      return O_RDONLY | O_TEXT;
+    } else {
+      return O_RDONLY | O_BINARY;
+    }
+  }();
+  Fd fd(open(path.c_str(), open_flags));
+  if (!fd) {
+    const int saved_errno = errno; // see above: keep errno safe from LOG
+    LOG("Failed to open {}: {}", path, strerror(saved_errno));
+    return nonstd::make_unexpected(strerror(saved_errno));
+  }
+
+  int64_t ret = 0;
+  size_t pos = 0;
+  T result;
+  result.resize(size_hint);
+
+  while (true) {
+    if (pos == result.size()) {
+      // Buffer is full but EOF has not been seen yet: grow it.
+      result.resize(2 * result.size());
+    }
+    const size_t max_read = result.size() - pos;
+    ret = read(*fd, &result[pos], max_read);
+    if (ret == 0 || (ret == -1 && errno != EINTR)) {
+      // EOF or a real error. A read interrupted by a signal is retried.
+      break;
+    }
+    if (ret > 0) {
+      pos += ret;
+      if (static_cast<size_t>(ret) < max_read) {
+        // A short read is taken as EOF, which saves a final read call.
+        break;
+      }
+    }
+  }
+
+  if (ret == -1) {
+    const int saved_errno = errno; // see above: keep errno safe from LOG
+    LOG("Failed to read {}: {}", path, strerror(saved_errno));
+    return nonstd::make_unexpected(strerror(saved_errno));
+  }
+
+  // Drop the unused tail of the buffer.
+  result.resize(pos);
+
+#ifdef _WIN32
+  if constexpr (std::is_same<T, std::string>::value) {
+    // Convert to UTF-8 if the content starts with a UTF-16 little-endian BOM.
+    //
+    // Note that this code assumes a little-endian machine, which is why it's
+    // #ifdef-ed to only run on Windows (which is always little-endian) where
+    // it's actually needed.
+    if (has_utf16_le_bom(result)) {
+      result.erase(0, 2); // Remove BOM.
+      // Copy exactly size/2 code units. Treating the byte buffer as a
+      // NUL-terminated char16_t string would read past the single '\0' byte
+      // that terminates a std::string and would stop at an embedded U+0000.
+      // A trailing odd byte cannot form a code unit and is ignored.
+      std::u16string result_as_u16(result.size() / 2, u'\0');
+      std::memcpy(result_as_u16.data(),
+                  result.data(),
+                  result_as_u16.size() * sizeof(char16_t));
+      std::wstring_convert<std::codecvt_utf8_utf16<char16_t>, char16_t>
+        converter;
+      result = converter.to_bytes(result_as_u16);
+    }
}
+#endif
+
+  return result;
}
+template nonstd::expected<std::string, std::string>
+read_file(const std::string& path, size_t size_hint);
+
+template nonstd::expected<util::Blob, std::string>
+read_file(const std::string& path, size_t size_hint);
+
void
set_timestamps(const std::string& path,
std::optional<timespec> mtime,
@@ -93,4 +217,42 @@ set_timestamps(const std::string& path,
#endif
}
+nonstd::expected<void, std::string>
+write_fd(int fd, const void* data, size_t size) // Write all `size` bytes of `data` to `fd`, looping over partial writes.
+{
+ int64_t written = 0; // number of bytes written so far
+ do {
+ const auto count =
+ write(fd, static_cast<const uint8_t*>(data) + written, size - written); // resume right after the bytes already written
+ if (count == -1) {
+ if (errno != EAGAIN && errno != EINTR) { // EAGAIN/EINTR: just loop and try again
+ return nonstd::make_unexpected(strerror(errno)); // any other failure is reported as strerror text
+ }
+ } else {
+ written += count; // may be less than requested; the loop writes the rest
+ }
+ } while (static_cast<size_t>(written) < size); // do-while: write() is called once even when size is 0
+ return {};
+}
+
+nonstd::expected<void, std::string>
+write_file(const std::string& path, const std::string& data) // Text overload: create or truncate `path` and write `data` to it.
+{
+ Fd fd(open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_TEXT, 0666)); // O_TEXT is what distinguishes this from the Blob overload
+ if (!fd) {
+ return nonstd::make_unexpected(strerror(errno)); // open failed: report the strerror text
+ }
+ return write_fd(*fd, data.data(), data.size()); // a write error is passed through unchanged
+}
+
+nonstd::expected<void, std::string>
+write_file(const std::string& path, const util::Blob& data) // Binary overload: create or truncate `path` and write `data` to it.
+{
+ Fd fd(open(path.c_str(), O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, 0666)); // O_BINARY is what distinguishes this from the std::string overload
+ if (!fd) {
+ return nonstd::make_unexpected(strerror(errno)); // open failed: report the strerror text
+ }
+ return write_fd(*fd, data.data(), data.size()); // a write error is passed through unchanged
+}
+
} // namespace util
diff --git a/src/util/file.hpp b/src/util/file.hpp
index 61a66bd1..886740ff 100644
--- a/src/util/file.hpp
+++ b/src/util/file.hpp
@@ -18,6 +18,10 @@
#pragma once
+#include <util/types.hpp>
+
+#include <third_party/nonstd/expected.hpp>
+
#include <ctime>
#include <optional>
#include <string>
@@ -28,10 +32,35 @@ namespace util {
void create_cachedir_tag(const std::string& dir);
+// Read data from `fd` until end of file and call `data_receiver` with the read
+// data. Returns an error if the underlying read(2) call returned -1.
+nonstd::expected<void, std::string> read_fd(int fd, DataReceiver data_receiver);
+
+// Return data from `path`, where `T` is `std::string` for text data and
+// `util::Blob` for binary data. If `T` is `std::string` and the content starts
+// with a UTF-16 little-endian BOM on Windows then it will be converted to
+// UTF-8. If `size_hint` is not 0 then it is assumed that `path` has this size
+// (this saves system calls).
+template<typename T>
+nonstd::expected<T, std::string> read_file(const std::string& path,
+ size_t size_hint = 0);
+
// Set atime/mtime of `path`. If `mtime` is std::nullopt, set to the current
// time. If `atime` is std::nullopt, set to what `mtime` specifies.
void set_timestamps(const std::string& path,
std::optional<timespec> mtime = std::nullopt,
std::optional<timespec> atime = std::nullopt);
+// Write `size` bytes from `data` to `fd`. Returns an error message on failure.
+nonstd::expected<void, std::string>
+write_fd(int fd, const void* data, size_t size);
+
+// Write text `data` to `path`.
+nonstd::expected<void, std::string> write_file(const std::string& path,
+ const std::string& data);
+
+// Write binary `data` to `path`.
+nonstd::expected<void, std::string> write_file(const std::string& path,
+ const util::Blob& data);
+
} // namespace util
diff --git a/src/util/types.hpp b/src/util/types.hpp
new file mode 100644
index 00000000..8c9d1966
--- /dev/null
+++ b/src/util/types.hpp
@@ -0,0 +1,31 @@
+// Copyright (C) 2022 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#pragma once
+
+#include <cstddef>
+#include <cstdint>
+#include <functional>
+#include <vector>
+
+namespace util {
+
+using Blob = std::vector<uint8_t>; // byte buffer type
+using DataReceiver = std::function<void(const void* data, size_t size)>; // callback taking a pointer to a chunk of data and its size
+
+} // namespace util
diff --git a/unittest/CMakeLists.txt b/unittest/CMakeLists.txt
index 4f52bf9c..9b852250 100644
--- a/unittest/CMakeLists.txt
+++ b/unittest/CMakeLists.txt
@@ -27,6 +27,7 @@ set(
test_util_XXH3_128.cpp
test_util_XXH3_64.cpp
test_util_expected.cpp
+ test_util_file.cpp
test_util_path.cpp
test_util_string.cpp
)
diff --git a/unittest/test_util_file.cpp b/unittest/test_util_file.cpp
new file mode 100644
index 00000000..dbbf6667
--- /dev/null
+++ b/unittest/test_util_file.cpp
@@ -0,0 +1,119 @@
+// Copyright (C) 2022 Joel Rosdahl and other contributors
+//
+// See doc/AUTHORS.adoc for a complete list of contributors.
+//
+// This program is free software; you can redistribute it and/or modify it
+// under the terms of the GNU General Public License as published by the Free
+// Software Foundation; either version 3 of the License, or (at your option)
+// any later version.
+//
+// This program is distributed in the hope that it will be useful, but WITHOUT
+// ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+// FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+// more details.
+//
+// You should have received a copy of the GNU General Public License along with
+// this program; if not, write to the Free Software Foundation, Inc., 51
+// Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+
+#include "TestUtil.hpp"
+
+#include <util/file.hpp>
+
+#include <third_party/doctest.h>
+
+#include <cstring>
+
+using TestUtil::TestContext;
+
+TEST_CASE("util::read_file and util::write_file, text data") // Round-trips, newline handling, size hints and error reporting for text data.
+{
+ TestContext test_context;
+
+ REQUIRE(util::write_file("test", "foo\nbar\n")); // LF-only text round-trips unchanged
+ auto data = util::read_file<std::string>("test");
+ REQUIRE(data);
+ CHECK(*data == "foo\nbar\n");
+
+ REQUIRE(util::write_file("test", "foo\r\nbar\r\n")); // CRLF text round-trips unchanged as well
+ data = util::read_file<std::string>("test");
+ REQUIRE(data);
+ CHECK(*data == "foo\r\nbar\r\n");
+
+ // Newline handling
+ REQUIRE(util::write_file("test", "foo\r\nbar\n")); // written as text ...
+ auto bin_data = util::read_file<util::Blob>("test"); // ... but read back as raw bytes
+ REQUIRE(bin_data);
+#ifdef _WIN32
+ const std::string expected_bin_data = "foo\r\r\nbar\r\n"; // on Windows each "\n" written as text is expected on disk as "\r\n"
+#else
+ const std::string expected_bin_data = "foo\r\nbar\n"; // elsewhere the bytes are expected on disk unchanged
+#endif
+ CHECK(*bin_data
+ == util::Blob(expected_bin_data.begin(), expected_bin_data.end()));
+
+ REQUIRE(util::write_file("size_hint_test", std::string(8192, '\0')));
+ data = util::read_file<std::string>("size_hint_test", 8191 /*size_hint*/); // hint one byte smaller than the file
+ REQUIRE(data);
+ CHECK(data->size() == 8192);
+ data = util::read_file<std::string>("size_hint_test", 8193 /*size_hint*/); // hint one byte larger than the file
+ REQUIRE(data);
+ CHECK(data->size() == 8192);
+
+ data = util::read_file<std::string>("does/not/exist"); // reading a missing file yields an error message
+ REQUIRE(!data);
+ CHECK(data.error() == "No such file or directory");
+
+ auto result = util::write_file("", "does/not/exist"); // empty path; the second argument is just the payload
+ REQUIRE(!result);
+ CHECK(result.error() == "No such file or directory");
+
+ result = util::write_file("does/not/exist", "does/not/exist"); // parent directory does not exist
+ REQUIRE(!result);
+ CHECK(result.error() == "No such file or directory");
+}
+
+TEST_CASE("util::read_file and util::write_file, binary data") // Round-trip and size-hint checks for util::Blob data.
+{
+ TestContext test_context;
+
+ util::Blob expected;
+ for (size_t i = 0; i < 512; ++i) {
+ expected.push_back((32 + i) % 256); // 512 bytes cycling through every byte value, starting at 32
+ }
+
+ CHECK(util::write_file("test", expected));
+ auto actual = util::read_file<util::Blob>("test");
+ REQUIRE(actual);
+ CHECK(*actual == expected); // bytes must come back unmodified
+
+ REQUIRE(util::write_file("size_hint_test", util::Blob(8192, 0)));
+ auto data = util::read_file<util::Blob>("size_hint_test", 8191 /*size_hint*/); // hint one byte smaller than the file
+ REQUIRE(data);
+ CHECK(data->size() == 8192);
+ data = util::read_file<util::Blob>("size_hint_test", 8193 /*size_hint*/); // hint one byte larger than the file
+ REQUIRE(data);
+ CHECK(data->size() == 8192);
+}
+
+#ifdef _WIN32
+TEST_CASE("util::read_file<std::string> with UTF-16 little endian encoding") // A file starting with a UTF-16LE BOM must be read back as UTF-8.
+{
+ TestContext test_context;
+
+ std::string data;
+ data.push_back(static_cast<unsigned char>(0xff)); // BOM, first byte
+ data.push_back(static_cast<unsigned char>(0xfe)); // BOM, second byte
+ data.push_back('a'); // each character is followed by a zero byte: "abc" as 16-bit little-endian units
+ data.push_back('\0');
+ data.push_back('b');
+ data.push_back('\0');
+ data.push_back('c');
+ data.push_back('\0');
+
+ CHECK(util::write_file("test", data));
+ auto read_data = util::read_file<std::string>("test");
+ REQUIRE(read_data);
+ CHECK(*read_data == "abc"); // BOM removed and the text converted to UTF-8
+}
+#endif