summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--CONTRIBUTING.md33
-rw-r--r--README.md26
-rw-r--r--snappy-internal.h12
-rw-r--r--snappy.cc48
4 files changed, 72 insertions, 47 deletions
diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md
index d0ce551..66a60d5 100644
--- a/CONTRIBUTING.md
+++ b/CONTRIBUTING.md
@@ -3,30 +3,10 @@
We'd love to accept your patches and contributions to this project. There are
just a few small guidelines you need to follow.
-## Project Goals
-
-In addition to the aims listed at the top of the [README](README.md) Snappy
-explicitly supports the following:
-
-1. C++11
-2. Clang (gcc and MSVC are best-effort).
-3. Low level optimizations (e.g. assembly or equivalent intrinsics) for:
- 1. [x86](https://en.wikipedia.org/wiki/X86)
- 2. [x86-64](https://en.wikipedia.org/wiki/X86-64)
- 3. ARMv7 (32-bit)
- 4. ARMv8 (AArch64)
-4. Supports only the Snappy compression scheme as described in
- [format_description.txt](format_description.txt).
-5. CMake for building
-
-Changes adding features or dependencies outside of the core area of focus listed
-above might not be accepted. If in doubt post a message to the
-[Snappy discussion mailing list](https://groups.google.com/g/snappy-compression).
-
## Contributor License Agreement
Contributions to this project must be accompanied by a Contributor License
-Agreement. You (or your employer) retain the copyright to your contribution,
+Agreement. You (or your employer) retain the copyright to your contribution;
this simply gives us permission to use and redistribute your contributions as
part of the project. Head over to <https://cla.developers.google.com/> to see
your current agreements on file or to sign a new one.
@@ -35,12 +15,17 @@ You generally only need to submit a CLA once, so if you've already submitted one
(even if it was for a different project), you probably don't need to do it
again.
-## Code reviews
+## Code Reviews
All submissions, including submissions by project members, require review. We
use GitHub pull requests for this purpose. Consult
[GitHub Help](https://help.github.com/articles/about-pull-requests/) for more
information on using pull requests.
-Please make sure that all the automated checks (CLA, AppVeyor, Travis) pass for
-your pull requests. Pull requests whose checks fail may be ignored.
+See [the README](README.md#contributing-to-the-snappy-project) for areas
+where we are likely to accept external contributions.
+
+## Community Guidelines
+
+This project follows [Google's Open Source Community
+Guidelines](https://opensource.google/conduct/).
diff --git a/README.md b/README.md
index c6f7a07..398be7d 100644
--- a/README.md
+++ b/README.md
@@ -131,6 +131,32 @@ should provide a reasonably balanced starting point for benchmarking. (Note that
baddata[1-3].snappy are not intended as benchmarks; they are used to verify
correctness in the presence of corrupted data in the unit test.)
+Contributing to the Snappy Project
+==================================
+
+In addition to the aims listed at the top of the [README](README.md) Snappy
+explicitly supports the following:
+
+1. C++11
+2. Clang (gcc and MSVC are best-effort).
+3. Low level optimizations (e.g. assembly or equivalent intrinsics) for:
+ 1. [x86](https://en.wikipedia.org/wiki/X86)
+ 2. [x86-64](https://en.wikipedia.org/wiki/X86-64)
+ 3. ARMv7 (32-bit)
+ 4. ARMv8 (AArch64)
+4. Supports only the Snappy compression scheme as described in
+ [format_description.txt](format_description.txt).
+5. CMake for building
+
+Changes adding features or dependencies outside of the core area of focus listed
+above might not be accepted. If in doubt post a message to the
+[Snappy discussion mailing list](https://groups.google.com/g/snappy-compression).
+
+We are unlikely to accept contributions to the build configuration files, such
+as `CMakeLists.txt`. We are focused on maintaining a build configuration that
+allows us to test that the project works in a few supported configurations
+inside Google. We are not currently interested in supporting other requirements,
+such as different operating systems, compilers, or build systems.
Contact
=======
diff --git a/snappy-internal.h b/snappy-internal.h
index ae7ab5a..e552ea0 100644
--- a/snappy-internal.h
+++ b/snappy-internal.h
@@ -230,8 +230,9 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
uint64_t xorval = a1 ^ a2;
int shift = Bits::FindLSBSetNonZero64(xorval);
size_t matched_bytes = shift >> 3;
+ uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
#ifndef __x86_64__
- *data = UNALIGNED_LOAD64(s2 + matched_bytes);
+ a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
#else
// Ideally this would just be
//
@@ -242,13 +243,12 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
// use a conditional move (it's tuned to cut data dependencies). In this
// case there is a longer parallel chain anyway AND this will be fairly
// unpredictable.
- uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
asm("testl %k2, %k2\n\t"
"cmovzq %1, %0\n\t"
: "+r"(a2)
: "r"(a3), "r"(xorval));
- *data = a2 >> (shift & (3 * 8));
#endif
+ *data = a2 >> (shift & (3 * 8));
return std::pair<size_t, bool>(matched_bytes, true);
} else {
matched = 8;
@@ -270,16 +270,16 @@ static inline std::pair<size_t, bool> FindMatchLength(const char* s1,
uint64_t xorval = a1 ^ a2;
int shift = Bits::FindLSBSetNonZero64(xorval);
size_t matched_bytes = shift >> 3;
+ uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
#ifndef __x86_64__
- *data = UNALIGNED_LOAD64(s2 + matched_bytes);
+ a2 = static_cast<uint32_t>(xorval) == 0 ? a3 : a2;
#else
- uint64_t a3 = UNALIGNED_LOAD64(s2 + 4);
asm("testl %k2, %k2\n\t"
"cmovzq %1, %0\n\t"
: "+r"(a2)
: "r"(a3), "r"(xorval));
- *data = a2 >> (shift & (3 * 8));
#endif
+ *data = a2 >> (shift & (3 * 8));
matched += matched_bytes;
assert(matched >= 8);
return std::pair<size_t, bool>(matched, false);
diff --git a/snappy.cc b/snappy.cc
index bb9e0e5..6502cfd 100644
--- a/snappy.cc
+++ b/snappy.cc
@@ -340,6 +340,7 @@ static inline bool Copy64BytesWithPatternExtension(char* dst, size_t offset) {
if (SNAPPY_PREDICT_TRUE(offset < 16)) {
if (SNAPPY_PREDICT_FALSE(offset == 0)) return false;
// Extend the pattern to the first 16 bytes.
+ // The simpler formulation of `dst[i - offset]` induces undefined behavior.
for (int i = 0; i < 16; i++) dst[i] = (dst - offset)[i];
// Find a multiple of pattern >= 16.
static std::array<uint8_t, 16> pattern_sizes = []() {
@@ -983,22 +984,35 @@ inline bool Copy64BytesWithPatternExtension(ptrdiff_t dst, size_t offset) {
return offset != 0;
}
-void MemCopy(char* dst, const uint8_t* src, size_t size) {
- std::memcpy(dst, src, size);
-}
-
-void MemCopy(ptrdiff_t dst, const uint8_t* src, size_t size) {
- // TODO: Switch to [[maybe_unused]] when we can assume C++17.
- (void)dst;
- (void)src;
- (void)size;
-}
-
-void MemMove(char* dst, const void* src, size_t size) {
- std::memmove(dst, src, size);
+// Copies between size bytes and 64 bytes from src to dest. size cannot exceed
+// 64. More than size bytes, but never exceeding 64, might be copied if doing
+// so gives better performance. [src, src + size) must not overlap with
+// [dst, dst + size), but [src, src + 64) may overlap with [dst, dst + 64).
+void MemCopy64(char* dst, const void* src, size_t size) {
+ // Always copy this many bytes, test if we need to copy more.
+ constexpr int kShortMemCopy = 32;
+ // We're always allowed to copy 64 bytes, so if we exceed kShortMemCopy just
+ // copy 64 rather than the exact amount.
+ constexpr int kLongMemCopy = 64;
+
+ assert(size <= kLongMemCopy);
+ assert(std::less_equal<const void*>()(static_cast<const char*>(src) + size,
+ dst) ||
+ std::less_equal<const void*>()(dst + size, src));
+
+ // We know that src and dst are at least size bytes apart. However, because we
+ // might copy more than size bytes the copy still might overlap past size.
+ // E.g. if src and dst appear consecutively in memory (src + size == dst).
+ std::memmove(dst, src, kShortMemCopy);
+ // Profiling shows that nearly all copies are short.
+ if (SNAPPY_PREDICT_FALSE(size > kShortMemCopy)) {
+ std::memmove(dst + kShortMemCopy,
+ static_cast<const uint8_t*>(src) + kShortMemCopy,
+ kLongMemCopy - kShortMemCopy);
+ }
}
-void MemMove(ptrdiff_t dst, const void* src, size_t size) {
+void MemCopy64(ptrdiff_t dst, const void* src, size_t size) {
// TODO: Switch to [[maybe_unused]] when we can assume C++17.
(void)dst;
(void)src;
@@ -1041,7 +1055,7 @@ size_t AdvanceToNextTagX86Optimized(const uint8_t** ip_p, size_t* tag) {
size_t literal_len = *tag >> 2;
size_t tag_type = *tag;
bool is_literal;
-#if defined(__GNUC__) && defined(__x86_64__)
+#if defined(__GCC_ASM_FLAG_OUTPUTS__) && defined(__x86_64__)
// TODO clang misses the fact that the (c & 3) already correctly
// sets the zero flag.
asm("and $3, %k[tag_type]\n\t"
@@ -1170,7 +1184,7 @@ std::pair<const uint8_t*, ptrdiff_t> DecompressBranchless(
// Due to the spurious offset in literals have this will trigger
// at the start of a block when op is still smaller than 256.
if (tag_type != 0) goto break_loop;
- MemCopy(op_base + op, old_ip, 64);
+ MemCopy64(op_base + op, old_ip, len);
op += len;
continue;
}
@@ -1179,7 +1193,7 @@ std::pair<const uint8_t*, ptrdiff_t> DecompressBranchless(
// we need to copy from ip instead of from the stream.
const void* from =
tag_type ? reinterpret_cast<void*>(op_base + delta) : old_ip;
- MemMove(op_base + op, from, 64);
+ MemCopy64(op_base + op, from, len);
op += len;
}
} while (ip < ip_limit_min_slop && op < op_limit_min_slop);