summaryrefslogtreecommitdiff
path: root/lib
diff options
context:
space:
mode:
authorJosé Valim <jose.valim@plataformatec.com.br>2017-10-30 18:43:33 +0100
committerJosé Valim <jose.valim@plataformatec.com.br>2017-10-30 18:45:16 +0100
commitb3992aa57499ce46c386f95992f729c330ee173c (patch)
tree6319a537a606f95d03d9858e1ddd004962aeef2a /lib
parentdfcc88fc33a12f162ad926c3aa1ae7293a06db3f (diff)
downloadelixir-b3992aa57499ce46c386f95992f729c330ee173c.tar.gz
Minimize generated beam size by using ranges
This approach is less performant when processing sigma but compiles twice faster and takes two times less space in disk.
Diffstat (limited to 'lib')
-rw-r--r--lib/elixir/unicode/properties.ex46
1 files changed, 40 insertions, 6 deletions
diff --git a/lib/elixir/unicode/properties.ex b/lib/elixir/unicode/properties.ex
index f4ea97c73..f273be5af 100644
--- a/lib/elixir/unicode/properties.ex
+++ b/lib/elixir/unicode/properties.ex
@@ -11,8 +11,23 @@ to_binary = fn
|> IO.iodata_to_binary()
end
+rangify = fn [head | tail] ->
+ {first, last, acc} =
+ Enum.reduce(tail, {head, head, []}, fn
+ number, {first, last, acc} when number == first - 1 ->
+ {number, last, acc}
+
+ number, {first, last, acc} ->
+ {number, number, [{first, last} | acc]}
+ end)
+
+ [{first, last} | acc]
+end
+
+acc = {[], [], [], %{}, %{}}
+
{codes, letters, non_breakable, decompositions, combining_classes} =
- Enum.reduce(File.stream!(data_path), {[], [], [], %{}, %{}}, fn line, {cacc, lacc, wacc, dacc, kacc} ->
+ Enum.reduce(File.stream!(data_path), acc, fn line, {cacc, lacc, wacc, dacc, kacc} ->
[
codepoint,
_name,
@@ -42,7 +57,7 @@ end
lacc =
case category do
- "L" <> _ -> [to_binary.(codepoint) | lacc]
+ "L" <> _ -> [String.to_integer(codepoint, 16) | lacc]
_ -> lacc
end
@@ -92,11 +107,16 @@ defmodule String.Casing do
def downcase(string) when is_binary(string), do: downcase(string, "")
- for codepoint <- letters do
- defp downcase(<<0x03A3::utf8, unquote(codepoint), rest::bits>>, acc) do
- downcase(<<unquote(codepoint), rest::bits>>, <<acc::binary, 0x03C3::utf8>>)
- end
+ defp downcase(<<0x03A3::utf8, codepoint::utf8, rest::bits>>, acc) do
+ downcased =
+ case letter?(codepoint) do
+ true -> 0x03C3
+ false -> 0x03C2
+ end
+
+ downcase(<<codepoint::utf8, rest::bits>>, <<acc::binary, downcased::utf8>>)
end
+
defp downcase(<<0x03A3::utf8, rest::bits>>, acc) do
downcase(rest, <<acc::binary, 0x03C2::utf8>>)
end
@@ -113,6 +133,20 @@ defmodule String.Casing do
defp downcase("", acc), do: acc
+ # Sigma handling
+
+ for {first, last} <- rangify.(letters) do
+ if first == last do
+ defp letter?(unquote(first)), do: true
+ else
+ defp letter?(codepoint)
+ when codepoint >= unquote(first) and codepoint <= unquote(last),
+ do: true
+ end
+ end
+
+ defp letter?(_), do: false
+
# Upcase
def upcase(string) when is_binary(string), do: upcase(string, "")