diff options
author | José Valim <jose.valim@plataformatec.com.br> | 2017-10-30 18:43:33 +0100 |
---|---|---|
committer | José Valim <jose.valim@plataformatec.com.br> | 2017-10-30 18:45:16 +0100 |
commit | b3992aa57499ce46c386f95992f729c330ee173c (patch) | |
tree | 6319a537a606f95d03d9858e1ddd004962aeef2a /lib | |
parent | dfcc88fc33a12f162ad926c3aa1ae7293a06db3f (diff) | |
download | elixir-b3992aa57499ce46c386f95992f729c330ee173c.tar.gz |
Minimize generated beam size by using ranges
This approach is less performant when processing
sigma but compiles twice faster and takes two times
less space in disk.
Diffstat (limited to 'lib')
-rw-r--r-- | lib/elixir/unicode/properties.ex | 46 |
1 files changed, 40 insertions, 6 deletions
diff --git a/lib/elixir/unicode/properties.ex b/lib/elixir/unicode/properties.ex index f4ea97c73..f273be5af 100644 --- a/lib/elixir/unicode/properties.ex +++ b/lib/elixir/unicode/properties.ex @@ -11,8 +11,23 @@ to_binary = fn |> IO.iodata_to_binary() end +rangify = fn [head | tail] -> + {first, last, acc} = + Enum.reduce(tail, {head, head, []}, fn + number, {first, last, acc} when number == first - 1 -> + {number, last, acc} + + number, {first, last, acc} -> + {number, number, [{first, last} | acc]} + end) + + [{first, last} | acc] +end + +acc = {[], [], [], %{}, %{}} + {codes, letters, non_breakable, decompositions, combining_classes} = - Enum.reduce(File.stream!(data_path), {[], [], [], %{}, %{}}, fn line, {cacc, lacc, wacc, dacc, kacc} -> + Enum.reduce(File.stream!(data_path), acc, fn line, {cacc, lacc, wacc, dacc, kacc} -> [ codepoint, _name, @@ -42,7 +57,7 @@ end lacc = case category do - "L" <> _ -> [to_binary.(codepoint) | lacc] + "L" <> _ -> [String.to_integer(codepoint, 16) | lacc] _ -> lacc end @@ -92,11 +107,16 @@ defmodule String.Casing do def downcase(string) when is_binary(string), do: downcase(string, "") - for codepoint <- letters do - defp downcase(<<0x03A3::utf8, unquote(codepoint), rest::bits>>, acc) do - downcase(<<unquote(codepoint), rest::bits>>, <<acc::binary, 0x03C3::utf8>>) - end + defp downcase(<<0x03A3::utf8, codepoint::utf8, rest::bits>>, acc) do + downcased = + case letter?(codepoint) do + true -> 0x03C3 + false -> 0x03C2 + end + + downcase(<<codepoint::utf8, rest::bits>>, <<acc::binary, downcased::utf8>>) end + defp downcase(<<0x03A3::utf8, rest::bits>>, acc) do downcase(rest, <<acc::binary, 0x03C2::utf8>>) end @@ -113,6 +133,20 @@ defmodule String.Casing do defp downcase("", acc), do: acc + # Sigma handling + + for {first, last} <- rangify.(letters) do + if first == last do + defp letter?(unquote(first)), do: true + else + defp letter?(codepoint) + when codepoint >= unquote(first) and codepoint <= unquote(last), + do: true + end + end + + defp letter?(_), do: false + # Upcase def upcase(string) when is_binary(string), do: upcase(string, "") |