diff options
author | José Valim <jose.valim@plataformatec.com.br> | 2018-07-12 11:37:19 +0200 |
---|---|---|
committer | José Valim <jose.valim@plataformatec.com.br> | 2018-07-12 11:38:26 +0200 |
commit | 16b358cd540f61697e7e571cc771f82092c365ba (patch) | |
tree | 343880b2229cced1b52db3bdb1b430f8f48e800f | |
parent | 61d42c2a555417de2cfbccaaad0acd26b5fc1210 (diff) | |
download | elixir-16b358cd540f61697e7e571cc771f82092c365ba.tar.gz |
Do not send binread to streams with encoding, closes #7729
-rw-r--r-- | lib/elixir/lib/file/stream.ex | 27 | ||||
-rw-r--r-- | lib/elixir/test/elixir/file_test.exs | 301 |
2 files changed, 187 insertions, 141 deletions
diff --git a/lib/elixir/lib/file/stream.ex b/lib/elixir/lib/file/stream.ex index f3e1c0758..08884ab4c 100644 --- a/lib/elixir/lib/file/stream.ex +++ b/lib/elixir/lib/file/stream.ex @@ -77,7 +77,7 @@ defmodule File.Stream do start_fun = fn -> case :file.open(path, read_modes(modes)) do {:ok, device} -> - if :trim_bom in modes, do: trim_bom(device), else: device + if :trim_bom in modes, do: trim_bom(device, raw) |> elem(0), else: device {:error, reason} -> raise File.Error, reason: reason, action: "stream", path: path @@ -106,19 +106,24 @@ defmodule File.Stream do end end - def count(%{path: path, line_or_bytes: bytes}) do + def count(%{path: path, line_or_bytes: bytes, raw: true, modes: modes}) do case File.stat(path) do {:ok, %{size: 0}} -> {:error, __MODULE__} {:ok, %{size: size}} -> - {:ok, div(size, bytes) + if(rem(size, bytes) == 0, do: 0, else: 1)} + remainder = if rem(size, bytes) == 0, do: 0, else: 1 + {:ok, div(size, bytes) + remainder - count_raw_bom(path, modes)} {:error, reason} -> raise File.Error, reason: reason, action: "stream", path: path end end + def count(_stream) do + {:error, __MODULE__} + end + def member?(_stream, _term) do {:error, __MODULE__} end @@ -127,10 +132,18 @@ defmodule File.Stream do {:error, __MODULE__} end - defp trim_bom(device) do - header = IO.binread(device, 4) - {:ok, _new_pos} = :file.position(device, bom_length(header)) - device + defp count_raw_bom(path, modes) do + if :trim_bom in modes do + File.open!(path, read_modes(modes), &(&1 |> trim_bom(true) |> elem(1))) + else + 0 + end + end + + defp trim_bom(device, raw) do + header = if raw, do: IO.binread(device, 4), else: IO.read(device, 1) + {:ok, new_pos} = :file.position(device, bom_length(header)) + {device, new_pos} end defp bom_length(<<239, 187, 191, _rest::binary>>), do: 3 diff --git a/lib/elixir/test/elixir/file_test.exs b/lib/elixir/test/elixir/file_test.exs index e08ab2537..8d9987f82 100644 --- a/lib/elixir/test/elixir/file_test.exs +++ b/lib/elixir/test/elixir/file_test.exs @@ -1418,180 +1418,213 @@ defmodule FileTest do end end - test "stream struct" do - src = fixture_path("file.txt") - stream = File.stream!(src) - assert %File.Stream{} = stream - assert stream.modes == [:raw, :read_ahead, :binary] - assert stream.raw - assert stream.line_or_bytes == :line - - stream = File.stream!(src, read_ahead: false) - assert %File.Stream{} = stream - assert stream.modes == [:raw, :binary] - assert stream.raw - - stream = File.stream!(src, read_ahead: 5000) - assert %File.Stream{} = stream - assert stream.modes == [:raw, {:read_ahead, 5000}, :binary] - assert stream.raw - - stream = File.stream!(src, [:utf8], 10) - assert %File.Stream{} = stream - assert stream.modes == [{:encoding, :utf8}, :binary] - refute stream.raw - assert stream.line_or_bytes == 10 - end - - test "stream count" do - src = fixture_path("file.txt") - stream = File.stream!(src) - assert Enum.count(stream) == 1 + describe "file stream" do + test "returns a struct" do + src = fixture_path("file.txt") + stream = File.stream!(src) + assert %File.Stream{} = stream + assert stream.modes == [:raw, :read_ahead, :binary] + assert stream.raw + assert stream.line_or_bytes == :line - stream = File.stream!(src, [:utf8]) - assert Enum.count(stream) == 1 + stream = File.stream!(src, read_ahead: false) + assert %File.Stream{} = stream + assert stream.modes == [:raw, :binary] + assert stream.raw - stream = File.stream!(src, [], 2) - assert Enum.count(stream) == 2 - end + stream = File.stream!(src, read_ahead: 5000) + assert %File.Stream{} = stream + assert stream.modes == [:raw, {:read_ahead, 5000}, :binary] + assert stream.raw - test "stream keeps BOM" do - src = fixture_path("utf8_bom.txt") + stream = File.stream!(src, [:utf8], 10) + assert %File.Stream{} = stream + assert stream.modes == [{:encoding, :utf8}, :binary] + refute stream.raw + assert stream.line_or_bytes == 10 + end - bom_line = - src - |> File.stream!() - |> Enum.take(1) + test "counts bytes/characters" do + src = fixture_path("file.txt") + stream = File.stream!(src) + assert Enum.count(stream) == 1 - assert [<<239, 187, 191>> <> "Русский\n"] == bom_line - end + stream = File.stream!(src, [:utf8]) + assert Enum.count(stream) == 1 - test "trim BOM via option" do - src = fixture_path("utf8_bom.txt") + stream = File.stream!(src, [], 2) + assert Enum.count(stream) == 2 + end - bom_line = - src - |> File.stream!([:trim_bom]) - |> Enum.take(1) + test "reads and writes lines" do + src = fixture_path("file.txt") + dest = tmp_path("tmp_test.txt") - assert ["Русский\n"] == bom_line - end + try do + stream = File.stream!(src) - test "stream line UTF-8" do - src = fixture_path("file.txt") - dest = tmp_path("tmp_test.txt") + File.open(dest, [:write], fn target -> + Enum.each(stream, fn line -> + IO.write(target, String.replace(line, "O", "A")) + end) + end) - try do - stream = File.stream!(src) + assert File.read(dest) == {:ok, "FAA\n"} + after + File.rm(dest) + end + end - File.open(dest, [:write, :utf8], fn target -> - Enum.each(stream, fn line -> - IO.write(target, String.replace(line, "O", "A")) + test "reads and writes bytes" do + src = fixture_path("file.txt") + dest = tmp_path("tmp_test.txt") + + try do + stream = File.stream!(src, [], 1) + + File.open(dest, [:write], fn target -> + Enum.each(stream, fn <<char>> -> + IO.write(target, <<char + 1>>) + end) end) - end) - assert File.read(dest) == {:ok, "FAA\n"} - after - File.rm(dest) + assert File.read(dest) == {:ok, "GPP\v"} + after + File.rm(dest) + end end - end - test "stream bytes UTF-8" do - src = fixture_path("file.txt") - dest = tmp_path("tmp_test.txt") + test "is collectable" do + src = fixture_path("file.txt") + dest = tmp_path("tmp_test.txt") - try do - stream = File.stream!(src, [:utf8], 1) + try do + refute File.exists?(dest) + original = File.stream!(dest) - File.open(dest, [:write], fn target -> - Enum.each(stream, fn line -> - IO.write(target, String.replace(line, "OO", "AA")) - end) - end) + stream = + File.stream!(src) + |> Stream.map(&String.replace(&1, "O", "A")) + |> Enum.into(original) - assert File.read(dest) == {:ok, "FOO\n"} - after - File.rm(dest) + assert stream == original + assert File.read(dest) == {:ok, "FAA\n"} + after + File.rm(dest) + end end - end - test "stream line" do - src = fixture_path("file.txt") - dest = tmp_path("tmp_test.txt") + test "is collectable with append" do + src = fixture_path("file.txt") + dest = tmp_path("tmp_test.txt") - try do - stream = File.stream!(src) + try do + refute File.exists?(dest) + original = File.stream!(dest, [:append]) - File.open(dest, [:write], fn target -> - Enum.each(stream, fn line -> - IO.write(target, String.replace(line, "O", "A")) - end) - end) + File.stream!(src, [:append]) + |> Stream.map(&String.replace(&1, "O", "A")) + |> Enum.into(original) - assert File.read(dest) == {:ok, "FAA\n"} - after - File.rm(dest) + File.stream!(src, [:append]) + |> Enum.into(original) + + assert File.read(dest) == {:ok, "FAA\nFOO\n"} + after + File.rm(dest) + end end - end - test "stream bytes" do - src = fixture_path("file.txt") - dest = tmp_path("tmp_test.txt") + test "keeps BOM when raw" do + src = fixture_path("utf8_bom.txt") - try do - stream = File.stream!(src, [], 1) + assert src + |> File.stream!([]) + |> Enum.take(1) == [<<239, 187, 191>> <> "Русский\n"] - File.open(dest, [:write], fn target -> - Enum.each(stream, fn line -> - IO.write(target, String.replace(line, "OO", "AA")) - end) - end) + assert src + |> File.stream!([], 1) + |> Enum.take(5) == [<<239>>, <<187>>, <<191>>, <<208>>, <<160>>] - assert File.read(dest) == {:ok, "FOO\n"} - after - File.rm(dest) + assert src |> File.stream!([]) |> Enum.count() == 2 + assert src |> File.stream!([], 1) |> Enum.count() == 22 end - end - test "stream into" do - src = fixture_path("file.txt") - dest = tmp_path("tmp_test.txt") + test "trims BOM via option when raw" do + src = fixture_path("utf8_bom.txt") - try do - refute File.exists?(dest) + assert src + |> File.stream!([:trim_bom]) + |> Enum.take(1) == ["Русский\n"] - original = File.stream!(dest) + assert src + |> File.stream!([:trim_bom], 1) + |> Enum.take(5) == [<<208>>, <<160>>, <<209>>, <<131>>, <<209>>] - stream = - File.stream!(src) - |> Stream.map(&String.replace(&1, "O", "A")) - |> Enum.into(original) + assert src |> File.stream!([:trim_bom]) |> Enum.count() == 2 + assert src |> File.stream!([:trim_bom], 1) |> Enum.count() == 19 + end - assert stream == original - assert File.read(dest) == {:ok, "FAA\n"} - after - File.rm(dest) + test "keeps BOM with utf8 encoding" do + src = fixture_path("utf8_bom.txt") + + assert src + |> File.stream!([:utf8]) + |> Enum.take(1) == [<<239, 187, 191>> <> "Русский\n"] + + assert src + |> File.stream!([:utf8], 1) + |> Enum.take(9) == ["\uFEFF", "Р", "у", "с", "с", "к", "и", "й", "\n"] end - end - test "stream into append" do - src = fixture_path("file.txt") - dest = tmp_path("tmp_test.txt") + test "trims BOM via option with utf8 encoding" do + src = fixture_path("utf8_bom.txt") - try do - refute File.exists?(dest) - original = File.stream!(dest, [:append]) + assert src + |> File.stream!([:utf8, :trim_bom]) + |> Enum.take(1) == ["Русский\n"] - File.stream!(src, [:append]) - |> Stream.map(&String.replace(&1, "O", "A")) - |> Enum.into(original) + assert src + |> File.stream!([:utf8, :trim_bom], 1) + |> Enum.take(8) == ["Р", "у", "с", "с", "к", "и", "й", "\n"] + end - File.stream!(src, [:append]) - |> Enum.into(original) + test "reads and writes line by line in UTF-8" do + src = fixture_path("file.txt") + dest = tmp_path("tmp_test.txt") - assert File.read(dest) == {:ok, "FAA\nFOO\n"} - after - File.rm(dest) + try do + stream = File.stream!(src) + + File.open(dest, [:write, :utf8], fn target -> + Enum.each(stream, fn line -> + IO.write(target, String.replace(line, "O", "A")) + end) + end) + + assert File.read(dest) == {:ok, "FAA\n"} + after + File.rm(dest) + end + end + + test "reads and writes character in UTF-8" do + src = fixture_path("file.txt") + dest = tmp_path("tmp_test.txt") + + try do + stream = File.stream!(src, [:utf8], 1) + + File.open(dest, [:write], fn target -> + Enum.each(stream, fn <<char::utf8>> -> + IO.write(target, <<char + 1::utf8>>) + end) + end) + + assert File.read(dest) == {:ok, "GPP\v"} + after + File.rm(dest) + end end end |