summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author José Valim <jose.valim@plataformatec.com.br> 2018-07-12 11:37:19 +0200
committer José Valim <jose.valim@plataformatec.com.br> 2018-07-12 11:38:26 +0200
commit 16b358cd540f61697e7e571cc771f82092c365ba (patch)
tree 343880b2229cced1b52db3bdb1b430f8f48e800f
parent 61d42c2a555417de2cfbccaaad0acd26b5fc1210 (diff)
download elixir-16b358cd540f61697e7e571cc771f82092c365ba.tar.gz
Do not send binread to streams with encoding, closes #7729
-rw-r--r-- lib/elixir/lib/file/stream.ex 27
-rw-r--r-- lib/elixir/test/elixir/file_test.exs 301
2 files changed, 187 insertions, 141 deletions
diff --git a/lib/elixir/lib/file/stream.ex b/lib/elixir/lib/file/stream.ex
index f3e1c0758..08884ab4c 100644
--- a/lib/elixir/lib/file/stream.ex
+++ b/lib/elixir/lib/file/stream.ex
@@ -77,7 +77,7 @@ defmodule File.Stream do
start_fun = fn ->
case :file.open(path, read_modes(modes)) do
{:ok, device} ->
- if :trim_bom in modes, do: trim_bom(device), else: device
+ if :trim_bom in modes, do: trim_bom(device, raw) |> elem(0), else: device
{:error, reason} ->
raise File.Error, reason: reason, action: "stream", path: path
@@ -106,19 +106,24 @@ defmodule File.Stream do
end
end
- def count(%{path: path, line_or_bytes: bytes}) do
+ def count(%{path: path, line_or_bytes: bytes, raw: true, modes: modes}) do
case File.stat(path) do
{:ok, %{size: 0}} ->
{:error, __MODULE__}
{:ok, %{size: size}} ->
- {:ok, div(size, bytes) + if(rem(size, bytes) == 0, do: 0, else: 1)}
+ remainder = if rem(size, bytes) == 0, do: 0, else: 1
+ {:ok, div(size, bytes) + remainder - count_raw_bom(path, modes)}
{:error, reason} ->
raise File.Error, reason: reason, action: "stream", path: path
end
end
+ def count(_stream) do
+ {:error, __MODULE__}
+ end
+
def member?(_stream, _term) do
{:error, __MODULE__}
end
@@ -127,10 +132,18 @@ defmodule File.Stream do
{:error, __MODULE__}
end
- defp trim_bom(device) do
- header = IO.binread(device, 4)
- {:ok, _new_pos} = :file.position(device, bom_length(header))
- device
+ defp count_raw_bom(path, modes) do
+ if :trim_bom in modes do
+ File.open!(path, read_modes(modes), &(&1 |> trim_bom(true) |> elem(1)))
+ else
+ 0
+ end
+ end
+
+ defp trim_bom(device, raw) do
+ header = if raw, do: IO.binread(device, 4), else: IO.read(device, 1)
+ {:ok, new_pos} = :file.position(device, bom_length(header))
+ {device, new_pos}
end
defp bom_length(<<239, 187, 191, _rest::binary>>), do: 3
diff --git a/lib/elixir/test/elixir/file_test.exs b/lib/elixir/test/elixir/file_test.exs
index e08ab2537..8d9987f82 100644
--- a/lib/elixir/test/elixir/file_test.exs
+++ b/lib/elixir/test/elixir/file_test.exs
@@ -1418,180 +1418,213 @@ defmodule FileTest do
end
end
- test "stream struct" do
- src = fixture_path("file.txt")
- stream = File.stream!(src)
- assert %File.Stream{} = stream
- assert stream.modes == [:raw, :read_ahead, :binary]
- assert stream.raw
- assert stream.line_or_bytes == :line
-
- stream = File.stream!(src, read_ahead: false)
- assert %File.Stream{} = stream
- assert stream.modes == [:raw, :binary]
- assert stream.raw
-
- stream = File.stream!(src, read_ahead: 5000)
- assert %File.Stream{} = stream
- assert stream.modes == [:raw, {:read_ahead, 5000}, :binary]
- assert stream.raw
-
- stream = File.stream!(src, [:utf8], 10)
- assert %File.Stream{} = stream
- assert stream.modes == [{:encoding, :utf8}, :binary]
- refute stream.raw
- assert stream.line_or_bytes == 10
- end
-
- test "stream count" do
- src = fixture_path("file.txt")
- stream = File.stream!(src)
- assert Enum.count(stream) == 1
+ describe "file stream" do
+ test "returns a struct" do
+ src = fixture_path("file.txt")
+ stream = File.stream!(src)
+ assert %File.Stream{} = stream
+ assert stream.modes == [:raw, :read_ahead, :binary]
+ assert stream.raw
+ assert stream.line_or_bytes == :line
- stream = File.stream!(src, [:utf8])
- assert Enum.count(stream) == 1
+ stream = File.stream!(src, read_ahead: false)
+ assert %File.Stream{} = stream
+ assert stream.modes == [:raw, :binary]
+ assert stream.raw
- stream = File.stream!(src, [], 2)
- assert Enum.count(stream) == 2
- end
+ stream = File.stream!(src, read_ahead: 5000)
+ assert %File.Stream{} = stream
+ assert stream.modes == [:raw, {:read_ahead, 5000}, :binary]
+ assert stream.raw
- test "stream keeps BOM" do
- src = fixture_path("utf8_bom.txt")
+ stream = File.stream!(src, [:utf8], 10)
+ assert %File.Stream{} = stream
+ assert stream.modes == [{:encoding, :utf8}, :binary]
+ refute stream.raw
+ assert stream.line_or_bytes == 10
+ end
- bom_line =
- src
- |> File.stream!()
- |> Enum.take(1)
+ test "counts bytes/characters" do
+ src = fixture_path("file.txt")
+ stream = File.stream!(src)
+ assert Enum.count(stream) == 1
- assert [<<239, 187, 191>> <> "Русский\n"] == bom_line
- end
+ stream = File.stream!(src, [:utf8])
+ assert Enum.count(stream) == 1
- test "trim BOM via option" do
- src = fixture_path("utf8_bom.txt")
+ stream = File.stream!(src, [], 2)
+ assert Enum.count(stream) == 2
+ end
- bom_line =
- src
- |> File.stream!([:trim_bom])
- |> Enum.take(1)
+ test "reads and writes lines" do
+ src = fixture_path("file.txt")
+ dest = tmp_path("tmp_test.txt")
- assert ["Русский\n"] == bom_line
- end
+ try do
+ stream = File.stream!(src)
- test "stream line UTF-8" do
- src = fixture_path("file.txt")
- dest = tmp_path("tmp_test.txt")
+ File.open(dest, [:write], fn target ->
+ Enum.each(stream, fn line ->
+ IO.write(target, String.replace(line, "O", "A"))
+ end)
+ end)
- try do
- stream = File.stream!(src)
+ assert File.read(dest) == {:ok, "FAA\n"}
+ after
+ File.rm(dest)
+ end
+ end
- File.open(dest, [:write, :utf8], fn target ->
- Enum.each(stream, fn line ->
- IO.write(target, String.replace(line, "O", "A"))
+ test "reads and writes bytes" do
+ src = fixture_path("file.txt")
+ dest = tmp_path("tmp_test.txt")
+
+ try do
+ stream = File.stream!(src, [], 1)
+
+ File.open(dest, [:write], fn target ->
+ Enum.each(stream, fn <<char>> ->
+ IO.write(target, <<char + 1>>)
+ end)
end)
- end)
- assert File.read(dest) == {:ok, "FAA\n"}
- after
- File.rm(dest)
+ assert File.read(dest) == {:ok, "GPP\v"}
+ after
+ File.rm(dest)
+ end
end
- end
- test "stream bytes UTF-8" do
- src = fixture_path("file.txt")
- dest = tmp_path("tmp_test.txt")
+ test "is collectable" do
+ src = fixture_path("file.txt")
+ dest = tmp_path("tmp_test.txt")
- try do
- stream = File.stream!(src, [:utf8], 1)
+ try do
+ refute File.exists?(dest)
+ original = File.stream!(dest)
- File.open(dest, [:write], fn target ->
- Enum.each(stream, fn line ->
- IO.write(target, String.replace(line, "OO", "AA"))
- end)
- end)
+ stream =
+ File.stream!(src)
+ |> Stream.map(&String.replace(&1, "O", "A"))
+ |> Enum.into(original)
- assert File.read(dest) == {:ok, "FOO\n"}
- after
- File.rm(dest)
+ assert stream == original
+ assert File.read(dest) == {:ok, "FAA\n"}
+ after
+ File.rm(dest)
+ end
end
- end
- test "stream line" do
- src = fixture_path("file.txt")
- dest = tmp_path("tmp_test.txt")
+ test "is collectable with append" do
+ src = fixture_path("file.txt")
+ dest = tmp_path("tmp_test.txt")
- try do
- stream = File.stream!(src)
+ try do
+ refute File.exists?(dest)
+ original = File.stream!(dest, [:append])
- File.open(dest, [:write], fn target ->
- Enum.each(stream, fn line ->
- IO.write(target, String.replace(line, "O", "A"))
- end)
- end)
+ File.stream!(src, [:append])
+ |> Stream.map(&String.replace(&1, "O", "A"))
+ |> Enum.into(original)
- assert File.read(dest) == {:ok, "FAA\n"}
- after
- File.rm(dest)
+ File.stream!(src, [:append])
+ |> Enum.into(original)
+
+ assert File.read(dest) == {:ok, "FAA\nFOO\n"}
+ after
+ File.rm(dest)
+ end
end
- end
- test "stream bytes" do
- src = fixture_path("file.txt")
- dest = tmp_path("tmp_test.txt")
+ test "keeps BOM when raw" do
+ src = fixture_path("utf8_bom.txt")
- try do
- stream = File.stream!(src, [], 1)
+ assert src
+ |> File.stream!([])
+ |> Enum.take(1) == [<<239, 187, 191>> <> "Русский\n"]
- File.open(dest, [:write], fn target ->
- Enum.each(stream, fn line ->
- IO.write(target, String.replace(line, "OO", "AA"))
- end)
- end)
+ assert src
+ |> File.stream!([], 1)
+ |> Enum.take(5) == [<<239>>, <<187>>, <<191>>, <<208>>, <<160>>]
- assert File.read(dest) == {:ok, "FOO\n"}
- after
- File.rm(dest)
+ assert src |> File.stream!([]) |> Enum.count() == 2
+ assert src |> File.stream!([], 1) |> Enum.count() == 22
end
- end
- test "stream into" do
- src = fixture_path("file.txt")
- dest = tmp_path("tmp_test.txt")
+ test "trims BOM via option when raw" do
+ src = fixture_path("utf8_bom.txt")
- try do
- refute File.exists?(dest)
+ assert src
+ |> File.stream!([:trim_bom])
+ |> Enum.take(1) == ["Русский\n"]
- original = File.stream!(dest)
+ assert src
+ |> File.stream!([:trim_bom], 1)
+ |> Enum.take(5) == [<<208>>, <<160>>, <<209>>, <<131>>, <<209>>]
- stream =
- File.stream!(src)
- |> Stream.map(&String.replace(&1, "O", "A"))
- |> Enum.into(original)
+ assert src |> File.stream!([:trim_bom]) |> Enum.count() == 2
+ assert src |> File.stream!([:trim_bom], 1) |> Enum.count() == 19
+ end
- assert stream == original
- assert File.read(dest) == {:ok, "FAA\n"}
- after
- File.rm(dest)
+ test "keeps BOM with utf8 encoding" do
+ src = fixture_path("utf8_bom.txt")
+
+ assert src
+ |> File.stream!([:utf8])
+ |> Enum.take(1) == [<<239, 187, 191>> <> "Русский\n"]
+
+ assert src
+ |> File.stream!([:utf8], 1)
+ |> Enum.take(9) == ["\uFEFF", "Р", "у", "с", "с", "к", "и", "й", "\n"]
end
- end
- test "stream into append" do
- src = fixture_path("file.txt")
- dest = tmp_path("tmp_test.txt")
+ test "trims BOM via option with utf8 encoding" do
+ src = fixture_path("utf8_bom.txt")
- try do
- refute File.exists?(dest)
- original = File.stream!(dest, [:append])
+ assert src
+ |> File.stream!([:utf8, :trim_bom])
+ |> Enum.take(1) == ["Русский\n"]
- File.stream!(src, [:append])
- |> Stream.map(&String.replace(&1, "O", "A"))
- |> Enum.into(original)
+ assert src
+ |> File.stream!([:utf8, :trim_bom], 1)
+ |> Enum.take(8) == ["Р", "у", "с", "с", "к", "и", "й", "\n"]
+ end
- File.stream!(src, [:append])
- |> Enum.into(original)
+ test "reads and writes line by line in UTF-8" do
+ src = fixture_path("file.txt")
+ dest = tmp_path("tmp_test.txt")
- assert File.read(dest) == {:ok, "FAA\nFOO\n"}
- after
- File.rm(dest)
+ try do
+ stream = File.stream!(src)
+
+ File.open(dest, [:write, :utf8], fn target ->
+ Enum.each(stream, fn line ->
+ IO.write(target, String.replace(line, "O", "A"))
+ end)
+ end)
+
+ assert File.read(dest) == {:ok, "FAA\n"}
+ after
+ File.rm(dest)
+ end
+ end
+
+ test "reads and writes character in UTF-8" do
+ src = fixture_path("file.txt")
+ dest = tmp_path("tmp_test.txt")
+
+ try do
+ stream = File.stream!(src, [:utf8], 1)
+
+ File.open(dest, [:write], fn target ->
+ Enum.each(stream, fn <<char::utf8>> ->
+ IO.write(target, <<char + 1::utf8>>)
+ end)
+ end)
+
+ assert File.read(dest) == {:ok, "GPP\v"}
+ after
+ File.rm(dest)
+ end
end
end