-
Notifications
You must be signed in to change notification settings - Fork 587
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Add option to decide what to do on invalid UTF-8 urlencoded params #1200
Changes from all commits
7793604
557df29
8e7570f
af345e3
2e1769d
a5127dd
5b740cd
ea00b54
ea3169b
9d7e331
264be80
8d50863
bfb4476
8753900
15d7043
ced8d9a
4265e2e
b8f33f2
b31ee07
ad3847a
d8f68dd
1f46f7a
5bc1b3a
00e6429
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -5,3 +5,4 @@ | |
/doc | ||
erl_crash.dump | ||
*.ez | ||
/.vscode |
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,4 +1,6 @@ | ||
defmodule Plug.Conn.Utils do | ||
require Logger | ||
|
||
@moduledoc """ | ||
Utilities for working with connection data | ||
""" | ||
|
@@ -11,6 +13,7 @@ defmodule Plug.Conn.Utils do | |
@other [?., ?-, ?+] | ||
@space [?\s, ?\t] | ||
@specials ~c|()<>@,;:\\"/[]?={}| | ||
@utf8_error_code Application.compile_env(:plug, :utf8_error_code, 500) | ||
|
||
@doc ~S""" | ||
Parses media types (with wildcards). | ||
|
@@ -278,24 +281,78 @@ defmodule Plug.Conn.Utils do | |
end | ||
|
||
@doc """ | ||
Validates the given binary is valid UTF-8. | ||
Validates if the given binary data is valid UTF-8. | ||
|
||
This function checks a binary string to determine if it is a valid UTF-8 encoded string. | ||
It operates based on the given `error_code`, which dictates the behavior when encountering invalid UTF-8 bytes. | ||
|
||
## Parameters | ||
|
||
- `binary`: The binary data to be checked. | ||
- `exception`: The exception module to be used for raising errors in case of invalid UTF-8. | ||
- `context`: A string providing context about the binary data being checked. | ||
- `error_code`: An integer that determines the behavior when invalid UTF-8 is encountered. | ||
- `500` will raise the specified `exception` (default). | ||
- `404` will return an error tuple. | ||
- Codes within `100..999` will log a warning. | ||
|
||
## Examples | ||
|
||
iex> Plug.Conn.Utils.validate_utf8!(<<255, "invalid">>, RuntimeError, "test context") | ||
** (RuntimeError) invalid UTF-8 on test context, got byte 255 in position 0 | ||
|
||
iex> Plug.Conn.Utils.validate_utf8!("valid string", RuntimeError, "test context", 404) | ||
:ok | ||
|
||
iex> Plug.Conn.Utils.validate_utf8!(<<255, "invalid">>, RuntimeError, "test context", 404) | ||
{:error, "invalid UTF-8 on test context, got byte 255 in position 0"} | ||
|
||
|
||
|
||
# Example with logging for invalid UTF-8 | ||
iex> Plug.Conn.Utils.validate_utf8!(<<255, "invalid">>, RuntimeError, "test context", 200) | ||
:ok | ||
# Logs "invalid UTF-8 on test context, got byte 255 in position 0" | ||
|
||
""" | ||
@spec validate_utf8!(binary, module, binary) :: :ok | no_return | ||
def validate_utf8!(binary, exception, context) | ||
@spec validate_utf8!(binary, module, binary, integer()) :: | ||
:ok | no_return | {:error, String.t()} | ||
def validate_utf8!(binary, exception, context, error_code \\ @utf8_error_code) | ||
|
||
def validate_utf8!(<<binary::binary>>, exception, context, error_code) do | ||
do_validate_utf8!(binary, exception, context, error_code, 0) | ||
end | ||
|
||
def validate_utf8!(<<binary::binary>>, exception, context) do | ||
do_validate_utf8!(binary, exception, context) | ||
defp do_validate_utf8!(<<a::56, rest::bits>>, exception, context, error_code, byte_position) | ||
when Bitwise.band(a, 0x80808080808080) == 0 do | ||
do_validate_utf8!(rest, exception, context, error_code, byte_position + 7) | ||
end | ||
|
||
defp do_validate_utf8!(<<_::utf8, rest::bits>>, exception, context) do | ||
do_validate_utf8!(rest, exception, context) | ||
defp do_validate_utf8!(<<_::utf8, rest::bits>>, exception, context, error_code, byte_position) do | ||
do_validate_utf8!(rest, exception, context, error_code, byte_position + 1) | ||
end | ||
|
||
defp do_validate_utf8!(<<byte, _::bits>>, exception, context) do | ||
raise exception, "invalid UTF-8 on #{context}, got byte #{byte}" | ||
defp do_validate_utf8!(<<byte, _::bits>>, exception, context, error_code, byte_position) do | ||
case error_code do | ||
500 -> | ||
raise exception, | ||
"invalid UTF-8 on #{context}, got byte #{byte} in position #{byte_position}" | ||
|
||
404 -> | ||
{:error, "invalid UTF-8 on #{context}, got byte #{byte} in position #{byte_position}"} | ||
|
||
error_code when error_code in 100..999 -> | ||
:ok = | ||
Logger.warning( | ||
"invalid UTF-8 on #{context}, got byte #{byte} in position #{byte_position}", | ||
error: @utf8_error_code, | ||
context: context, | ||
byte: byte | ||
) | ||
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. It should always raise, and not behave differently based on the format. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. What information should be in the exception? I guess I'm not exactly clear on what exactly should change with different error codes. There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. If you want a way to not raise, then we should create a |
||
end | ||
end | ||
|
||
defp do_validate_utf8!(<<>>, _exception, _context) do | ||
defp do_validate_utf8!(<<>>, _exception, _context, _error_code, _byte_position) do | ||
:ok | ||
end | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,83 @@ | ||
defmodule Plug.Conn.UtilsTest do | ||
use ExUnit.Case, async: true | ||
import ExUnit.CaptureLog | ||
|
||
import Plug.Conn.Utils | ||
alias Plug.Conn.Utils, as: Utils | ||
doctest Plug.Conn.Utils | ||
|
||
@exception RuntimeError | ||
@context "test context" | ||
@valid_utf8 "utm_campaign=summer+sale&foo=bar&utm_medium=email&utm_source=sendgrid.com&utm_term=utm_term&utm_content=utm_content&utm_id=utm_id" | ||
@invalid_utf8 <<"utm_campaign=summer+sale&foo=bar&utm_medium=email&utm_source=sen", 255>> | ||
|
||
setup_all do | ||
%{ | ||
exception: @exception, | ||
context: @context, | ||
valid_utf8: @valid_utf8, | ||
invalid_utf8: @invalid_utf8 | ||
} | ||
end | ||
|
||
describe "validate_utf8! with error_code 500" do | ||
setup context, do: Map.merge(context, %{error_code: 500}) | ||
|
||
test "raises an exception for invalid UTF-8 input", context do | ||
assert_raise context.exception, | ||
"invalid UTF-8 on #{context.context}, got byte 255 in position #{byte_size(@invalid_utf8) - 1}", | ||
fn -> | ||
Utils.validate_utf8!( | ||
context.invalid_utf8, | ||
context.exception, | ||
context.context, | ||
context.error_code | ||
) | ||
end | ||
end | ||
end | ||
|
||
describe "validate_utf8! with error_code 404" do | ||
setup context, do: Map.merge(context, %{error_code: 404}) | ||
|
||
test "returns {:error, message} for invalid UTF-8 w/ error code 404", | ||
context_map do | ||
%{context: context} = context_map | ||
|
||
error_tuple = | ||
{:error, | ||
"invalid UTF-8 on #{context}, got byte 255 in position #{byte_size(@invalid_utf8) - 1}"} | ||
|
||
assert error_tuple == | ||
Utils.validate_utf8!( | ||
context_map.invalid_utf8, | ||
context_map.exception, | ||
context_map.context, | ||
context_map.error_code | ||
) | ||
end | ||
end | ||
|
||
describe "validate_utf8! with error_code 401" do | ||
setup context, do: Map.merge(context, %{error_code: 401}) | ||
|
||
test "logs a detailed warning for invalid UTF-8 input in position #{byte_size(@invalid_utf8) - 1}", | ||
context do | ||
log = | ||
capture_log(fn -> | ||
assert :ok = | ||
Utils.validate_utf8!( | ||
context.invalid_utf8, | ||
context.exception, | ||
context.context, | ||
context.error_code | ||
) | ||
end) | ||
|
||
expected_log_regex = | ||
~r/^.*?invalid UTF-8 on test context, got byte 255 in position #{byte_size(@invalid_utf8) - 1}/i | ||
|
||
assert String.match?(log, expected_log_regex) | ||
end | ||
end | ||
end |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
ced8d9a