Sam Ford b7a4360433
#parse_curl_output: increase default max_iterations
The `max_iterations` value in `#parse_curl_output` is only intended
to prevent its `while` loop from potentially turning into an endless
loop. This should only come into play in exceptional circumstances
but the current default value (5) is low enough that we're seeing it
under normal circumstances.

`#parse_curl_output` isn't intended to restrict the number of
redirections (this should be done using the `--max-redirs` option in
`curl) but it's effectively doing this in rare cases due to the low
`max_iterations` default. This is a problem because `curl` can
successfully return a response only to have `#parse_curl_output`
error in relation to `max_iterations`.

Originally the code in `#parse_curl_output` was used in the context
of livecheck, where it's not a huge issue if a check fails. However,
now the `#parse_curl_output` method is used in important parts of
brew like `#curl_download`. We've received a report of a download
failing with the "Too many redirects (max = 5)` error, effectively
preventing the user from installing a formula [from a third-party
tap].

Until we can come up with a more adaptive way of bounding this
`while` loop, I think we should simply raise the default to something
that's less likely to be encountered under normal circumstances
(e.g., 25).
2022-04-26 15:49:02 -04:00

487 lines
17 KiB
Ruby

# typed: false
# frozen_string_literal: true
require "open3"
require "extend/time"
module Utils
# Helper function for interacting with `curl`.
#
# @api private
module Curl
extend T::Sig
using TimeRemaining
# This regex is used to extract the part of an ETag within quotation marks,
# ignoring any leading weak validator indicator (`W/`). This simplifies
# ETag comparison in `#curl_check_http_content`.
ETAG_VALUE_REGEX = %r{^(?:[wW]/)?"((?:[^"]|\\")*)"}.freeze
# HTTP responses and body content are typically separated by a double
# `CRLF` (whereas HTTP header lines are separated by a single `CRLF`).
# In rare cases, this can also be a double newline (`\n\n`).
HTTP_RESPONSE_BODY_SEPARATOR = "\r\n\r\n"
# This regex is used to isolate the parts of an HTTP status line, namely
# the status code and any following descriptive text (e.g., `Not Found`).
HTTP_STATUS_LINE_REGEX = %r{^HTTP/.* (?<code>\d+)(?: (?<text>[^\r\n]+))?}.freeze
private_constant :ETAG_VALUE_REGEX, :HTTP_RESPONSE_BODY_SEPARATOR, :HTTP_STATUS_LINE_REGEX
module_function
def curl_executable(use_homebrew_curl: false)
return Pathname.new(ENV["HOMEBREW_BREWED_CURL_PATH"]) if use_homebrew_curl
@curl_executable ||= HOMEBREW_SHIMS_PATH/"shared/curl"
end
def curl_path
@curl_path ||= Utils.popen_read(curl_executable, "--homebrew=print-path").chomp.presence
end
sig {
params(
extra_args: T.untyped,
connect_timeout: T.any(Integer, Float, NilClass),
max_time: T.any(Integer, Float, NilClass),
retries: T.nilable(Integer),
retry_max_time: T.any(Integer, Float, NilClass),
show_output: T.nilable(T::Boolean),
user_agent: T.any(String, Symbol, NilClass),
).returns(T::Array[T.untyped])
}
def curl_args(
*extra_args,
connect_timeout: nil,
max_time: nil,
retries: Homebrew::EnvConfig.curl_retries.to_i,
retry_max_time: nil,
show_output: false,
user_agent: nil
)
args = []
# do not load .curlrc unless requested (must be the first argument)
args << "--disable" unless Homebrew::EnvConfig.curlrc?
# echo any cookies received on a redirect
args << "--cookie" << "/dev/null"
args << "--globoff"
args << "--show-error"
args << "--user-agent" << case user_agent
when :browser, :fake
HOMEBREW_USER_AGENT_FAKE_SAFARI
when :default, nil
HOMEBREW_USER_AGENT_CURL
when String
user_agent
else
raise TypeError, ":user_agent must be :browser/:fake, :default, or a String"
end
args << "--header" << "Accept-Language: en"
unless show_output == true
args << "--fail"
args << "--progress-bar" unless Context.current.verbose?
args << "--verbose" if Homebrew::EnvConfig.curl_verbose?
args << "--silent" unless $stdout.tty?
end
args << "--connect-timeout" << connect_timeout.round(3) if connect_timeout.present?
args << "--max-time" << max_time.round(3) if max_time.present?
# A non-positive integer (e.g., 0) or `nil` will omit this argument
args << "--retry" << retries if retries&.positive?
args << "--retry-max-time" << retry_max_time.round if retry_max_time.present?
args + extra_args
end
def curl_with_workarounds(
*args,
secrets: nil, print_stdout: nil, print_stderr: nil, debug: nil,
verbose: nil, env: {}, timeout: nil, use_homebrew_curl: false, **options
)
end_time = Time.now + timeout if timeout
command_options = {
secrets: secrets,
print_stdout: print_stdout,
print_stderr: print_stderr,
debug: debug,
verbose: verbose,
}.compact
result = system_command curl_executable(use_homebrew_curl: use_homebrew_curl),
args: curl_args(*args, **options),
env: env,
timeout: end_time&.remaining,
**command_options
return result if result.success? || !args.exclude?("--http1.1")
raise Timeout::Error, result.stderr.lines.last.chomp if timeout && result.status.exitstatus == 28
# Error in the HTTP2 framing layer
if result.status.exitstatus == 16
return curl_with_workarounds(
*args, "--http1.1",
timeout: end_time&.remaining, **command_options, **options
)
end
# This is a workaround for https://github.com/curl/curl/issues/1618.
if result.status.exitstatus == 56 # Unexpected EOF
out = curl_output("-V").stdout
# If `curl` doesn't support HTTP2, the exception is unrelated to this bug.
return result unless out.include?("HTTP2")
# The bug is fixed in `curl` >= 7.60.0.
curl_version = out[/curl (\d+(\.\d+)+)/, 1]
return result if Gem::Version.new(curl_version) >= Gem::Version.new("7.60.0")
return curl_with_workarounds(*args, "--http1.1", **command_options, **options)
end
result
end
def curl(*args, print_stdout: true, **options)
result = curl_with_workarounds(*args, print_stdout: print_stdout, **options)
result.assert_success!
result
end
def curl_download(*args, to: nil, try_partial: false, **options)
destination = Pathname(to)
destination.dirname.mkpath
if try_partial
range_stdout = curl_output("--location", "--head", *args, **options).stdout
parsed_output = parse_curl_output(range_stdout)
headers = if parsed_output[:responses].present?
parsed_output[:responses].last[:headers]
else
{}
end
# Any value for `accept-ranges` other than none indicates that the server supports partial requests.
# Its absence indicates no support.
supports_partial = headers.key?("accept-ranges") && headers["accept-ranges"] != "none"
if supports_partial &&
destination.exist? &&
destination.size == headers["content-length"].to_i
return # We've already downloaded all the bytes
end
end
args = ["--location", "--remote-time", "--output", destination, *args]
# continue-at shouldn't be used with servers that don't support partial requests.
args = ["--continue-at", "-", *args] if destination.exist? && supports_partial
curl(*args, **options)
end
def curl_output(*args, **options)
curl_with_workarounds(*args, print_stderr: false, show_output: true, **options)
end
# Check if a URL is protected by CloudFlare (e.g. badlion.net and jaxx.io).
def url_protected_by_cloudflare?(details)
return false if details[:headers].blank?
return unless [403, 503].include?(details[:status].to_i)
details[:headers].fetch("set-cookie", nil)&.match?(/^(__cfduid|__cf_bm)=/i) &&
details[:headers].fetch("server", nil)&.match?(/^cloudflare/i)
end
# Check if a URL is protected by Incapsula (e.g. corsair.com).
def url_protected_by_incapsula?(details)
return false if details[:headers].blank?
return false if details[:status].to_i != 403
details[:headers].fetch("set-cookie", nil)&.match?(/^(visid_incap|incap_ses)_/i)
end
def curl_check_http_content(url, url_type, specs: {}, user_agents: [:default],
check_content: false, strict: false, use_homebrew_curl: false)
return unless url.start_with? "http"
secure_url = url.sub(/\Ahttp:/, "https:")
secure_details = nil
hash_needed = false
if url != secure_url
user_agents.each do |user_agent|
secure_details = begin
curl_http_content_headers_and_checksum(
secure_url,
specs: specs,
hash_needed: true,
use_homebrew_curl: use_homebrew_curl,
user_agent: user_agent,
)
rescue Timeout::Error
next
end
next unless http_status_ok?(secure_details[:status])
hash_needed = true
user_agents = [user_agent]
break
end
end
details = nil
user_agents.each do |user_agent|
details =
curl_http_content_headers_and_checksum(
url,
specs: specs,
hash_needed: hash_needed,
use_homebrew_curl: use_homebrew_curl,
user_agent: user_agent,
)
break if http_status_ok?(details[:status])
end
unless details[:status]
# Hack around https://github.com/Homebrew/brew/issues/3199
return if MacOS.version == :el_capitan
return "The #{url_type} #{url} is not reachable"
end
unless http_status_ok?(details[:status])
return if url_protected_by_cloudflare?(details) || url_protected_by_incapsula?(details)
return "The #{url_type} #{url} is not reachable (HTTP status code #{details[:status]})"
end
if url.start_with?("https://") && Homebrew::EnvConfig.no_insecure_redirect? &&
(details[:final_url].present? && !details[:final_url].start_with?("https://"))
return "The #{url_type} #{url} redirects back to HTTP"
end
return unless secure_details
return if !http_status_ok?(details[:status]) || !http_status_ok?(secure_details[:status])
etag_match = details[:etag] &&
details[:etag] == secure_details[:etag]
content_length_match =
details[:content_length] &&
details[:content_length] == secure_details[:content_length]
file_match = details[:file_hash] == secure_details[:file_hash]
http_with_https_available =
url.start_with?("http://") &&
(secure_details[:final_url].present? && secure_details[:final_url].start_with?("https://"))
if (etag_match || content_length_match || file_match) && http_with_https_available
return "The #{url_type} #{url} should use HTTPS rather than HTTP"
end
return unless check_content
no_protocol_file_contents = %r{https?:\\?/\\?/}
http_content = details[:file]&.gsub(no_protocol_file_contents, "/")
https_content = secure_details[:file]&.gsub(no_protocol_file_contents, "/")
# Check for the same content after removing all protocols
if (http_content && https_content) && (http_content == https_content) && http_with_https_available
return "The #{url_type} #{url} should use HTTPS rather than HTTP"
end
return unless strict
# Same size, different content after normalization
# (typical causes: Generated ID, Timestamp, Unix time)
if http_content.length == https_content.length
return "The #{url_type} #{url} may be able to use HTTPS rather than HTTP. Please verify it in a browser."
end
lenratio = (100 * https_content.length / http_content.length).to_i
return unless (90..110).cover?(lenratio)
"The #{url_type} #{url} may be able to use HTTPS rather than HTTP. Please verify it in a browser."
end
def curl_http_content_headers_and_checksum(
url, specs: {}, hash_needed: false,
use_homebrew_curl: false, user_agent: :default
)
file = Tempfile.new.tap(&:close)
# Convert specs to options. This is mostly key-value options,
# unless the value is a boolean in which case treat as as flag.
specs = specs.flat_map do |option, argument|
next [] if argument == false # No flag.
args = ["--#{option.to_s.tr("_", "-")}"]
args << argument unless argument == true # It's a flag.
args
end
max_time = hash_needed ? 600 : 25
output, _, status = curl_output(
*specs, "--dump-header", "-", "--output", file.path, "--location", url,
use_homebrew_curl: use_homebrew_curl,
connect_timeout: 15,
max_time: max_time,
retry_max_time: max_time,
user_agent: user_agent
)
parsed_output = parse_curl_output(output)
responses = parsed_output[:responses]
final_url = curl_response_last_location(responses)
headers = if responses.last.present?
status_code = responses.last[:status_code]
responses.last[:headers]
else
{}
end
etag = headers["etag"][ETAG_VALUE_REGEX, 1] if headers["etag"].present?
content_length = headers["content-length"]
if status.success?
file_contents = File.read(file.path)
file_hash = Digest::SHA2.hexdigest(file_contents) if hash_needed
end
{
url: url,
final_url: final_url,
status: status_code,
headers: headers,
etag: etag,
content_length: content_length,
file: file_contents,
file_hash: file_hash,
}
ensure
file.unlink
end
def curl_supports_tls13?
@curl_supports_tls13 ||= Hash.new do |h, key|
h[key] = quiet_system(curl_executable, "--tlsv1.3", "--head", "https://brew.sh/")
end
@curl_supports_tls13[ENV["HOMEBREW_CURL"]]
end
def http_status_ok?(status)
(100..299).cover?(status.to_i)
end
# Separates the output text from `curl` into an array of HTTP responses and
# the final response body (i.e. content). Response hashes contain the
# `:status_code`, `:status_text`, and `:headers`.
# @param output [String] The output text from `curl` containing HTTP
# responses, body content, or both.
# @param max_iterations [Integer] The maximum number of iterations for the
# `while` loop that parses HTTP response text. This should correspond to
# the maximum number of requests in the output. If `curl`'s `--max-redirs`
# option is used, `max_iterations` should be `max-redirs + 1`, to
# account for any final response after the redirections.
# @return [Hash] A hash containing an array of response hashes and the body
# content, if found.
sig { params(output: String, max_iterations: Integer).returns(T::Hash[Symbol, T.untyped]) }
def parse_curl_output(output, max_iterations: 25)
responses = []
iterations = 0
output = output.lstrip
while output.match?(%r{\AHTTP/[\d.]+ \d+}) && output.include?(HTTP_RESPONSE_BODY_SEPARATOR)
iterations += 1
raise "Too many redirects (max = #{max_iterations})" if iterations > max_iterations
response_text, _, output = output.partition(HTTP_RESPONSE_BODY_SEPARATOR)
output = output.lstrip
next if response_text.blank?
response_text.chomp!
response = parse_curl_response(response_text)
responses << response if response.present?
end
{ responses: responses, body: output }
end
# Returns the URL from the last location header found in cURL responses,
# if any.
# @param responses [Array<Hash>] An array of hashes containing response
# status information and headers from `#parse_curl_response`.
# @param absolutize [true, false] Whether to make the location URL absolute.
# @param base_url [String, nil] The URL to use as a base for making the
# `location` URL absolute.
# @return [String, nil] The URL from the last-occurring `location` header
# in the responses or `nil` (if no `location` headers found).
sig {
params(
responses: T::Array[T::Hash[Symbol, T.untyped]],
absolutize: T::Boolean,
base_url: T.nilable(String),
).returns(T.nilable(String))
}
def curl_response_last_location(responses, absolutize: false, base_url: nil)
responses.reverse_each do |response|
next if response[:headers].blank?
location = response[:headers]["location"]
next if location.blank?
absolute_url = URI.join(base_url, location).to_s if absolutize && base_url.present?
return absolute_url || location
end
nil
end
private
# Parses HTTP response text from `curl` output into a hash containing the
# information from the status line (status code and, optionally,
# descriptive text) and headers.
# @param response_text [String] The text of a `curl` response, consisting
# of a status line followed by header lines.
# @return [Hash] A hash containing the response status information and
# headers (as a hash with header names as keys).
sig { params(response_text: String).returns(T::Hash[Symbol, T.untyped]) }
def parse_curl_response(response_text)
response = {}
return response unless response_text.match?(HTTP_STATUS_LINE_REGEX)
# Parse the status line and remove it
match = response_text.match(HTTP_STATUS_LINE_REGEX)
response[:status_code] = match["code"] if match["code"].present?
response[:status_text] = match["text"] if match["text"].present?
response_text = response_text.sub(%r{^HTTP/.* (\d+).*$\s*}, "")
# Create a hash from the header lines
response[:headers] =
response_text.split("\r\n")
.to_h { |header| header.split(/:\s*/, 2) }
.transform_keys(&:downcase)
response
end
end
end
# FIXME: Include `Utils::Curl` explicitly everywhere it is used.
include Utils::Curl # rubocop:disable Style/MixinUsage