2023-03-09 21:10:44 -08:00
|
|
|
# typed: true
|
2019-04-19 15:38:03 +09:00
|
|
|
# frozen_string_literal: true
|
|
|
|
|
2016-07-12 19:46:29 +01:00
|
|
|
require "open3"
|
2016-06-03 13:05:18 +01:00
|
|
|
|
2021-03-24 10:55:33 +01:00
|
|
|
require "extend/time"
|
|
|
|
|
2020-10-10 15:23:03 +02:00
|
|
|
module Utils
|
|
|
|
# Helper function for interacting with `curl`.
|
|
|
|
#
|
|
|
|
# @api private
|
|
|
|
module Curl
|
2021-09-06 18:53:20 -04:00
|
|
|
extend T::Sig
|
|
|
|
|
2021-03-24 10:55:33 +01:00
|
|
|
using TimeRemaining
|
|
|
|
|
2021-03-17 13:22:39 -04:00
|
|
|
# This regex is used to extract the part of an ETag within quotation marks,
# ignoring any leading weak validator indicator (`W/`). This simplifies
# ETag comparison in `#curl_check_http_content`.
ETAG_VALUE_REGEX = %r{^(?:[wW]/)?"((?:[^"]|\\")*)"}.freeze

# HTTP responses and body content are typically separated by a double
# `CRLF` (whereas HTTP header lines are separated by a single `CRLF`).
# In rare cases, this can also be a double newline (`\n\n`).
HTTP_RESPONSE_BODY_SEPARATOR = "\r\n\r\n"

# This regex is used to isolate the parts of an HTTP status line, namely
# the status code and any following descriptive text (e.g., `Not Found`).
HTTP_STATUS_LINE_REGEX = %r{^HTTP/.* (?<code>\d+)(?: (?<text>[^\r\n]+))?}.freeze

# These constants are implementation details of this module, not public API.
private_constant :ETAG_VALUE_REGEX, :HTTP_RESPONSE_BODY_SEPARATOR, :HTTP_STATUS_LINE_REGEX
|
2021-04-26 10:13:12 -04:00
|
|
|
|
2020-10-10 15:23:03 +02:00
|
|
|
module_function
|
|
|
|
|
2021-07-26 12:39:25 +02:00
|
|
|
# Selects the `curl` executable to invoke.
#
# @param use_homebrew_curl [Boolean] when `true`, return the
#   Homebrew-installed `curl` rather than the shim.
# @return [Pathname] path of the chosen `curl` executable.
def curl_executable(use_homebrew_curl: false)
  if use_homebrew_curl
    HOMEBREW_BREWED_CURL_PATH
  else
    # Memoize the shim path; it never changes within a process.
    @curl_executable ||= HOMEBREW_SHIMS_PATH/"shared/curl"
  end
end
|
2016-06-03 13:05:18 +01:00
|
|
|
|
2021-10-06 23:58:04 +08:00
|
|
|
# Resolves and memoizes the real `curl` path behind the shim.
# Returns `nil` (and re-queries next call) when the shim prints nothing.
def curl_path
  @curl_path ||= begin
    printed = Utils.popen_read(curl_executable, "--homebrew=print-path").chomp
    printed.presence
  end
end
|
|
|
|
|
2022-05-30 04:10:25 +01:00
|
|
|
# Discards the memoized `curl` path so `#curl_path` re-resolves it on
# its next call (e.g. after the environment changes in tests).
def clear_path_cache
  @curl_path = nil
end
|
|
|
|
|
2021-09-06 18:53:20 -04:00
|
|
|
# Assembles the common argument list for a `curl` invocation.
#
# @param extra_args [Array] additional arguments appended verbatim after the
#   generated options.
# @param connect_timeout [Integer, Float, nil] value for `--connect-timeout`
#   (seconds, rounded to millisecond precision).
# @param max_time [Integer, Float, nil] value for `--max-time` (seconds).
# @param retries [Integer, nil] value for `--retry`; omitted when `nil` or
#   not positive.
# @param retry_max_time [Integer, Float, nil] value for `--retry-max-time`.
# @param show_output [Boolean, nil] when exactly `true`, the quiet/progress
#   flags are skipped so callers receive the full transfer output.
# @param show_error [Boolean, nil] when truthy, pass `--show-error`.
# @param user_agent [String, Symbol, nil] `:browser`/`:fake` for a fake
#   Safari UA, `:default`/`nil` for Homebrew's UA, or a literal String.
# @param referer [String, nil] value for `--referer`.
# @return [Array] the assembled `curl` arguments.
# @raise [TypeError] if `user_agent` is not a recognized Symbol or a String.
sig {
  params(
    extra_args:      T.untyped,
    connect_timeout: T.any(Integer, Float, NilClass),
    max_time:        T.any(Integer, Float, NilClass),
    retries:         T.nilable(Integer),
    retry_max_time:  T.any(Integer, Float, NilClass),
    show_output:     T.nilable(T::Boolean),
    show_error:      T.nilable(T::Boolean),
    user_agent:      T.any(String, Symbol, NilClass),
    referer:         T.nilable(String),
  ).returns(T::Array[T.untyped])
}
def curl_args(
  *extra_args,
  connect_timeout: nil,
  max_time: nil,
  retries: Homebrew::EnvConfig.curl_retries.to_i,
  retry_max_time: nil,
  show_output: false,
  show_error: true,
  user_agent: nil,
  referer: nil
)
  args = []

  # do not load .curlrc unless requested (must be the first argument)
  args << "--disable" unless Homebrew::EnvConfig.curlrc?

  # echo any cookies received on a redirect
  args << "--cookie" << "/dev/null"

  # Disable globbing so URLs containing `[]`/`{}` are taken literally.
  args << "--globoff"

  args << "--show-error" if show_error

  args << "--user-agent" << case user_agent
  when :browser, :fake
    HOMEBREW_USER_AGENT_FAKE_SAFARI
  when :default, nil
    HOMEBREW_USER_AGENT_CURL
  when String
    user_agent
  else
    raise TypeError, ":user_agent must be :browser/:fake, :default, or a String"
  end

  args << "--header" << "Accept-Language: en"

  # When output is not wanted by the caller, make curl quiet but still fail
  # loudly (non-zero exit) on HTTP errors.
  unless show_output == true
    args << "--fail"
    args << "--progress-bar" unless Context.current.verbose?
    args << "--verbose" if Homebrew::EnvConfig.curl_verbose?
    args << "--silent" unless $stdout.tty?
  end

  args << "--connect-timeout" << connect_timeout.round(3) if connect_timeout.present?
  args << "--max-time" << max_time.round(3) if max_time.present?

  # A non-positive integer (e.g., 0) or `nil` will omit this argument
  args << "--retry" << retries if retries&.positive?

  args << "--retry-max-time" << retry_max_time.round if retry_max_time.present?

  args << "--referer" << referer if referer.present?

  # Caller-supplied arguments go last so they can override the defaults.
  args + extra_args
end
|
|
|
|
|
2020-10-10 15:23:03 +02:00
|
|
|
# Runs `curl` via `system_command`, applying workarounds for known `curl`
# failure modes (retrying with `--http1.1` where HTTP/2 misbehaves).
#
# @param args [Array] arguments forwarded to `#curl_args`.
# @param secrets [Object, nil] forwarded to `system_command` (redaction).
# @param print_stdout [Boolean, nil] forwarded to `system_command`.
# @param print_stderr [Boolean, nil] forwarded to `system_command`.
# @param debug [Boolean, nil] forwarded to `system_command`.
# @param verbose [Boolean, nil] forwarded to `system_command`.
# @param env [Hash] environment variables for the `curl` process.
# @param timeout [Integer, Float, nil] overall deadline in seconds,
#   shared across any internal retry.
# @param use_homebrew_curl [Boolean] use the Homebrew-installed `curl`.
# @param options [Hash] remaining options forwarded to `#curl_args`.
# @return [SystemCommand::Result] the result of the `curl` invocation.
# @raise [Timeout::Error] if `timeout` was given and `curl` exited with
#   status 28 (operation timed out).
def curl_with_workarounds(
  *args,
  secrets: nil, print_stdout: nil, print_stderr: nil, debug: nil,
  verbose: nil, env: {}, timeout: nil, use_homebrew_curl: false, **options
)
  # Deadline for the whole call, including any retry below.
  end_time = Time.now + timeout if timeout

  command_options = {
    secrets: secrets,
    print_stdout: print_stdout,
    print_stderr: print_stderr,
    debug: debug,
    verbose: verbose,
  }.compact

  result = system_command curl_executable(use_homebrew_curl: use_homebrew_curl),
                          args: curl_args(*args, **options),
                          env: env,
                          timeout: end_time&.remaining,
                          **command_options

  # If we already forced HTTP/1.1 there is no further fallback to try.
  return result if result.success? || args.include?("--http1.1")

  # curl exit status 28: operation timed out.
  raise Timeout::Error, result.stderr.lines.last.chomp if timeout && result.status.exitstatus == 28

  # Error in the HTTP2 framing layer
  if result.status.exitstatus == 16
    return curl_with_workarounds(
      *args, "--http1.1",
      timeout: end_time&.remaining, **command_options, **options
    )
  end

  # This is a workaround for https://github.com/curl/curl/issues/1618.
  if result.status.exitstatus == 56 # Unexpected EOF
    out = curl_output("-V").stdout

    # If `curl` doesn't support HTTP2, the exception is unrelated to this bug.
    return result unless out.include?("HTTP2")

    # The bug is fixed in `curl` >= 7.60.0.
    curl_version = out[/curl (\d+(\.\d+)+)/, 1]
    return result if Gem::Version.new(curl_version) >= Gem::Version.new("7.60.0")

    return curl_with_workarounds(*args, "--http1.1", **command_options, **options)
  end

  result
end
|
|
|
|
|
2020-10-10 15:23:03 +02:00
|
|
|
# Runs `curl` (with workarounds) and raises unless it exits successfully.
#
# @param args [Array] arguments forwarded to `#curl_with_workarounds`.
# @param print_stdout [Boolean] whether to stream stdout (default: true).
# @param options [Hash] remaining options forwarded on.
# @return [SystemCommand::Result] the successful result.
def curl(*args, print_stdout: true, **options)
  # `tap` returns the result after asserting success, so failures raise
  # before the caller ever sees the result object.
  curl_with_workarounds(*args, print_stdout: print_stdout, **options)
    .tap(&:assert_success!)
end
|
2018-09-17 02:45:00 +02:00
|
|
|
|
#curl_download: default try_partial to false
When its `try_partial` argument is `true`, `#curl_download` makes a
`HEAD` request before downloading the file using `#curl`. Currently
`try_partial` defaults to `true`, so any `#curl_download` call that
doesn't explicitly specify `try_partial: false` will make a `HEAD`
request first. This can potentially involve several requests if the
URL redirects, so it can be a bit of unnecessary overhead when a
partial download isn't needed.
Partial downloads are generally only useful when we're working with
larger files, however there's currently only one place in brew where
`#curl_download` is used and this is the case:
`CurlDownloadStrategy`. The other `#curl_download` calls are fetching
smaller [text] files and don't need to support partial downloads.
This commit changes the default `try_partial` value to `false`,
making partial downloads opt-in rather than opt-out.
We want `try_partial` to continue to default to `true` in
`CurlDownloadStrategy` and there are various ways to accomplish this.
In this commit, I've chosen to update its `#initialize` method to
accept a `try_partial` argument that defaults to `true`, as this
value can also be used in classes that inherit from
`CurlDownloadStrategy` (e.g., `HomebrewCurlDownloadStrategy`). This
instance variable is passed to `#curl_download` in related methods,
effectively maintaining the previous `try_partial: true` value, while
also allowing this value to be overridden when necessary.
Other uses of `#curl_download` in brew are
`Formulary::FromUrlLoader#load_file` and
`Cask::CaskLoader::FromURILoader#load`, which did not provide a
`try_partial` argument but should have been using
`try_partial: false`. With the `try_partial: false` default in this
commit, these calls are now fine without a `try_partial` argument.
The only other use of `#curl_download` in brew is
`SPDX#download_latest_license_data!`. These calls were previously
using `try_partial: false` but we can now omit this argument with
the new `false` default (aligning with the above).
2022-04-22 12:05:14 -04:00
|
|
|
# Downloads a URL to `to` using `curl`, optionally resuming an existing
# partial download.
#
# @param args [Array] extra arguments passed through to `#curl`.
# @param to [String, Pathname] destination path for the downloaded file.
# @param try_partial [Boolean] when `true`, first issue a `HEAD` request
#   to learn whether the server supports ranged requests and, if so,
#   resume any existing partial file with `--continue-at -`.
# @param options [Hash] extra keyword options passed through to `#curl`.
# @return [void]
def curl_download(*args, to: nil, try_partial: false, **options)
  destination = Pathname(to)
  destination.dirname.mkpath

  if try_partial
    # Probe the final response's headers (following redirects) with a
    # HEAD request to see whether a partial download is possible.
    range_stdout = curl_output("--location", "--head", *args, **options).stdout
    parsed_output = parse_curl_output(range_stdout)

    headers = if parsed_output[:responses].present?
      parsed_output[:responses].last[:headers]
    else
      {}
    end

    # Any value for `accept-ranges` other than none indicates that the server supports partial requests.
    # Its absence indicates no support.
    supports_partial = headers.key?("accept-ranges") && headers["accept-ranges"] != "none"

    if supports_partial &&
       destination.exist? &&
       destination.size == headers["content-length"].to_i
      return # We've already downloaded all the bytes
    end
  end

  args = ["--location", "--remote-time", "--output", destination, *args]
  # continue-at shouldn't be used with servers that don't support partial requests.
  # NOTE: `supports_partial` is assigned inside the `if try_partial` branch;
  # when `try_partial` is false it evaluates to `nil` here (Ruby defines
  # locals assigned in unexecuted branches), so `--continue-at` is skipped.
  args = ["--continue-at", "-", *args] if destination.exist? && supports_partial

  curl(*args, **options)
end
|
2017-12-03 14:02:55 +01:00
|
|
|
|
2020-10-10 15:23:03 +02:00
|
|
|
# Runs `curl` capturing its output instead of streaming it; stderr is not
# printed. Caller-supplied options still override these defaults.
def curl_output(*args, **options)
  capture_defaults = { print_stderr: false, show_output: true }
  # `merge(options)` keeps the original override order: explicit caller
  # options win over the capture defaults.
  curl_with_workarounds(*args, **capture_defaults.merge(options))
end
|
2017-12-03 14:02:55 +01:00
|
|
|
|
2020-10-10 15:23:03 +02:00
|
|
|
# Check if a URL is protected by CloudFlare (e.g. badlion.net and jaxx.io).
# @param response [Hash] A response hash from `#parse_curl_response`.
# @return [true, false] Whether a response contains headers indicating that
#   the URL is protected by Cloudflare.
sig { params(response: T::Hash[Symbol, T.untyped]).returns(T::Boolean) }
def url_protected_by_cloudflare?(response)
  headers = response[:headers]
  return false if headers.blank?
  return false unless [403, 503].include?(response[:status_code].to_i)

  # Cloudflare-protected sites set one of these cookies on challenges...
  cloudflare_cookie = Array(headers["set-cookie"]).compact.any? do |cookie|
    cookie.match?(/^(__cfduid|__cf_bm)=/i)
  end

  # ...and identify themselves in the `Server` header.
  cloudflare_server = Array(headers["server"]).compact.any? do |server|
    server.match?(/^cloudflare/i)
  end

  cloudflare_cookie && cloudflare_server
end
|
2017-12-03 14:02:55 +01:00
|
|
|
|
2020-10-10 15:23:03 +02:00
|
|
|
# Check if a URL is protected by Incapsula (e.g. corsair.com).
# @param response [Hash] A response hash from `#parse_curl_response`.
# @return [true, false] Whether a response contains headers indicating that
#   the URL is protected by Incapsula.
sig { params(response: T::Hash[Symbol, T.untyped]).returns(T::Boolean) }
def url_protected_by_incapsula?(response)
  headers = response[:headers]
  return false if headers.blank?
  return false unless response[:status_code].to_i == 403

  # Incapsula challenges set cookies with these well-known prefixes.
  incapsula_cookies = Array(headers["set-cookie"]).compact
  incapsula_cookies.any? { |cookie| cookie.match?(/^(visid_incap|incap_ses)_/i) }
end
|
2017-12-03 14:02:55 +01:00
|
|
|
|
2023-03-16 00:31:47 +00:00
|
|
|
# Audits a URL for reachability and, when an HTTPS equivalent of an HTTP
# URL exists and serves the same content, recommends switching to HTTPS.
#
# @param url [String] the URL to check (ignored unless it starts with "http").
# @param url_type [String] description of the URL's role, used in messages.
# @param specs [Hash] extra `curl` options
#   (see `#curl_http_content_headers_and_checksum`).
# @param user_agents [Array<String, Symbol>] user agents to try in order.
# @param referer [String, nil] referer to send with the requests.
# @param check_content [Boolean] also compare HTTP/HTTPS body content.
# @param strict [Boolean] apply stricter content comparisons.
# @param use_homebrew_curl [Boolean] use the Homebrew-installed `curl`.
# @return [String, nil] a problem description, or `nil` if no issue found.
def curl_check_http_content(url, url_type, specs: {}, user_agents: [:default], referer: nil,
                            check_content: false, strict: false, use_homebrew_curl: false)
  return unless url.start_with? "http"

  secure_url = url.sub(/\Ahttp:/, "https:")
  secure_details = T.let(nil, T.nilable(T::Hash[Symbol, T.untyped]))
  hash_needed = T.let(false, T::Boolean)
  if url != secure_url
    # Probe the HTTPS variant first, trying each user agent until one
    # yields an OK status; remember that agent for the HTTP fetch below.
    user_agents.each do |user_agent|
      secure_details = begin
        curl_http_content_headers_and_checksum(
          secure_url,
          specs: specs,
          hash_needed: true,
          use_homebrew_curl: use_homebrew_curl,
          user_agent: user_agent,
          referer: referer,
        )
      rescue Timeout::Error
        next
      end

      next unless http_status_ok?(secure_details[:status_code])

      hash_needed = true
      user_agents = [user_agent]
      break
    end
  end

  # Fetch the original URL, again trying user agents until one succeeds.
  details = T.let(nil, T.nilable(T::Hash[Symbol, T.untyped]))
  user_agents.each do |user_agent|
    details =
      curl_http_content_headers_and_checksum(
        url,
        specs: specs,
        hash_needed: hash_needed,
        use_homebrew_curl: use_homebrew_curl,
        user_agent: user_agent,
        referer: referer,
      )
    break if http_status_ok?(details[:status_code])
  end

  unless details[:status_code]
    # Hack around https://github.com/Homebrew/brew/issues/3199
    return if MacOS.version == :el_capitan

    return "The #{url_type} #{url} is not reachable"
  end

  unless http_status_ok?(details[:status_code])
    # Bot-protection services legitimately block us; don't flag those URLs.
    return if details[:responses].any? do |response|
      url_protected_by_cloudflare?(response) || url_protected_by_incapsula?(response)
    end

    # https://github.com/Homebrew/brew/issues/13789
    # If the `:homepage` of a formula is private, it will fail an `audit`
    # since there's no way to specify a `strategy` with `using:` and
    # GitHub does not authorize access to the web UI using token
    #
    # Strategy:
    # If the `:homepage` 404s, it's a GitHub link, and we have a token then
    # check the API (which does use tokens) for the repository
    repo_details = url.match(%r{https?://github\.com/(?<user>[^/]+)/(?<repo>[^/]+)/?.*})
    check_github_api = url_type == SharedAudits::URL_TYPE_HOMEPAGE &&
                       details[:status_code] == "404" &&
                       repo_details &&
                       Homebrew::EnvConfig.github_api_token

    unless check_github_api
      return "The #{url_type} #{url} is not reachable (HTTP status code #{details[:status_code]})"
    end

    # NOTE(review): this string is not `return`ed, so when the GitHub API
    # lookup fails the value is discarded and execution continues below —
    # confirm whether a `return` is intended here.
    "Unable to find homepage" if SharedAudits.github_repo_data(repo_details[:user], repo_details[:repo]).nil?
  end

  if url.start_with?("https://") && Homebrew::EnvConfig.no_insecure_redirect? &&
     (details[:final_url].present? && !details[:final_url].start_with?("https://"))
    return "The #{url_type} #{url} redirects back to HTTP"
  end

  return unless secure_details

  return if !http_status_ok?(details[:status_code]) || !http_status_ok?(secure_details[:status_code])

  # Compare cheap response metadata first: ETag and Content-Length.
  etag_match = details[:etag] &&
               details[:etag] == secure_details[:etag]
  content_length_match =
    details[:content_length] &&
    details[:content_length] == secure_details[:content_length]
  file_match = details[:file_hash] == secure_details[:file_hash]

  http_with_https_available =
    url.start_with?("http://") &&
    (secure_details[:final_url].present? && secure_details[:final_url].start_with?("https://"))

  if (etag_match || content_length_match || file_match) && http_with_https_available
    return "The #{url_type} #{url} should use HTTPS rather than HTTP"
  end

  return unless check_content

  # Strip protocol markers so http:// vs https:// links inside the body
  # don't cause spurious differences.
  no_protocol_file_contents = %r{https?:\\?/\\?/}
  http_content = details[:file]&.scrub&.gsub(no_protocol_file_contents, "/")
  https_content = secure_details[:file]&.scrub&.gsub(no_protocol_file_contents, "/")

  # Check for the same content after removing all protocols
  if (http_content && https_content) && (http_content == https_content) && http_with_https_available
    return "The #{url_type} #{url} should use HTTPS rather than HTTP"
  end

  return unless strict

  # Same size, different content after normalization
  # (typical causes: Generated ID, Timestamp, Unix time)
  # NOTE(review): `http_content`/`https_content` can be `nil` here when
  # `details[:file]` is missing, in which case `.length` would raise —
  # confirm whether a nil guard is needed before the strict checks.
  if http_content.length == https_content.length
    return "The #{url_type} #{url} may be able to use HTTPS rather than HTTP. Please verify it in a browser."
  end

  # Flag near-identical sizes (within ±10%) as likely-equivalent content.
  lenratio = (https_content.length * 100 / http_content.length).to_i
  return unless (90..110).cover?(lenratio)

  "The #{url_type} #{url} may be able to use HTTPS rather than HTTP. Please verify it in a browser."
end
|
2018-09-17 02:45:00 +02:00
|
|
|
|
2021-09-06 22:56:25 -04:00
|
|
|
# Fetches a URL with `curl`, returning response status information,
# headers, body content and (optionally) a SHA-256 hash of the body.
#
# @param url [String] the URL to fetch.
# @param specs [Hash] extra `curl` options; each key becomes a `--key` flag
#   (underscores translated to dashes) and non-boolean values become the
#   flag's argument.
# @param hash_needed [Boolean] when `true`, compute a SHA-256 hash of the
#   downloaded body (and allow a much longer `--max-time`).
# @param use_homebrew_curl [Boolean] use the Homebrew-installed `curl`.
# @param user_agent [String, Symbol] user agent passed to `#curl_args`.
# @param referer [String, nil] referer passed to `#curl_args`.
# @return [Hash] `:url`, `:final_url`, `:status_code`, `:headers`, `:etag`,
#   `:content_length`, `:file` (body text), `:file_hash`, and `:responses`.
def curl_http_content_headers_and_checksum(
  url, specs: {}, hash_needed: false,
  use_homebrew_curl: false, user_agent: :default, referer: nil
)
  # The body is written to a temp file (closed so curl can reopen it).
  file = Tempfile.new.tap(&:close)

  # Convert specs to options. This is mostly key-value options,
  # unless the value is a boolean in which case treat as a flag.
  specs = specs.flat_map do |option, argument|
    next [] if argument == false # No flag.

    args = ["--#{option.to_s.tr("_", "-")}"]
    args << argument unless argument == true # It's a flag.
    args
  end

  # Allow much more time when we also need to hash a (possibly large) body.
  max_time = hash_needed ? 600 : 25
  output, _, status = curl_output(
    *specs, "--dump-header", "-", "--output", file.path, "--location", url,
    use_homebrew_curl: use_homebrew_curl,
    connect_timeout: 15,
    max_time: max_time,
    retry_max_time: max_time,
    user_agent: user_agent,
    referer: referer
  )

  parsed_output = parse_curl_output(output)
  responses = parsed_output[:responses]

  final_url = curl_response_last_location(responses)
  # Status and headers come from the last response (after any redirects).
  headers = if responses.last.present?
    status_code = responses.last[:status_code]
    responses.last[:headers]
  else
    {}
  end
  etag = headers["etag"][ETAG_VALUE_REGEX, 1] if headers["etag"].present?
  content_length = headers["content-length"]

  if status.success?
    open_args = {}
    # Try to get encoding from Content-Type header
    # TODO: add guessing encoding by <meta http-equiv="Content-Type" ...> tag
    if (content_type = headers["content-type"]) &&
       (match = content_type.match(/;\s*charset\s*=\s*([^\s]+)/)) &&
       (charset = match[1])
      begin
        open_args[:encoding] = Encoding.find(charset)
      rescue ArgumentError
        # Unknown charset in Content-Type header
      end
    end
    file_contents = File.read(T.must(file.path), **open_args)
    file_hash = Digest::SHA2.hexdigest(file_contents) if hash_needed
  end

  {
    url: url,
    final_url: final_url,
    status_code: status_code,
    headers: headers,
    etag: etag,
    content_length: content_length,
    file: file_contents,
    file_hash: file_hash,
    responses: responses,
  }
ensure
  # Always remove the temp file, even if curl or parsing raised.
  T.must(file).unlink
end
|
2017-12-03 14:02:55 +01:00
|
|
|
|
2021-10-23 15:09:34 -04:00
|
|
|
# Whether the currently-resolved `curl` supports TLS 1.3, determined by
# attempting a `--tlsv1.3` HEAD request against brew.sh. The probe result
# is cached per resolved curl path for the life of the process.
def curl_supports_tls13?
  @curl_supports_tls13 ||= Hash.new do |cache, path|
    # Lazily probe and memoize for each distinct curl path.
    cache[path] = quiet_system(curl_executable, "--tlsv1.3", "--head", "https://brew.sh/")
  end

  @curl_supports_tls13[curl_path]
end
|
|
|
|
|
2020-10-10 15:23:03 +02:00
|
|
|
# Whether an HTTP status code represents an informational or successful
# response (1xx/2xx). Accepts a String or Integer; `nil` coerces to 0 and
# is therefore not OK.
def http_status_ok?(status)
  status.to_i.between?(100, 299)
end
|
2021-04-26 10:13:12 -04:00
|
|
|
|
|
|
|
# Separates the output text from `curl` into an array of HTTP responses and
# the final response body (i.e. content). Response hashes contain the
# `:status_code`, `:status_text`, and `:headers`.
# @param output [String] The output text from `curl` containing HTTP
#   responses, body content, or both.
# @param max_iterations [Integer] The maximum number of iterations for the
#   parsing loop. This should correspond to the maximum number of requests
#   in the output. If `curl`'s `--max-redirs` option is used,
#   `max_iterations` should be `max-redirs + 1`, to account for any final
#   response after the redirections.
# @return [Hash] A hash containing an array of response hashes and the body
#   content, if found.
sig { params(output: String, max_iterations: Integer).returns(T::Hash[Symbol, T.untyped]) }
def parse_curl_output(output, max_iterations: 25)
  parsed_responses = []
  remaining = output.lstrip

  iteration = 0
  loop do
    # Stop once the remaining text no longer starts with a status line or
    # has no header/body separator left — what remains is the body.
    break unless remaining.match?(%r{\AHTTP/[\d.]+ \d+}) && remaining.include?(HTTP_RESPONSE_BODY_SEPARATOR)

    iteration += 1
    raise "Too many redirects (max = #{max_iterations})" if iteration > max_iterations

    head_text, _, remaining = remaining.partition(HTTP_RESPONSE_BODY_SEPARATOR)
    remaining = remaining.lstrip
    next if head_text.blank?

    head_text.chomp!
    parsed = parse_curl_response(head_text)
    parsed_responses << parsed if parsed.present?
  end

  { responses: parsed_responses, body: remaining }
end
|
|
|
|
|
|
|
|
# Returns the URL from the last location header found in cURL responses,
# if any.
# @param responses [Array<Hash>] An array of hashes containing response
#   status information and headers from `#parse_curl_response`.
# @param absolutize [true, false] Whether to make the location URL absolute.
# @param base_url [String, nil] The URL to use as a base for making the
#   `location` URL absolute.
# @return [String, nil] The URL from the last-occurring `location` header
#   in the responses or `nil` (if no `location` headers found).
sig {
  params(
    responses:  T::Array[T::Hash[Symbol, T.untyped]],
    absolutize: T::Boolean,
    base_url:   T.nilable(String),
  ).returns(T.nilable(String))
}
def curl_response_last_location(responses, absolutize: false, base_url: nil)
  # Walk the responses backwards so the first hit is the last redirect.
  responses.reverse_each do |response|
    headers = response[:headers]
    next if headers.blank?

    location = headers["location"]
    next if location.blank?

    return URI.join(base_url, location).to_s if absolutize && base_url.present?

    return location
  end

  nil
end
|
|
|
|
|
2022-11-25 12:09:59 +11:00
|
|
|
# Returns the final URL by following location headers in cURL responses.
# @param responses [Array<Hash>] An array of hashes containing response
#   status information and headers from `#parse_curl_response`.
# @param base_url [String] The URL to use as a base.
# @return [String] The final absolute URL after redirections.
sig {
  params(
    responses: T::Array[T::Hash[Symbol, T.untyped]],
    base_url:  String,
  ).returns(String)
}
def curl_response_follow_redirections(responses, base_url)
  # Fold each location header into the running URL, in response order.
  responses.reduce(base_url) do |current_url, response|
    location = response[:headers].blank? ? nil : response[:headers]["location"]

    if location.blank?
      current_url
    else
      URI.join(current_url, location).to_s
    end
  end
end
|
|
|
|
|
2021-04-26 10:13:12 -04:00
|
|
|
private
|
|
|
|
|
|
|
|
# Parses HTTP response text from `curl` output into a hash containing the
# information from the status line (status code and, optionally,
# descriptive text) and headers.
# @param response_text [String] The text of a `curl` response, consisting
#   of a status line followed by header lines.
# @return [Hash] A hash containing the response status information and
#   headers (as a hash with header names as keys).
sig { params(response_text: String).returns(T::Hash[Symbol, T.untyped]) }
def parse_curl_response(response_text)
  response = {}
  return response unless response_text.match?(HTTP_STATUS_LINE_REGEX)

  # Parse the status line and remove it
  match = T.must(response_text.match(HTTP_STATUS_LINE_REGEX))
  response[:status_code] = match["code"] if match["code"].present?
  response[:status_text] = match["text"] if match["text"].present?
  response_text = response_text.sub(%r{^HTTP/.* (\d+).*$\s*}, "")

  # Create a hash from the header lines
  response[:headers] = {}
  response_text.split("\r\n").each do |line|
    header_name, header_value = line.split(/:\s*/, 2)
    next if header_name.blank?

    header_name = header_name.strip.downcase
    header_value&.strip!

    # Duplicate headers (e.g., `Set-Cookie`) accumulate into an array.
    case response[:headers][header_name]
    when nil
      response[:headers][header_name] = header_value
    when String
      response[:headers][header_name] = [response[:headers][header_name], header_value]
    when Array
      response[:headers][header_name].push(header_value)
    end
  end

  response
end
|
2017-12-03 14:02:55 +01:00
|
|
|
end
|
|
|
|
end
|
2019-09-18 10:32:13 +01:00
|
|
|
|
2020-10-10 15:23:03 +02:00
|
|
|
# FIXME: Include `Utils::Curl` explicitly everywhere it is used.
|
|
|
|
include Utils::Curl # rubocop:disable Style/MixinUsage
|