Add curl_head method.

This commit is contained in:
Markus Reiter 2023-03-31 21:35:58 +02:00
parent 367fe5373b
commit fbf474a3fd
No known key found for this signature in database
GPG Key ID: 245293B51702655B
5 changed files with 100 additions and 33 deletions

View File

@ -463,15 +463,10 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
url = url.sub(%r{^https?://#{GitHubPackages::URL_DOMAIN}/}o, "#{domain.chomp("/")}/")
end
output, _, _status = curl_output(
"--location", "--silent", "--head", "--request", "GET", url.to_s,
timeout: timeout
)
parsed_output = parse_curl_output(output)
parsed_output = curl_head(url.to_s, timeout: timeout)
parsed_headers = parsed_output.fetch(:responses).map { |r| r.fetch(:headers) }
lines = output.to_s.lines.map(&:chomp)
final_url = curl_response_follow_redirections(parsed_output[:responses], url)
final_url = curl_response_follow_redirections(parsed_output.fetch(:responses), url)
content_disposition_parser = Mechanize::HTTP::ContentDispositionParser.new
@ -500,19 +495,20 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
File.basename(filename)
end
filenames = lines.map(&parse_content_disposition).compact
filenames = parsed_headers.flat_map do |headers|
next [] unless (header = headers["content-disposition"])
time =
lines.map { |line| line[/^Last-Modified:\s*(.+)/i, 1] }
.compact
[*parse_content_disposition.call("Content-Disposition: #{header}")]
end
time = parsed_headers
.flat_map { |headers| [*headers["last-modified"]] }
.map { |t| t.match?(/^\d+$/) ? Time.at(t.to_i) : Time.parse(t) }
.last
file_size =
lines.map { |line| line[/^Content-Length:\s*(\d+)/i, 1] }
.compact
.map(&:to_i)
.last
file_size = parsed_headers
.flat_map { |headers| [*headers["content-length"]&.to_i] }
.last
is_redirection = url != final_url
basename = filenames.last || parse_basename(final_url, search_query: !is_redirection)

View File

@ -9,12 +9,38 @@ describe CurlGitHubPackagesDownloadStrategy do
let(:name) { "foo" }
let(:url) { "https://#{GitHubPackages::URL_DOMAIN}/v2/homebrew/core/spec_test/manifests/1.2.3" }
let(:version) { "1.2.3" }
let(:specs) { {} }
let(:specs) { { headers: ["Accept: application/vnd.oci.image.index.v1+json"] } }
let(:authorization) { nil }
let(:head_response) do
<<~HTTP
HTTP/2 200\r
content-length: 12671\r
content-type: application/vnd.oci.image.index.v1+json\r
docker-content-digest: sha256:7d752ee92d9120e3884b452dce15328536a60d468023ea8e9f4b09839a5442e5\r
docker-distribution-api-version: registry/2.0\r
etag: "sha256:7d752ee92d9120e3884b452dce15328536a60d468023ea8e9f4b09839a5442e5"\r
date: Sun, 02 Apr 2023 22:45:08 GMT\r
x-github-request-id: 8814:FA5A:14DAFB5:158D7A2:642A0574\r
HTTP
end
describe "#fetch" do
before do
stub_const("HOMEBREW_GITHUB_PACKAGES_AUTH", authorization) if authorization.present?
allow(strategy).to receive(:system_command)
.with(
/curl/,
hash_including(args: array_including("--head")),
)
.twice
.and_return(instance_double(
SystemCommand::Result,
success?: true,
exit_status: instance_double(Process::Status, exitstatus: 0),
stdout: head_response,
))
strategy.temporary_path.dirname.mkpath
FileUtils.touch strategy.temporary_path
end

View File

@ -10,9 +10,28 @@ describe CurlPostDownloadStrategy do
let(:url) { "https://example.com/foo.tar.gz" }
let(:version) { "1.2.3" }
let(:specs) { {} }
let(:head_response) do
<<~HTTP
HTTP/1.1 200\r
Content-Disposition: attachment; filename="foo.tar.gz"
HTTP
end
describe "#fetch" do
before do
allow(strategy).to receive(:system_command)
.with(
/curl/,
hash_including(args: array_including("--head")),
)
.twice
.and_return(instance_double(
SystemCommand::Result,
success?: true,
exit_status: instance_double(Process::Status, exitstatus: 0),
stdout: head_response,
))
strategy.temporary_path.dirname.mkpath
FileUtils.touch strategy.temporary_path
end

View File

@ -12,6 +12,10 @@ describe CurlDownloadStrategy do
let(:specs) { { user: "download:123456" } }
let(:artifact_domain) { nil }
before do
allow(strategy).to receive(:curl_head).and_return({ responses: [{ headers: {} }] })
end
it "parses the opts and sets the corresponding args" do
expect(strategy.send(:_curl_args)).to eq(["--user", "download:123456"])
end
@ -190,48 +194,48 @@ describe CurlDownloadStrategy do
end
describe "#cached_location" do
subject(:cached_location) { described_class.new(url, name, version, **specs).cached_location }
subject(:cached_location) { strategy.cached_location }
context "when URL ends with file" do
it {
it "falls back to the file name in the URL" do
expect(cached_location).to eq(
HOMEBREW_CACHE/"downloads/3d1c0ae7da22be9d83fb1eb774df96b7c4da71d3cf07e1cb28555cf9a5e5af70--foo.tar.gz",
)
}
end
end
context "when URL file is in middle" do
let(:url) { "https://example.com/foo.tar.gz/from/this/mirror" }
it {
it "falls back to the file name in the URL" do
expect(cached_location).to eq(
HOMEBREW_CACHE/"downloads/1ab61269ba52c83994510b1e28dd04167a2f2e8393a35a9c50c1f7d33fd8f619--foo.tar.gz",
)
}
end
end
context "with a file name trailing the URL path" do
let(:url) { "https://example.com/cask.dmg" }
it {
it "falls back to the file extension in the URL" do
expect(cached_location.extname).to eq(".dmg")
}
end
end
context "with a file name trailing the first query parameter" do
let(:url) { "https://example.com/download?file=cask.zip&a=1" }
it {
it "falls back to the file extension in the URL" do
expect(cached_location.extname).to eq(".zip")
}
end
end
context "with a file name trailing the second query parameter" do
let(:url) { "https://example.com/dl?a=1&file=cask.zip&b=2" }
it {
it "falls back to the file extension in the URL" do
expect(cached_location.extname).to eq(".zip")
}
end
end
context "with an unusually long query string" do
@ -253,10 +257,10 @@ describe CurlDownloadStrategy do
].join
end
it {
it "falls back to the file extension in the URL" do
expect(cached_location.extname).to eq(".zip")
expect(cached_location.to_path.length).to be_between(0, 255)
}
end
end
end
end

View File

@ -141,7 +141,7 @@ module Utils
raise Timeout::Error, result.stderr.lines.last.chomp if timeout && result.status.exitstatus == 28
# Error in the HTTP2 framing layer
if result.status.exitstatus == 16
if result.exit_status == 16
return curl_with_workarounds(
*args, "--http1.1",
timeout: end_time&.remaining, **command_options, **options
@ -149,7 +149,7 @@ module Utils
end
# This is a workaround for https://github.com/curl/curl/issues/1618.
if result.status.exitstatus == 56 # Unexpected EOF
if result.exit_status == 56 # Unexpected EOF
out = curl_output("-V").stdout
# If `curl` doesn't support HTTP2, the exception is unrelated to this bug.
@ -207,6 +207,28 @@ module Utils
curl_with_workarounds(*args, print_stderr: false, show_output: true, **options)
end
def curl_head(*args, **options)
[[], ["--request", "GET"]].each do |request_args|
result = curl_output(
"--fail", "--location", "--silent", "--head", *request_args, *args,
**options
)
# 22 means a non-successful HTTP status code, not a `curl` error, so we still got some headers.
if result.success? || result.exit_status == 22
parsed_output = parse_curl_output(result.stdout)
# If we didn't get a `Content-Disposition` header yet, retry using `GET`.
next if request_args.empty? &&
parsed_output.fetch(:responses).none? { |r| r.fetch(:headers).key?("content-disposition") }
return parsed_output if result.success?
end
result.assert_success!
end
end
# Check if a URL is protected by CloudFlare (e.g. badlion.net and jaxx.io).
# @param response [Hash] A response hash from `#parse_curl_response`.
# @return [true, false] Whether a response contains headers indicating that