Merge pull request #20200 from Homebrew/consider-content-type

download_strategy: preserve cache upon text response
This commit is contained in:
Eric Knibbe 2025-07-04 02:18:45 +00:00 committed by GitHub
commit 94606f343a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -425,8 +425,8 @@ end
class CurlDownloadStrategy < AbstractFileDownloadStrategy
include Utils::Curl
# url, basename, time, file_size, is_redirection
URLMetadata = T.type_alias { [String, String, T.nilable(Time), T.nilable(Integer), T::Boolean] }
# url, basename, time, file_size, content_type, is_redirection
URLMetadata = T.type_alias { [String, String, T.nilable(Time), T.nilable(Integer), T.nilable(String), T::Boolean] }
sig { returns(T::Array[String]) }
attr_reader :mirrors
@ -470,7 +470,7 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
cached_location_valid = cached_location.exist?
resolved_url, _, last_modified, file_size, is_redirection = begin
resolved_url, _, last_modified, file_size, content_type, is_redirection = begin
resolve_url_basename_time_file_size(url, timeout: Utils::Timer.remaining!(end_time))
rescue ErrorDuringExecution
raise unless cached_location_valid
@ -482,18 +482,20 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
# The cached location is no longer fresh if either:
# - Last-Modified value is newer than the file's timestamp
# - Content-Length value is different than the file's size
if cached_location_valid && last_modified && last_modified > cached_location.mtime
if cached_location_valid && (content_type.nil? || !content_type.start_with?("text/"))
if last_modified && last_modified > cached_location.mtime
ohai "Ignoring #{cached_location}",
"Cached modified time #{cached_location.mtime.iso8601} is before" \
"Last-Modified header: #{last_modified.iso8601}"
cached_location_valid = false
end
if cached_location_valid && file_size&.nonzero? && file_size != cached_location.size
if file_size&.nonzero? && file_size != cached_location.size
ohai "Ignoring #{cached_location}",
"Cached size #{cached_location.size} differs from " \
"Content-Length header: #{file_size}"
cached_location_valid = false
end
end
if cached_location_valid
puts "Already downloaded: #{cached_location}"
@ -530,7 +532,7 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
sig { params(timeout: T.any(Float, Integer, NilClass)).returns([T.nilable(Time), Integer]) }
def resolved_time_file_size(timeout: nil)
_, _, time, file_size = resolve_url_basename_time_file_size(url, timeout:)
_, _, time, file_size, = resolve_url_basename_time_file_size(url, timeout:)
[time, T.must(file_size)]
end
@ -548,9 +550,9 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
return @resolved_info_cache.fetch(url) if @resolved_info_cache.include?(url)
begin
parsed_output = curl_headers(url.to_s, wanted_headers: ["content-disposition"], timeout:)
parsed_output = curl_headers(url.to_s, wanted_headers: ["content-disposition", "content-type"], timeout:)
rescue ErrorDuringExecution
return [url, parse_basename(url), nil, nil, false]
return [url, parse_basename(url), nil, nil, nil, false]
end
parsed_headers = parsed_output.fetch(:responses).map { |r| r.fetch(:headers) }
@ -602,10 +604,14 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
.flat_map { |headers| [*headers["content-length"]&.to_i] }
.last
content_type = parsed_headers
.flat_map { |headers| [*headers["content-type"]] }
.last
is_redirection = url != final_url
basename = filenames.last || parse_basename(final_url, search_query: !is_redirection)
@resolved_info_cache[url] = [final_url, basename, time.last, file_size, is_redirection]
@resolved_info_cache[url] = [final_url, basename, time.last, file_size, content_type, is_redirection]
end
sig {
@ -723,7 +729,7 @@ class CurlGitHubPackagesDownloadStrategy < CurlDownloadStrategy
def resolve_url_basename_time_file_size(url, timeout: nil)
return super if @resolved_basename.blank?
[url, @resolved_basename, nil, nil, false]
[url, @resolved_basename, nil, nil, nil, false]
end
end