Merge pull request #20200 from Homebrew/consider-content-type

download_strategy: preserve cache upon text response
This commit is contained in:
Eric Knibbe 2025-07-04 02:18:45 +00:00 committed by GitHub
commit 94606f343a
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -425,8 +425,8 @@ end
class CurlDownloadStrategy < AbstractFileDownloadStrategy class CurlDownloadStrategy < AbstractFileDownloadStrategy
include Utils::Curl include Utils::Curl
# url, basename, time, file_size, is_redirection # url, basename, time, file_size, content_type, is_redirection
URLMetadata = T.type_alias { [String, String, T.nilable(Time), T.nilable(Integer), T::Boolean] } URLMetadata = T.type_alias { [String, String, T.nilable(Time), T.nilable(Integer), T.nilable(String), T::Boolean] }
sig { returns(T::Array[String]) } sig { returns(T::Array[String]) }
attr_reader :mirrors attr_reader :mirrors
@ -470,7 +470,7 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
cached_location_valid = cached_location.exist? cached_location_valid = cached_location.exist?
resolved_url, _, last_modified, file_size, is_redirection = begin resolved_url, _, last_modified, file_size, content_type, is_redirection = begin
resolve_url_basename_time_file_size(url, timeout: Utils::Timer.remaining!(end_time)) resolve_url_basename_time_file_size(url, timeout: Utils::Timer.remaining!(end_time))
rescue ErrorDuringExecution rescue ErrorDuringExecution
raise unless cached_location_valid raise unless cached_location_valid
@ -482,17 +482,19 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
# The cached location is no longer fresh if either: # The cached location is no longer fresh if either:
# - Last-Modified value is newer than the file's timestamp # - Last-Modified value is newer than the file's timestamp
# - Content-Length value is different than the file's size # - Content-Length value is different than the file's size
if cached_location_valid && last_modified && last_modified > cached_location.mtime if cached_location_valid && (content_type.nil? || !content_type.start_with?("text/"))
ohai "Ignoring #{cached_location}", if last_modified && last_modified > cached_location.mtime
"Cached modified time #{cached_location.mtime.iso8601} is before" \ ohai "Ignoring #{cached_location}",
"Last-Modified header: #{last_modified.iso8601}" "Cached modified time #{cached_location.mtime.iso8601} is before" \
cached_location_valid = false "Last-Modified header: #{last_modified.iso8601}"
end cached_location_valid = false
if cached_location_valid && file_size&.nonzero? && file_size != cached_location.size end
ohai "Ignoring #{cached_location}", if file_size&.nonzero? && file_size != cached_location.size
"Cached size #{cached_location.size} differs from " \ ohai "Ignoring #{cached_location}",
"Content-Length header: #{file_size}" "Cached size #{cached_location.size} differs from " \
cached_location_valid = false "Content-Length header: #{file_size}"
cached_location_valid = false
end
end end
if cached_location_valid if cached_location_valid
@ -530,7 +532,7 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
sig { params(timeout: T.any(Float, Integer, NilClass)).returns([T.nilable(Time), Integer]) } sig { params(timeout: T.any(Float, Integer, NilClass)).returns([T.nilable(Time), Integer]) }
def resolved_time_file_size(timeout: nil) def resolved_time_file_size(timeout: nil)
_, _, time, file_size = resolve_url_basename_time_file_size(url, timeout:) _, _, time, file_size, = resolve_url_basename_time_file_size(url, timeout:)
[time, T.must(file_size)] [time, T.must(file_size)]
end end
@ -548,9 +550,9 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
return @resolved_info_cache.fetch(url) if @resolved_info_cache.include?(url) return @resolved_info_cache.fetch(url) if @resolved_info_cache.include?(url)
begin begin
parsed_output = curl_headers(url.to_s, wanted_headers: ["content-disposition"], timeout:) parsed_output = curl_headers(url.to_s, wanted_headers: ["content-disposition", "content-type"], timeout:)
rescue ErrorDuringExecution rescue ErrorDuringExecution
return [url, parse_basename(url), nil, nil, false] return [url, parse_basename(url), nil, nil, nil, false]
end end
parsed_headers = parsed_output.fetch(:responses).map { |r| r.fetch(:headers) } parsed_headers = parsed_output.fetch(:responses).map { |r| r.fetch(:headers) }
@ -602,10 +604,14 @@ class CurlDownloadStrategy < AbstractFileDownloadStrategy
.flat_map { |headers| [*headers["content-length"]&.to_i] } .flat_map { |headers| [*headers["content-length"]&.to_i] }
.last .last
content_type = parsed_headers
.flat_map { |headers| [*headers["content-type"]] }
.last
is_redirection = url != final_url is_redirection = url != final_url
basename = filenames.last || parse_basename(final_url, search_query: !is_redirection) basename = filenames.last || parse_basename(final_url, search_query: !is_redirection)
@resolved_info_cache[url] = [final_url, basename, time.last, file_size, is_redirection] @resolved_info_cache[url] = [final_url, basename, time.last, file_size, content_type, is_redirection]
end end
sig { sig {
@ -723,7 +729,7 @@ class CurlGitHubPackagesDownloadStrategy < CurlDownloadStrategy
def resolve_url_basename_time_file_size(url, timeout: nil) def resolve_url_basename_time_file_size(url, timeout: nil)
return super if @resolved_basename.blank? return super if @resolved_basename.blank?
[url, @resolved_basename, nil, nil, false] [url, @resolved_basename, nil, nil, nil, false]
end end
end end