2020-10-10 14:16:11 +02:00
|
|
|
# typed: false
|
2020-08-08 07:16:06 +05:30
|
|
|
# frozen_string_literal: true
|
|
|
|
|
|
|
|
module Homebrew
|
|
|
|
module Livecheck
|
|
|
|
# The `Livecheck::Strategy` module contains the various strategies as well
|
|
|
|
# as some general-purpose methods for working with them. Within the context
|
|
|
|
# of the `brew livecheck` command, strategies are established procedures
|
|
|
|
# for finding new software versions at a given source.
|
|
|
|
#
|
|
|
|
# @api private
|
|
|
|
module Strategy
|
2020-12-24 03:33:14 +01:00
|
|
|
extend T::Sig
|
|
|
|
|
2020-08-08 07:16:06 +05:30
|
|
|
module_function
|
|
|
|
|
|
|
|
# Priorities informally run from 1 (lowest) to 10 (highest). 5 sits roughly
# in the middle of that range, so it is used when a strategy does not
# declare its own priority. A priority of 0 or below means the strategy is
# ignored.
DEFAULT_PRIORITY = 5

# Connection timeout (in seconds). cURL's own default `--connect-timeout`
# can be up to two minutes, which is far too long to wait when a
# connection can't be established.
CURL_CONNECT_TIMEOUT = 10

# Overall transfer timeout (in seconds). cURL has no default `--max-time`,
# so one is supplied to make sure requests end in a reasonable time.
CURL_MAX_TIME = CURL_CONNECT_TIMEOUT + 5

# Hard limit (in seconds) for the `curl` process itself. The process will
# sometimes hang indefinitely even with `--max-time` set, so this value is
# passed as the `timeout` argument of `Utils::Curl` method calls in
# `Strategy` to make sure it gets quit and livecheck can continue.
CURL_PROCESS_TIMEOUT = CURL_MAX_TIME + 5

# `curl` arguments shared by the `Strategy` methods.
DEFAULT_CURL_ARGS = [
  # Follow redirections to handle mirrors, relocations, etc.
  "--location",
  "--connect-timeout", CURL_CONNECT_TIMEOUT,
  "--max-time", CURL_MAX_TIME
].freeze

# `curl` arguments used by the `Strategy#page_headers` method.
PAGE_HEADERS_CURL_ARGS = [
  # Only the response head is needed (not the body)
  "--head",
  # Some servers may not allow a HEAD request, so GET is used instead
  "--request", "GET",
  "--silent",
  *DEFAULT_CURL_ARGS
].freeze

# `curl` arguments used by the `Strategy#page_content` method.
PAGE_CONTENT_CURL_ARGS = [
  "--compressed",
  # Include the HTTP response headers in the output, so the final URL can
  # be identified after any redirections
  "--include",
  *DEFAULT_CURL_ARGS
].freeze

# `curl` options shared by the `Strategy` methods.
DEFAULT_CURL_OPTIONS = {
  print_stdout: false,
  print_stderr: false,
  debug:        false,
  verbose:      false,
  timeout:      CURL_PROCESS_TIMEOUT,
  retry:        false,
}.freeze

# Separator between HTTP response head(s) and body. This is typically a
# double `CRLF` (header lines themselves are separated by a single `CRLF`).
# In rare cases a double newline (`\n\n`) can appear instead.
HTTP_HEAD_BODY_SEPARATOR = "\r\n\r\n"
|
|
|
|
|
|
|
|
# `#strategies` treats the constants it finds in `Strategy` as strategies,
# so the helper constants defined here are made private to keep them out
# of that lookup.
private_constant :DEFAULT_PRIORITY,
                 :CURL_CONNECT_TIMEOUT,
                 :CURL_MAX_TIME,
                 :CURL_PROCESS_TIMEOUT,
                 :DEFAULT_CURL_ARGS,
                 :PAGE_HEADERS_CURL_ARGS,
                 :PAGE_CONTENT_CURL_ARGS,
                 :DEFAULT_CURL_OPTIONS,
                 :HTTP_HEAD_BODY_SEPARATOR
|
2020-08-08 07:16:06 +05:30
|
|
|
|
2020-11-05 17:17:03 -05:00
|
|
|
# Creates and/or returns a `@strategies` `Hash`, which maps a snake
# case strategy name symbol (e.g. `:page_match`) to the associated
# {Strategy}.
#
# The hash is built once from the module's constants (in sorted order) and
# memoized. Constants that are not modules or classes are skipped, as they
# cannot be strategies.
#
# At present, this should only be called after tap strategies have been
# loaded, otherwise livecheck won't be able to use them.
# @return [Hash]
def strategies
  return @strategies if defined?(@strategies)

  @strategies = constants.sort.each_with_object({}) do |const_name, mapping|
    candidate = const_get(const_name)
    # Callers invoke `const_defined?`/`match?` on every value, so a plain
    # value constant (e.g. a number or string) would make them raise.
    next unless candidate.is_a?(Module)

    mapping[const_name.to_s.underscore.to_sym] = candidate
  end
end
|
|
|
|
private_class_method :strategies
|
|
|
|
|
2020-11-05 17:17:03 -05:00
|
|
|
# Looks up the {Strategy} registered under the provided `Symbol`. The
# result is `nil` when no {Strategy} is registered under that name.
#
# @param symbol [Symbol] the strategy name in snake case as a `Symbol`
#   (e.g. `:page_match`)
# @return [Strategy, nil]
def from_symbol(symbol)
  registry = strategies
  registry[symbol]
end
|
|
|
|
|
|
|
|
# Returns an array of strategies that apply to the provided URL, ordered
# from highest to lowest priority. Strategies with the same priority keep
# the order in which `#strategies` lists them.
#
# @param url [String] the URL to check for matching strategies
# @param livecheck_strategy [Symbol] a {Strategy} symbol from the
#   `livecheck` block
# @param url_provided [Boolean] whether a url is provided in the
#   `livecheck` block (accepted for callers but not used by this method)
# @param regex_provided [Boolean] whether a regex is provided in the
#   `livecheck` block
# @param block_provided [Boolean] whether a `strategy` block is provided
#   in the `livecheck` block
# @return [Array]
def from_url(url, livecheck_strategy: nil, url_provided: nil, regex_provided: nil, block_provided: nil)
  requested_strategy = from_symbol(livecheck_strategy)

  usable_strategies = strategies.values.select do |strategy|
    if strategy == PageMatch
      # Only treat the `PageMatch` strategy as usable if a regex or a
      # block is present in the `livecheck` block
      next unless regex_provided || block_provided
    elsif strategy.const_defined?(:PRIORITY) &&
          !strategy::PRIORITY.positive? &&
          requested_strategy != strategy
      # Ignore strategies with a priority of 0 or lower, unless the
      # strategy is specified in the `livecheck` block
      next
    end

    strategy.respond_to?(:match?) && strategy.match?(url)
  end

  # Sort usable strategies in descending order by priority, using the
  # DEFAULT_PRIORITY when a strategy doesn't contain a PRIORITY constant.
  # `sort_by` is not a stable sort, so the original position is included
  # in the sort key to keep the order of equal-priority strategies
  # deterministic.
  ranked = usable_strategies.each_with_index.sort_by do |strategy, position|
    priority = strategy.const_defined?(:PRIORITY) ? strategy::PRIORITY : DEFAULT_PRIORITY
    [-priority, position]
  end
  ranked.map(&:first)
end
|
2020-12-12 21:56:07 +01:00
|
|
|
|
|
|
|
# Fetches the HTTP response headers for the provided URL, first with the
# `:default` user agent and then, if `curl` did not succeed, with the
# `:browser` user agent.
#
# Each response in `curl`'s output (e.g. every hop of a redirection)
# produces one hash, which maps the downcased header name to its value.
# Header lines without a `name: value` shape are skipped.
#
# @param url [String] the URL to fetch headers from
# @return [Array<Hash>] the headers of every parsed response, in the order
#   they were received; empty if no response head could be parsed
def self.page_headers(url)
  headers = []

  [:default, :browser].each do |user_agent|
    stdout, _, status = curl_with_workarounds(
      *PAGE_HEADERS_CURL_ARGS, url,
      **DEFAULT_CURL_OPTIONS,
      user_agent: user_agent
    )

    # We use #scrub here to avoid "invalid byte sequence in UTF-8" errors.
    remaining = stdout.to_s.scrub

    while remaining.match?(/\AHTTP.*\r$/)
      head, remaining = remaining.split(HTTP_HEAD_BODY_SEPARATOR, 2)
      # There is nothing after the head when the output has no separator
      # (e.g. truncated output), so stop after this response.
      remaining ||= ""

      # Drop the status line, then keep only well-formed header lines
      pairs = head.split("\r\n").drop(1)
                  .map { |header| header.split(/:\s*/, 2) }
                  .select { |pair| pair.length == 2 }
      headers << pairs.to_h.transform_keys(&:downcase)
    end

    return headers if status.success?
  end

  headers
end
|
|
|
|
|
2020-12-22 22:46:52 -05:00
|
|
|
# Fetches the content at the URL and returns a hash containing the
|
|
|
|
# content and, if there are any redirections, the final URL.
|
2020-12-14 13:03:10 -05:00
|
|
|
# If `curl` encounters an error, the hash will contain a `:messages`
|
|
|
|
# array with the error message instead.
|
2020-12-22 22:46:52 -05:00
|
|
|
#
|
|
|
|
# @param url [String] the URL of the content to check
|
|
|
|
# @return [Hash]
|
2020-12-24 03:33:14 +01:00
|
|
|
sig { params(url: String).returns(T::Hash[Symbol, T.untyped]) }
|
2020-12-19 00:21:29 -05:00
|
|
|
def self.page_content(url)
  stderr = nil

  # Try the default user agent first and fall back to a browser-like one
  # if `curl` does not succeed.
  [:default, :browser].each do |user_agent|
    stdout, stderr, status = curl_with_workarounds(
      *PAGE_CONTENT_CURL_ARGS, url,
      **DEFAULT_CURL_OPTIONS,
      user_agent: user_agent
    )
    next unless status.success?

    # stdout contains the header information followed by the page content.
    # We use #scrub here to avoid "invalid byte sequence in UTF-8" errors.
    output = stdout.scrub.lstrip

    # Separate the head(s)/body and identify the final URL (after any
    # redirections)
    final_url = nil
    max_iterations = 5
    iterations = 0
    while output.match?(%r{\AHTTP/[\d.]+ \d+}) && output.include?(HTTP_HEAD_BODY_SEPARATOR)
      iterations += 1
      raise "Too many redirects (max = #{max_iterations})" if iterations > max_iterations

      head_text, _, output = output.partition(HTTP_HEAD_BODY_SEPARATOR)
      output = output.lstrip

      location = head_text[/^Location:\s*(.*)$/i, 1]
      next if location.blank?

      # The capture can end with the `\r` of the header's `CRLF`
      location.chomp!
      # Convert a relative redirect URL to an absolute URL. It is relative
      # to the URL of the response that issued it, i.e. the previous
      # redirect target when there is one. The result is converted back to
      # a `String`, so `final_url` has the same type for relative and
      # absolute redirects.
      unless location.match?(PageMatch::URL_MATCH_REGEX)
        location = URI.join(final_url || url, location).to_s
      end
      final_url = location
    end

    data = { content: output }
    data[:final_url] = final_url if final_url.present? && final_url != url
    return data
  end

  # Neither attempt succeeded, so report the error from the last attempt
  /^(?<error_msg>curl: \(\d+\) .+)/ =~ stderr
  {
    messages: [error_msg.presence || "cURL failed without an error"],
  }
end
|
2020-08-08 07:16:06 +05:30
|
|
|
end
|
|
|
|
end
|
|
|
|
end
|
|
|
|
|
|
|
|
require_relative "strategy/apache"
|
|
|
|
require_relative "strategy/bitbucket"
|
2020-12-11 04:31:14 +01:00
|
|
|
require_relative "strategy/cpan"
|
2021-03-17 01:58:31 +05:30
|
|
|
require_relative "strategy/electron_builder"
|
2021-04-04 03:00:34 +02:00
|
|
|
require_relative "strategy/extract_plist"
|
2020-08-08 07:16:06 +05:30
|
|
|
require_relative "strategy/git"
|
2020-12-02 18:04:22 +05:30
|
|
|
require_relative "strategy/github_latest"
|
2020-08-08 07:16:06 +05:30
|
|
|
require_relative "strategy/gnome"
|
|
|
|
require_relative "strategy/gnu"
|
|
|
|
require_relative "strategy/hackage"
|
2020-12-14 02:49:32 +01:00
|
|
|
require_relative "strategy/header_match"
|
2020-08-08 07:16:06 +05:30
|
|
|
require_relative "strategy/launchpad"
|
|
|
|
require_relative "strategy/npm"
|
|
|
|
require_relative "strategy/page_match"
|
|
|
|
require_relative "strategy/pypi"
|
|
|
|
require_relative "strategy/sourceforge"
|
2020-12-12 21:59:04 +01:00
|
|
|
require_relative "strategy/sparkle"
|
2020-08-08 07:16:06 +05:30
|
|
|
require_relative "strategy/xorg"
|