# typed: strict
# frozen_string_literal: true
module Homebrew
module Livecheck
module Strategy
# The {Xml} strategy fetches content at a URL, parses it as XML using
# `REXML` and provides the `REXML::Document` to a `strategy` block.
# If a regex is present in the `livecheck` block, it should be passed
# as the second argument to the `strategy` block.
#
# This is a generic strategy that doesn't contain any logic for finding
# versions, as the structure of XML data varies. Instead, a `strategy`
# block must be used to extract version information from the XML data.
# For more information on how to work with an `REXML::Document` object,
# please refer to the [`REXML::Document`](https://ruby.github.io/rexml/REXML/Document.html)
# and [`REXML::Element`](https://ruby.github.io/rexml/REXML/Element.html)
# documentation.
#
# This strategy is not applied automatically and it is necessary to use
# `strategy :xml` in a `livecheck` block (in conjunction with a
# `strategy` block) to use it.
#
# This strategy's {find_versions} method can be used in other strategies
# that work with XML content, so it should only be necessary to write
# the version-finding logic that works with the parsed XML data.
#
# @api public
class Xml
# NOTE(review): presumably the human-readable name of this strategy; how
# it is consumed is not visible in this file — confirm against callers.
NICE_NAME = "XML"
# A priority of zero causes livecheck to skip the strategy. We do this
# for {Xml} so we can selectively apply it only when a strategy block
# is provided in a `livecheck` block.
PRIORITY = 0
# The `Regexp` used to determine if the strategy applies to the URL.
# It matches an `http://` or `https://` scheme, case-insensitively.
# Note that `^` anchors at the start of any line in Ruby, not only at
# the start of the string.
URL_MATCH_REGEX = %r{^https?://}i
# Whether the strategy can be applied to the provided URL.
# {Xml} will technically match any HTTP URL but is only usable with
# a `livecheck` block containing a `strategy` block.
#
# @param url [String] the URL to match against
# @return [Boolean]
sig { params(url: String).returns(T::Boolean) }
# The URL qualifies whenever `URL_MATCH_REGEX` finds a match in it.
def self.match?(url) = URL_MATCH_REGEX.match?(url)
# Parses XML text and returns an `REXML::Document` object.
# @param content [String] the XML text to parse
# @return [REXML::Document, nil]
sig { params(content: String).returns(T.nilable(REXML::Document)) }
def self.parse_xml(content)
  # Records whether the single permitted retry (after stripping an
  # undefined prefix from the content) has already been used.
  retried = false

  begin
    REXML::Document.new(content)
  rescue REXML::UndefinedNamespaceException => e
    # The offending prefix is extracted from the exception text.
    missing_prefix = e.to_s[/Undefined prefix ([^ ]+) found/i, 1]
    raise "Could not identify undefined prefix." if missing_prefix.blank?

    # Parsing is only re-attempted once; a second namespace failure is
    # treated as unrecoverable.
    raise "Could not parse XML after removing undefined prefix." if retried

    retried = true

    # When an XML document uses a prefix without a corresponding
    # namespace declaration, the prefix has to be stripped from tag names
    # (`<prefix:`, `</prefix:`) and from space-preceded names (such as
    # attributes) before REXML is able to parse the content.
    prefix_pattern = %r{(</?| )#{Regexp.escape(missing_prefix)}:}
    content = content.gsub(prefix_pattern, '\1')
    retry
  end
end
# Retrieves the stripped inner text of an `REXML` element. Returns
# `nil` if the optional child element doesn't exist or the text is
# blank.
# @param element [REXML::Element] an `REXML` element to retrieve text
# from, either directly or from a child element
# @param child_path [String, nil] the XPath of a child element to
# retrieve text from
# @return [String, nil]
sig {
params(
element: REXML::Element,
child_path: T.nilable(String),
).returns(T.nilable(String))
}
def self.element_text(element, child_path = nil)
  # With a child path, read from the first matching child element (which
  # may not exist); otherwise read from the element that was passed in.
  target = child_path.present? ? element.get_elements(child_path).first : element
  raw_text = target&.text

  # A missing element, missing text and blank text all produce `nil`.
  raw_text.blank? ? nil : raw_text.strip
end
# Parses XML text and identifies versions using a `strategy` block.
# If a regex is provided, it will be passed as the second argument to
# the `strategy` block (after the parsed XML data).
#
# @param content [String] the XML text to parse and check
# @param regex [Regexp, nil] a regex used for matching versions in the
#   content
# @param block [Proc, nil] the `strategy` block, which receives the parsed
#   XML and, when a regex is provided, the regex as a second argument
# @return [Array] the block's return value after it has been passed
#   through `Strategy.handle_block_return`; an empty array when the
#   content, the block or the parsed XML is blank
# @raise [RuntimeError] if the block takes two arguments but no regex is
#   provided
sig {
  params(
    content: String,
    regex:   T.nilable(Regexp),
    block:   T.nilable(Proc),
  ).returns(T::Array[String])
}
def self.versions_from_content(content, regex = nil, &block)
  return [] if content.blank? || block.blank?

  # REXML is only loaded once there is XML content to parse.
  require "rexml"
  xml = parse_xml(content)
  return [] if xml.blank?

  block_return_value = if regex.present?
    yield(xml, regex)
  elsif block.arity == 2
    # The block expects a regex as its second argument but none was given.
    raise "Two arguments found in `strategy` block but no regex provided."
  else
    yield(xml)
  end
  Strategy.handle_block_return(block_return_value)
end
# Checks the XML content at the URL for versions, using the provided
# `strategy` block to extract version information.
#
# @param url [String] the URL of the content to check
# @param regex [Regexp, nil] a regex used for matching versions
# @param provided_content [String, nil] page content to use in place of
#   fetching via `Strategy#page_content`
# @param homebrew_curl [Boolean] whether to use brewed curl with the URL
# @param unused [Hash] any additional keyword arguments; only
#   `:url_options` is read here (defaulting to an empty hash) and it is
#   forwarded to `Strategy.page_content`
# @param block [Proc, nil] the `strategy` block used to extract versions
#   from the parsed XML
# @return [Hash] a hash containing `:matches`, `:regex` and `:url`, plus
#   `:cached` when `provided_content` is used or whatever
#   `Strategy.page_content` returns when the content is fetched
# @raise [ArgumentError] if no `strategy` block is provided
sig {
  params(
    url:              String,
    regex:            T.nilable(Regexp),
    provided_content: T.nilable(String),
    homebrew_curl:    T::Boolean,
    unused:           T.untyped,
    block:            T.nilable(Proc),
  ).returns(T::Hash[Symbol, T.untyped])
}
def self.find_versions(url:, regex: nil, provided_content: nil, homebrew_curl: false, **unused, &block)
  raise ArgumentError, "#{Utils.demodulize(name)} requires a `strategy` block" if block.blank?

  match_data = { matches: {}, regex:, url: }
  # A missing block has already raised above, so only the URL is checked.
  return match_data if url.blank?

  content = if provided_content.is_a?(String)
    # Content supplied by the caller is used as-is and flagged as cached.
    match_data[:cached] = true
    provided_content
  else
    # The result of the fetch is merged into `match_data` and the page
    # body is then read back from its `:content` key.
    match_data.merge!(
      Strategy.page_content(
        url,
        url_options:   unused.fetch(:url_options, {}),
        homebrew_curl:,
      ),
    )
    match_data[:content]
  end
  return match_data if content.blank?

  versions_from_content(content, regex, &block).each do |match_text|
    match_data[:matches][match_text] = Version.new(match_text)
  end

  match_data
end
end
end
end
end