dev-cmd/contributions: usability/performance improvements.

- more sensible/performant defaults: default to primary repositories
  only for the last year rather than all repositories forever
- allow specifying more than one user at a time
- output the breakdown of contributions without needing `--csv`
- add a space before the `--csv` output
- consolidate some code
- avoid counting authored commits twice, to improve performance
- retry failed GitHub API calls (this happens often when querying all
  maintainers)
- stop counting after we find 1000 commits for a given user to avoid
  excessive API queries/pagination
This commit is contained in:
Mike McQuaid 2023-08-30 15:08:50 +01:00
parent 851df262a0
commit d357607b2c
No known key found for this signature in database
GPG Key ID: 3338A31AFDB1D829
8 changed files with 121 additions and 110 deletions

View File

@ -13,28 +13,32 @@ module Homebrew
OFFICIAL_CMD_TAPS.keys.map { |t| t.delete_prefix("homebrew/") },
OFFICIAL_CASK_TAPS.reject { |t| t == "cask" },
].flatten.freeze
MAX_REPO_COMMITS = 1000
sig { returns(CLI::Parser) }
def contributions_args
Homebrew::CLI::Parser.new do
usage_banner "`contributions` [--user=<email|username>] [<--repositories>`=`] [<--csv>]"
description <<~EOS
Contributions to Homebrew repos.
Contributions to Homebrew repositories.
EOS
comma_array "--repositories",
description: "Specify a comma-separated (no spaces) list of repositories to search. " \
description: "Specify a comma-separated list of repositories to search. " \
"Supported repositories: #{SUPPORTED_REPOS.map { |t| "`#{t}`" }.to_sentence}. " \
"Omitting this flag, or specifying `--repositories=all`, searches all repositories. " \
"Use `--repositories=primary` to search only the main repositories: brew,core,cask."
"Omitting this flag, or specifying `--repositories=primary`, searches only the " \
"main repositories: brew,core,cask. " \
"Specifying `--repositories=all`, searches all repositories. "
flag "--from=",
description: "Date (ISO-8601 format) to start searching contributions."
description: "Date (ISO-8601 format) to start searching contributions. " \
"Omitting this flag searches the last year."
flag "--to=",
description: "Date (ISO-8601 format) to stop searching contributions."
flag "--user=",
description: "A GitHub username or email address of a specific person to find contribution data for."
comma_array "--user=",
description: "Specify a comma-separated list of GitHub usernames or email addresses to find " \
"contributions from. Omitting this flag searches maintainers."
switch "--csv",
description: "Print a CSV of contributions across repositories over the time period."
@ -48,41 +52,47 @@ module Homebrew
results = {}
grand_totals = {}
all_repos = args.repositories.nil? || args.repositories.include?("all")
repos = if all_repos
SUPPORTED_REPOS
elsif args.repositories.include?("primary")
repos = if args.repositories.blank? || args.repositories.include?("primary")
PRIMARY_REPOS
elsif args.repositories.include?("all")
SUPPORTED_REPOS
else
args.repositories
end
if args.user
user = args.user
results[user] = scan_repositories(repos, user, args)
grand_totals[user] = total(results[user])
from = args.from.presence || Date.today.prev_year.iso8601
puts "#{user} contributed #{Utils.pluralize("time", grand_totals[user].values.sum,
include_count: true)} #{time_period(args)}."
puts generate_csv(T.must(user), results[user], grand_totals[user]) if args.csv?
return
end
contribution_types = [:author, :committer, :coauthorship, :review]
maintainers = GitHub.members_by_team("Homebrew", "maintainers")
maintainers.each do |username, _|
users = args.user.presence || GitHub.members_by_team("Homebrew", "maintainers")
users.each do |username, _|
# TODO: Using the GitHub username to scan the `git log` undercounts some
# contributions as people might not always have configured their Git
# committer details to match the ones on GitHub.
# TODO: Switch to using the GitHub APIs instead of `git log` if
# they ever support trailers.
results[username] = scan_repositories(repos, username, args)
results[username] = scan_repositories(repos, username, args, from: from)
grand_totals[username] = total(results[username])
puts "#{username} contributed #{Utils.pluralize("time", grand_totals[username].values.sum,
include_count: true)} #{time_period(args)}."
contributions = contribution_types.map do |type|
type_count = grand_totals[username][type]
next if type_count.to_i.zero?
"#{Utils.pluralize("time", type_count, include_count: true)} (#{type})"
end.compact
contributions << "#{Utils.pluralize("time", grand_totals[username].values.sum, include_count: true)} (total)"
puts [
"#{username} contributed",
*contributions.to_sentence,
"#{time_period(from: from, to: args.to)}.",
].join(" ")
end
puts generate_maintainers_csv(grand_totals) if args.csv?
return unless args.csv?
puts
puts generate_csv(grand_totals)
end
sig { params(repo: String).returns(Pathname) }
@ -92,23 +102,23 @@ module Homebrew
Tap.fetch("homebrew", repo).path
end
sig { params(args: Homebrew::CLI::Args).returns(String) }
def time_period(args)
if args.from && args.to
"between #{args.from} and #{args.to}"
elsif args.from
"after #{args.from}"
elsif args.to
"before #{args.to}"
sig { params(from: T.nilable(String), to: T.nilable(String)).returns(String) }
def time_period(from:, to:)
if from && to
"between #{from} and #{to}"
elsif from
"after #{from}"
elsif to
"before #{to}"
else
"in all time"
end
end
sig { params(totals: Hash).returns(String) }
def generate_maintainers_csv(totals)
def generate_csv(totals)
CSV.generate do |csv|
csv << %w[user repo author committer coauthorships reviews total]
csv << %w[user repo author committer coauthorship review total]
totals.sort_by { |_, v| -v.values.sum }.each do |user, total|
csv << grand_total_row(user, total)
@ -116,25 +126,6 @@ module Homebrew
end
end
sig { params(user: String, results: Hash, grand_total: Hash).returns(String) }
def generate_csv(user, results, grand_total)
CSV.generate do |csv|
csv << %w[user repo author committer coauthorships reviews total]
results.each do |repo, counts|
csv << [
user,
repo,
counts[:author],
counts[:committer],
counts[:coauthorships],
counts[:reviews],
counts.values.sum,
]
end
csv << grand_total_row(user, grand_total)
end
end
sig { params(user: String, grand_total: Hash).returns(Array) }
def grand_total_row(user, grand_total)
[
@ -142,13 +133,13 @@ module Homebrew
"all",
grand_total[:author],
grand_total[:committer],
grand_total[:coauthorships],
grand_total[:reviews],
grand_total[:coauthorship],
grand_total[:review],
grand_total.values.sum,
]
end
def scan_repositories(repos, person, args)
def scan_repositories(repos, person, args, from:)
data = {}
repos.each do |repo|
@ -171,11 +162,13 @@ module Homebrew
puts "Determining contributions for #{person} on #{repo_full_name}..." if args.verbose?
author_commits, committer_commits = GitHub.count_repo_commits(repo_full_name, person, args,
max: MAX_REPO_COMMITS)
data[repo] = {
author: GitHub.count_repo_commits(repo_full_name, person, "author", args),
committer: GitHub.count_repo_commits(repo_full_name, person, "committer", args),
coauthorships: git_log_trailers_cmd(T.must(repo_path), person, "Co-authored-by", args),
reviews: count_reviews(repo_full_name, person, args),
author: author_commits,
committer: committer_commits,
coauthorship: git_log_trailers_cmd(T.must(repo_path), person, "Co-authored-by", from: from, to: args.to),
review: count_reviews(repo_full_name, person, args),
}
end
@ -184,7 +177,7 @@ module Homebrew
sig { params(results: Hash).returns(Hash) }
def total(results)
totals = { author: 0, committer: 0, coauthorships: 0, reviews: 0 }
totals = { author: 0, committer: 0, coauthorship: 0, review: 0 }
results.each_value do |counts|
counts.each do |kind, count|
@ -195,12 +188,15 @@ module Homebrew
totals
end
sig { params(repo_path: Pathname, person: String, trailer: String, args: Homebrew::CLI::Args).returns(Integer) }
def git_log_trailers_cmd(repo_path, person, trailer, args)
sig {
params(repo_path: Pathname, person: String, trailer: String, from: T.nilable(String),
to: T.nilable(String)).returns(Integer)
}
def git_log_trailers_cmd(repo_path, person, trailer, from:, to:)
cmd = ["git", "-C", repo_path, "log", "--oneline"]
cmd << "--format='%(trailers:key=#{trailer}:)'"
cmd << "--before=#{args.to}" if args.to
cmd << "--after=#{args.from}" if args.from
cmd << "--before=#{to}" if to
cmd << "--after=#{from}" if from
Utils.safe_popen_read(*cmd).lines.count { |l| l.include?(person) }
end

View File

@ -90,49 +90,48 @@ describe GitHub do
it "counts commits authored by a user" do
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/cask", "user1", "author", {}).and_return(five_shas)
.with("homebrew/cask", "user1", "author", {}, nil).and_return(five_shas)
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/cask", "user1", "committer", {}, nil).and_return([])
expect(described_class.count_repo_commits("homebrew/cask", "user1", "author", {})).to eq(5)
expect(described_class.count_repo_commits("homebrew/cask", "user1", {})).to eq([5, 0])
end
it "counts commits committed by a user" do
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/core", "user1", "author", {}).and_return([])
.with("homebrew/core", "user1", "author", {}, nil).and_return([])
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/core", "user1", "committer", {}).and_return(five_shas)
.with("homebrew/core", "user1", "committer", {}, nil).and_return(five_shas)
expect(described_class.count_repo_commits("homebrew/core", "user1", "committer", {})).to eq(5)
expect(described_class.count_repo_commits("homebrew/core", "user1", {})).to eq([0, 5])
end
it "calculates correctly when authored > committed with different shas" do
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/cask", "user1", "author", {}).and_return(ten_shas)
.with("homebrew/cask", "user1", "author", {}, nil).and_return(ten_shas)
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/cask", "user1", "committer", {}).and_return(%w[1 2 3 4 5])
.with("homebrew/cask", "user1", "committer", {}, nil).and_return(%w[1 2 3 4 5])
expect(described_class.count_repo_commits("homebrew/cask", "user1", "author", {})).to eq(10)
expect(described_class.count_repo_commits("homebrew/cask", "user1", "committer", {})).to eq(5)
expect(described_class.count_repo_commits("homebrew/cask", "user1", {})).to eq([10, 5])
end
it "calculates correctly when committed > authored" do
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/cask", "user1", "author", {}).and_return(five_shas)
.with("homebrew/cask", "user1", "author", {}, nil).and_return(five_shas)
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/cask", "user1", "committer", {}).and_return(ten_shas)
.with("homebrew/cask", "user1", "committer", {}, nil).and_return(ten_shas)
expect(described_class.count_repo_commits("homebrew/cask", "user1", "author", {})).to eq(5)
expect(described_class.count_repo_commits("homebrew/cask", "user1", "committer", {})).to eq(5)
expect(described_class.count_repo_commits("homebrew/cask", "user1", {})).to eq([5, 5])
end
it "deduplicates commits authored and committed by the same user" do
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/core", "user1", "author", {}).and_return(five_shas)
.with("homebrew/core", "user1", "author", {}, nil).and_return(five_shas)
allow(described_class).to receive(:repo_commits_for_user)
.with("homebrew/core", "user1", "committer", {}).and_return(five_shas)
.with("homebrew/core", "user1", "committer", {}, nil).and_return(five_shas)
# Because user1 authored and committed the same 5 commits.
expect(described_class.count_repo_commits("homebrew/core", "user1", "author", {})).to eq(5)
expect(described_class.count_repo_commits("homebrew/core", "user1", "committer", {})).to eq(0)
expect(described_class.count_repo_commits("homebrew/core", "user1", {})).to eq([5, 0])
end
end
end

View File

@ -758,7 +758,7 @@ module GitHub
output[/^Status: (200)/, 1] != "200"
end
def self.repo_commits_for_user(nwo, user, filter, args)
def self.repo_commits_for_user(nwo, user, filter, args, max)
return if Homebrew::EnvConfig.no_github_api?
params = ["#{filter}=#{user}"]
@ -768,20 +768,25 @@ module GitHub
commits = []
API.paginate_rest("#{API_URL}/repos/#{nwo}/commits", additional_query_params: params.join("&")) do |result|
commits.concat(result.map { |c| c["sha"] })
if max.present? && commits.length >= max
opoo "#{user} exceeded #{max} #{nwo} commits as #{filter}, stopped counting!"
break
end
end
commits
end
def self.count_repo_commits(nwo, user, filter, args)
return if Homebrew::EnvConfig.no_github_api?
def self.count_repo_commits(nwo, user, args, max: nil)
odie "Cannot count commits, HOMEBREW_NO_GITHUB_API set!" if Homebrew::EnvConfig.no_github_api?
author_shas = repo_commits_for_user(nwo, user, "author", args)
return author_shas.count if filter == "author"
committer_shas = repo_commits_for_user(nwo, user, "committer", args)
return 0 if committer_shas.empty?
author_shas = repo_commits_for_user(nwo, user, "author", args, max)
committer_shas = repo_commits_for_user(nwo, user, "committer", args, max)
return [0, 0] if author_shas.blank? && committer_shas.blank?
author_count = author_shas.count
# Only count commits where the author and committer are different.
committer_shas.difference(author_shas).count
committer_count = committer_shas.difference(author_shas).count
[author_count, committer_count]
end
end

View File

@ -20,6 +20,7 @@ module GitHub
API_URL = "https://api.github.com"
API_MAX_PAGES = 50
API_MAX_ITEMS = 5000
PAGINATE_RETRY_COUNT = 3
CREATE_GIST_SCOPES = ["gist"].freeze
CREATE_ISSUE_FORK_OR_PR_SCOPES = ["repo"].freeze
@ -281,7 +282,17 @@ module GitHub
def self.paginate_rest(url, additional_query_params: nil, per_page: 100)
(1..API_MAX_PAGES).each do |page|
result = API.open_rest("#{url}?per_page=#{per_page}&page=#{page}&#{additional_query_params}")
retry_count = 1
result = begin
API.open_rest("#{url}?per_page=#{per_page}&page=#{page}&#{additional_query_params}")
rescue Error
if retry_count < PAGINATE_RETRY_COUNT
retry_count += 1
retry
end
raise
end
break if result.blank?
yield(result, page)

View File

@ -528,15 +528,15 @@ __fish_brew_complete_arg 'config' -l quiet -d 'Make some output more quiet'
__fish_brew_complete_arg 'config' -l verbose -d 'Make some output more verbose'
__fish_brew_complete_cmd 'contributions' 'Contributions to Homebrew repos'
__fish_brew_complete_cmd 'contributions' 'Contributions to Homebrew repositories'
__fish_brew_complete_arg 'contributions' -l csv -d 'Print a CSV of contributions across repositories over the time period'
__fish_brew_complete_arg 'contributions' -l debug -d 'Display any debugging information'
__fish_brew_complete_arg 'contributions' -l from -d 'Date (ISO-8601 format) to start searching contributions'
__fish_brew_complete_arg 'contributions' -l from -d 'Date (ISO-8601 format) to start searching contributions. Omitting this flag searches the last year'
__fish_brew_complete_arg 'contributions' -l help -d 'Show this message'
__fish_brew_complete_arg 'contributions' -l quiet -d 'Make some output more quiet'
__fish_brew_complete_arg 'contributions' -l repositories -d 'Specify a comma-separated (no spaces) list of repositories to search. Supported repositories: `brew`, `core`, `cask`, `aliases`, `autoupdate`, `bundle`, `command-not-found`, `test-bot`, `services`, `cask-fonts` and `cask-versions`. Omitting this flag, or specifying `--repositories=all`, searches all repositories. Use `--repositories=primary` to search only the main repositories: brew,core,cask'
__fish_brew_complete_arg 'contributions' -l repositories -d 'Specify a comma-separated list of repositories to search. Supported repositories: `brew`, `core`, `cask`, `aliases`, `autoupdate`, `bundle`, `command-not-found`, `test-bot`, `services`, `cask-fonts` and `cask-versions`. Omitting this flag, or specifying `--repositories=primary`, searches only the main repositories: brew,core,cask. Specifying `--repositories=all`, searches all repositories. '
__fish_brew_complete_arg 'contributions' -l to -d 'Date (ISO-8601 format) to stop searching contributions'
__fish_brew_complete_arg 'contributions' -l user -d 'A GitHub username or email address of a specific person to find contribution data for'
__fish_brew_complete_arg 'contributions' -l user -d 'Specify a comma-separated list of GitHub usernames or email addresses to find contributions from. Omitting this flag searches maintainers'
__fish_brew_complete_arg 'contributions' -l verbose -d 'Make some output more verbose'

View File

@ -155,7 +155,7 @@ __brew_internal_commands() {
'commands:Show lists of built-in and external commands'
'completions:Control whether Homebrew automatically links external tap shell completion files'
'config:Show Homebrew and system configuration info useful for debugging'
'contributions:Contributions to Homebrew repos'
'contributions:Contributions to Homebrew repositories'
'create:Generate a formula or, with `--cask`, a cask for the downloadable file at URL and open it in the editor'
'deps:Show dependencies for formula'
'desc:Display formula'\''s name and one-line description'
@ -677,12 +677,12 @@ _brew_contributions() {
_arguments \
'--csv[Print a CSV of contributions across repositories over the time period]' \
'--debug[Display any debugging information]' \
'--from[Date (ISO-8601 format) to start searching contributions]' \
'--from[Date (ISO-8601 format) to start searching contributions. Omitting this flag searches the last year]' \
'--help[Show this message]' \
'--quiet[Make some output more quiet]' \
'--repositories[Specify a comma-separated (no spaces) list of repositories to search. Supported repositories: `brew`, `core`, `cask`, `aliases`, `autoupdate`, `bundle`, `command-not-found`, `test-bot`, `services`, `cask-fonts` and `cask-versions`. Omitting this flag, or specifying `--repositories=all`, searches all repositories. Use `--repositories=primary` to search only the main repositories: brew,core,cask]' \
'--repositories[Specify a comma-separated list of repositories to search. Supported repositories: `brew`, `core`, `cask`, `aliases`, `autoupdate`, `bundle`, `command-not-found`, `test-bot`, `services`, `cask-fonts` and `cask-versions`. Omitting this flag, or specifying `--repositories=primary`, searches only the main repositories: brew,core,cask. Specifying `--repositories=all`, searches all repositories. ]' \
'--to[Date (ISO-8601 format) to stop searching contributions]' \
'--user[A GitHub username or email address of a specific person to find contribution data for]' \
'--user[Specify a comma-separated list of GitHub usernames or email addresses to find contributions from. Omitting this flag searches maintainers]' \
'--verbose[Make some output more verbose]'
}

View File

@ -1170,16 +1170,16 @@ Display the path to the file being used when invoking `brew` *`cmd`*.
### `contributions` [--user=*`email|username`*] [*`--repositories`*`=`] [*`--csv`*]
Contributions to Homebrew repos.
Contributions to Homebrew repositories.
* `--repositories`:
Specify a comma-separated (no spaces) list of repositories to search. Supported repositories: `brew`, `core`, `cask`, `aliases`, `autoupdate`, `bundle`, `command-not-found`, `test-bot`, `services`, `cask-fonts` and `cask-versions`. Omitting this flag, or specifying `--repositories=all`, searches all repositories. Use `--repositories=primary` to search only the main repositories: brew,core,cask.
Specify a comma-separated list of repositories to search. Supported repositories: `brew`, `core`, `cask`, `aliases`, `autoupdate`, `bundle`, `command-not-found`, `test-bot`, `services`, `cask-fonts` and `cask-versions`. Omitting this flag, or specifying `--repositories=primary`, searches only the main repositories: brew,core,cask. Specifying `--repositories=all`, searches all repositories.
* `--from`:
Date (ISO-8601 format) to start searching contributions.
Date (ISO-8601 format) to start searching contributions. Omitting this flag searches the last year.
* `--to`:
Date (ISO-8601 format) to stop searching contributions.
* `--user`:
A GitHub username or email address of a specific person to find contribution data for.
Specify a comma-separated list of GitHub usernames or email addresses to find contributions from. Omitting this flag searches maintainers.
* `--csv`:
Print a CSV of contributions across repositories over the time period.

View File

@ -1686,15 +1686,15 @@ Treat all named arguments as casks\.
Display the path to the file being used when invoking \fBbrew\fR \fIcmd\fR\.
.
.SS "\fBcontributions\fR [\-\-user=\fIemail|username\fR] [\fI\-\-repositories\fR\fB=\fR] [\fI\-\-csv\fR]"
Contributions to Homebrew repos\.
Contributions to Homebrew repositories\.
.
.TP
\fB\-\-repositories\fR
Specify a comma\-separated (no spaces) list of repositories to search\. Supported repositories: \fBbrew\fR, \fBcore\fR, \fBcask\fR, \fBaliases\fR, \fBautoupdate\fR, \fBbundle\fR, \fBcommand\-not\-found\fR, \fBtest\-bot\fR, \fBservices\fR, \fBcask\-fonts\fR and \fBcask\-versions\fR\. Omitting this flag, or specifying \fB\-\-repositories=all\fR, searches all repositories\. Use \fB\-\-repositories=primary\fR to search only the main repositories: brew,core,cask\.
Specify a comma\-separated list of repositories to search\. Supported repositories: \fBbrew\fR, \fBcore\fR, \fBcask\fR, \fBaliases\fR, \fBautoupdate\fR, \fBbundle\fR, \fBcommand\-not\-found\fR, \fBtest\-bot\fR, \fBservices\fR, \fBcask\-fonts\fR and \fBcask\-versions\fR\. Omitting this flag, or specifying \fB\-\-repositories=primary\fR, searches only the main repositories: brew,core,cask\. Specifying \fB\-\-repositories=all\fR, searches all repositories\.
.
.TP
\fB\-\-from\fR
Date (ISO\-8601 format) to start searching contributions\.
Date (ISO\-8601 format) to start searching contributions\. Omitting this flag searches the last year\.
.
.TP
\fB\-\-to\fR
@ -1702,7 +1702,7 @@ Date (ISO\-8601 format) to stop searching contributions\.
.
.TP
\fB\-\-user\fR
A GitHub username or email address of a specific person to find contribution data for\.
Specify a comma\-separated list of GitHub usernames or email addresses to find contributions from\. Omitting this flag searches maintainers\.
.
.TP
\fB\-\-csv\fR