utils/gzip: introduce new module

This commit is contained in:
Caleb Xu 2023-01-06 22:58:22 -05:00
parent 974984d1f5
commit c75adc8238
No known key found for this signature in database
GPG Key ID: 47E6040D07B8407D
3 changed files with 175 additions and 0 deletions

View File

@ -0,0 +1,95 @@
# typed: false
# frozen_string_literal: true
require "utils/gzip"
# Specs for Utils::Gzip. Most examples compare the SHA-256 of the produced .gz file
# against a fixed checksum, so the input file names, contents and mtimes used below
# must not be changed without regenerating the expected values.
describe Utils::Gzip do
describe "compress_with_options" do
it "uses the explicitly specified mtime, orig_name, and output path when passed" do
# NOTE(review): `mktmpdir` is a project spec helper; presumably it yields a temporary
# directory as a Pathname (the block uses `path/"..."`) — confirm.
mktmpdir do |path|
mtime = Time.at(12345).utc
orig_name = "someotherfile"
output = path/"subdir/anotherfile.gz"
file_content = "Hello world"
# SHA-256 expected for the .gz written with the mtime and orig_name above.
expected_checksum = "df509051b519faa8a1143157d2750d1694dc5fe6373e493c0d5c360be3e61516"
somefile = path/"somefile"
File.write(somefile, file_content)
# The output directory has to exist before the gzip file is opened for writing.
mkdir path/"subdir"
expect(described_class.compress_with_options(somefile, mtime: mtime, orig_name: orig_name,
output: output)).to eq(output)
expect(Digest::SHA256.hexdigest(File.read(output))).to eq(expected_checksum)
end
end
it "uses SOURCE_DATE_EPOCH as mtime when not explicitly specified" do
mktmpdir do |path|
# NOTE(review): ENV is mutated here and not restored inside this example — confirm
# that the spec harness resets the environment between examples.
ENV["SOURCE_DATE_EPOCH"] = "23456"
file_content = "Hello world"
expected_checksum = "a579be88ec8073391a5753b1df4d87fbf008aaec6b5a03f8f16412e2e01f119a"
somefile = path/"somefile"
File.write(somefile, file_content)
# With no `output:` given, the result is expected next to the input as "<input>.gz".
expect(described_class.compress_with_options(somefile).to_s).to eq("#{somefile}.gz")
expect(Digest::SHA256.hexdigest(File.read("#{somefile}.gz"))).to eq(expected_checksum)
end
end
end
describe "compress" do
it "creates non-reproducible gz files from input files" do
mktmpdir do |path|
files = (0..2).map { |n| path/"somefile#{n}" }
FileUtils.touch files
results = described_class.compress(*files, reproducible: false)
# No checksum is asserted in this mode; only the returned paths and the
# existence of the .gz files are checked.
3.times do |n|
expect(results[n].to_s).to eq("#{files[n]}.gz")
expect(Pathname.new("#{files[n]}.gz")).to exist
end
end
end
it "creates reproducible gz files from input files with explicit mtime" do
mtime = Time.at(12345).utc
# One checksum per input file (somefile0..somefile2), in order. The contents are
# identical, but `compress_with_options` stores each file's basename as orig_name.
expected_checksums = %w[
5b45cabc7f0192854365aeccd82036e482e35131ba39fbbc6d0684266eb2e88a
d422bf4cbede17ae242135d7f32ba5379fbffb288c29cd38b7e5e1a5f89073f8
1d93a3808e2bd5d8c6371ea1c9b8b538774d6486af260719400fc3a5b7ac8d6f
]
mktmpdir do |path|
files = (0..2).map { |n| path/"somefile#{n}" }
files.each { |f| File.write(f, "Hello world") }
results = described_class.compress(*files, mtime: mtime)
3.times do |n|
expect(results[n].to_s).to eq("#{files[n]}.gz")
expect(Digest::SHA256.hexdigest(File.read(results[n]))).to eq(expected_checksums[n])
end
end
end
it "creates reproducible gz files from input files with SOURCE_DATE_EPOCH as mtime" do
# NOTE(review): same unrestored ENV mutation as above — confirm the harness resets it.
ENV["SOURCE_DATE_EPOCH"] = "23456"
# One checksum per input file (somefile0..somefile2), in order.
expected_checksums = %w[
d5e0cc3259b1eb61d93ee5a30d41aef4a382c1cf2b759719c289f625e27b915c
068657725bca5f9c2bc62bc6bf679eb63786e92d16cae575dee2fd9787a338f3
e566e9fdaf9aa2a7c9501f9845fed1b70669bfa679b0de609e3b63f99988784d
]
mktmpdir do |path|
files = (0..2).map { |n| path/"somefile#{n}" }
files.each { |f| File.write(f, "Hello world") }
results = described_class.compress(*files)
3.times do |n|
expect(results[n].to_s).to eq("#{files[n]}.gz")
expect(Digest::SHA256.hexdigest(File.read(results[n]))).to eq(expected_checksums[n])
end
end
end
end
end

View File

@ -0,0 +1,73 @@
# typed: true
# frozen_string_literal: true
# Chunk size in bytes (64 KiB) used when feeding file data to zlib. Apple's gzip is
# built on zlib as well, so the same buffer size is used here:
# https://github.com/apple-oss-distributions/file_cmds/blob/file_cmds-400/gzip/gzip.c#L147
GZIP_BUFFER_SIZE = 65_536
module Utils
  # Helper functions for creating gzip files.
  #
  # @api private
  module Gzip
    extend T::Sig

    module_function

    # Compresses a single file into a gzip archive with explicit header metadata,
    # then deletes the input file.
    #
    # @param path [String, Pathname] file to compress; removed once the archive is written
    # @param mtime [Integer, Time] modification time stored in the gzip header;
    #   defaults to the integer value of the `SOURCE_DATE_EPOCH` environment variable
    # @param orig_name [String] original file name stored in the gzip header;
    #   defaults to the basename of `path`
    # @param output [String, Pathname] where to write the archive; defaults to `"#{path}.gz"`
    # @return [Pathname] path of the archive that was written
    # @raise [ArgumentError] if `mtime` converts to zero (which includes an unset
    #   `SOURCE_DATE_EPOCH` when no `mtime` is passed)
    sig {
      params(
        path:      T.any(String, Pathname),
        mtime:     T.any(Integer, Time),
        orig_name: String,
        output:    T.any(String, Pathname),
      ).returns(Pathname)
    }
    def compress_with_options(path, mtime: ENV["SOURCE_DATE_EPOCH"].to_i, orig_name: File.basename(path),
                              output: "#{path}.gz")
      # Ideally, we would just set mtime = 0 if SOURCE_DATE_EPOCH is absent, but Ruby's
      # Zlib::GzipWriter does not properly handle the case of setting mtime = 0:
      # https://bugs.ruby-lang.org/issues/16285
      #
      # This was fixed in https://github.com/ruby/zlib/pull/10. Set mtime to 0 instead
      # of raising exception once we are using zlib gem version 1.1.0 or newer.
      if mtime.to_i.zero?
        raise ArgumentError,
              "Can't create reproducible gzip file without a valid mtime"
      end

      File.open(path, "rb") do |fp|
        odebug "Creating gzip file at #{output}"
        # The block form closes the writer on success and on error alike. Unlike a
        # manual `ensure gz.close`, it never calls `close` on a writer that failed to
        # open, so an error raised while opening `output` (e.g. a missing directory)
        # reaches the caller instead of being replaced by a NoMethodError on nil.
        Zlib::GzipWriter.open(output) do |gz|
          gz.mtime = mtime
          gz.orig_name = orig_name
          # Stream the input in fixed-size chunks rather than loading it whole.
          gz.write(fp.read(GZIP_BUFFER_SIZE)) until fp.eof?
        end
      end

      # Only reached when compression succeeded; the input is replaced by the archive.
      FileUtils.rm_f path
      Pathname.new(output)
    end

    # Compresses each of the given files to `"<path>.gz"`.
    #
    # @param paths [Array<String, Pathname>] files to compress
    # @param reproducible [Boolean] when true, every file goes through
    #   {compress_with_options} with `mtime`; when false, `gzip` is invoked through
    #   `safe_system` for each file and `mtime` is not used
    # @param mtime [Integer, Time] modification time for reproducible archives;
    #   defaults to the integer value of the `SOURCE_DATE_EPOCH` environment variable
    # @return [Array<Pathname>] paths of the `.gz` files, in the order of `paths`
    # @raise [ArgumentError] in reproducible mode, if `mtime` converts to zero
    sig {
      params(
        paths:        T.any(String, Pathname),
        reproducible: T::Boolean,
        mtime:        T.any(Integer, Time),
      ).returns(T::Array[Pathname])
    }
    def compress(*paths, reproducible: true, mtime: ENV["SOURCE_DATE_EPOCH"].to_i)
      if reproducible
        paths.map do |path|
          compress_with_options(path, mtime: mtime)
        end
      else
        paths.map do |path|
          safe_system "gzip", path
          Pathname.new("#{path}.gz")
        end
      end
    end
  end
end

View File

@ -0,0 +1,7 @@
# typed: strict
# Type-checker-only declaration for Utils::Gzip.
module Utils
module Gzip
# NOTE(review): presumably declared so that Sorbet resolves Kernel methods called
# from this module's methods — confirm.
include Kernel
end
end