require 'csv'
require 'etc'
require 'open3'
require 'rubygems'
require 'rubygems/package'
require 'tmpdir'
require 'licensee'

# licences.csv comes from validate_ruby_files.rb and contains what could be
# gathered from upstream gemfiles and Fedora specfiles.
# This script exists to validate that the gem2rpm output for Fedora is the
# same, and possibly to validate the license files as well, because MIT and BSD
# alone have around 20 possible spellings between Callaway and SPDX...
csv = CSV.parse(File.read('licences.csv'), col_sep: ';', headers: true)

# Splits a list of work items into slices and runs the given block on every
# slice in its own thread.
class ThreadWorker
  MAX_THREADS = Etc.nprocessors

  # Starts the worker threads immediately.
  #
  # @param items [#to_a] the work items; must not be nil
  # @yield [slice] called once per slice, inside a dedicated thread
  # @yieldparam slice [Array] a consecutive part of +items+
  # @raise [ArgumentError] when +items+ is nil or no block is given
  def initialize(items)
    raise ArgumentError, 'Worker queue received no work...' unless items
    raise ArgumentError, 'Provide block to execute in threads with items.' unless block_given?

    @items = items.to_a

    # Ceiling division: never more slices (and therefore threads) than
    # MAX_THREADS, while short work lists still get one thread per item
    # instead of being processed serially by a single thread.
    # (MRI threads do not map 1:1 to HW threads, but the subprocesses they
    # spawn do execute on HW threads.)
    slice_size = [(@items.count + MAX_THREADS - 1) / MAX_THREADS, 1].max

    # An empty work list still yields a single empty slice, so the block is
    # always invoked at least once.
    slices = @items.empty? ? [[]] : @items.each_slice(slice_size)

    @worker_pool = slices.map do |slice|
      Thread.new { yield slice }
    end
  end

  # Waits for every thread and concatenates the block results in the order
  # the slices were created. An exception raised inside a thread is re-raised
  # here by Thread#value.
  #
  # @return [Array] the block results, flattened by one level
  def gather_pool
    @worker_pool.map(&:value).flatten(1)
  end

  # Runs a shell command and returns what it printed on stdout.
  #
  # @param command [String] the command line to execute
  # @param pwd [String, nil] directory to run the command in; the current
  #   directory when nil
  # @return [String] captured stdout
  # @raise [RuntimeError] when the command exits unsuccessfully
  def self.execute(command, pwd: nil)
    options = {}
    options[:chdir] = pwd if pwd

    puts "Executing: #{command}"
    stdout, stderr, status = Open3.capture3(command, options)
    raise "Failed command, stderr: #{stderr}, stdout: #{stdout}, cmd: #{command}" unless status.success?

    stdout
  end
end

# Fetch the Fedora sources
# (csv.to_a puts the header row first, hence the [1..]).
ThreadWorker.new(csv.to_a[1..]) do |slice|
  slice.map do |row|
    name = "rubygem-#{row[0]}"
    next if Dir.exist?(name)

    ThreadWorker.execute("fedpkg clone -a #{name}")
  end
end.gather_pool

# Sigh... they have tar as rubygem source...
EXCLUDED_SOURCES = %w[rubygem-morph-cli rubygem-krb5-auth rubygem-asciidoctor rubygem-rgen rubygem-net-irc].freeze

# Fetch the gem from lookaside cache
ThreadWorker.new(csv.to_a[1..]) do |slice|
  slice.map do |row|
    name = "rubygem-#{row[0]}"
    next if EXCLUDED_SOURCES.include?(name)

    gem_files = Dir["#{name}/*.gem"]
    # This guard has to run before the "already downloaded" shortcut below,
    # otherwise it can never fire.
    raise "#{gem_files} ; too much stuff" if gem_files.size > 1
    next unless gem_files.empty?

    puts "sources for #{name}"
    ThreadWorker.execute('fedpkg sources', pwd: name)
  end
end.gather_pool

# One "name;version;license;license_files" line per gem, either taken from the
# cache file or freshly generated by gem2rpm.
res =
  if File.exist?('gem2rpm.cache')
    File.read('gem2rpm.cache').split("\n")
  else
    # Create CSV from the gems, so that we have something to compare.
    gems = Dir['*/*.gem']
    out = ThreadWorker.new(gems) do |slice|
      slice.map do |gem_file|
        # template:
        # <%# gem_name;gem_version;fedora_license;license_file %>
        # <%= spec.name %>;<%= spec.version %>;<%= spec.licenses.join(" and ") %>;<%= main_files.filter do |item| item.license? end.join(" ")%>
        ThreadWorker.execute("gem2rpm --template ./template.erb #{gem_file} --local")
      end
    end.gather_pool.map(&:lstrip)
    File.write('gem2rpm.cache', out.join(''))
    out.map(&:rstrip)
  end
res = res.map { |str| str.split(';') }

# spdx_id values that do not name a real license. "other" is what this script
# always checked for; NOASSERTION / NONE are presumably what Licensee reports
# for its pseudo-licenses -- NOTE(review): confirm against the installed
# Licensee version.
UNDETERMINED_SPDX_IDS = %w[other NOASSERTION NONE].freeze

# Extracts the gem into a temporary directory and lets Licensee detect its
# license.
#
# @param gem_path [String] path of the .gem file, relative to the current
#   directory
# @param license_file [String, nil] license file inside the gem; when nil the
#   whole extracted directory is handed to Licensee
# @return [Object, nil] whatever Licensee.license returned, or nil when
#   nothing was detected or the debugging hook below was triggered
def detect_gem_license(gem_path, license_file)
  curr_dir = Dir.pwd
  license = nil
  content = nil # kept only so it can be inspected from the irb session below

  Dir.mktmpdir do |destination|
    the_gem = Gem::Package.new(File.join(curr_dir, gem_path))
    the_gem.contents # get the files in the gem
    the_gem.extract_files destination # extract the gem into a directory

    if license_file
      license_path = File.join(destination, license_file)
      content = File.read(license_path)
      license = Licensee.license(license_path)
    else
      license = Licensee.license(destination)
    end
  rescue StandardError => e
    # In case of an exception it is necessary to debug what went wrong
    # (nonexistent dir, nonexistent file even despite guards...), so drop into
    # an interactive session with e, content and license in scope.
    require 'irb'
    binding.irb
  end

  license
end

# Checks whether the license file shipped in the gem is recognised as MIT.
#
# @param gem_path [String] path of the .gem file, relative to the current
#   directory
# @param license_file [String] license file inside the gem
# @return [String] "TRUE valid MIT" or "FALSE, inspection required"
def licensee_mit(gem_path, license_file)
  license = detect_gem_license(gem_path, license_file)
  return 'TRUE valid MIT' if license && license.spdx_id == 'MIT'

  'FALSE, inspection required'
end

# Asks Licensee which license the gem (or its license file) might carry.
#
# @param gem_path [String] path of the .gem file, relative to the current
#   directory
# @param license_file [String, nil] license file inside the gem, nil to look
#   at the whole gem
# @return [String] "The license might be <id>" or "Inspection required"
def licensee_general(gem_path, license_file)
  spdx_id = detect_gem_license(gem_path, license_file)&.spdx_id

  if spdx_id && !UNDETERMINED_SPDX_IDS.include?(spdx_id)
    "The license might be #{spdx_id}"
  else
    'Inspection required'
  end
end

# Build one report line per gem2rpm record:
# "<verdict>[;<detail>];<gem name>;<fedora license>;<gem2rpm license>"
ret = res.map do |arr|
  gem2rpm_name, _gem2rpm_ver, gem2rpm_license, gem2rpm_license_file = arr

  fedora_gem = csv.find { |row| row['gem_name'] == gem2rpm_name }
  # The row is looked up by name, so the only way the names can disagree is
  # that there is no such row at all.
  raise "Gem #{gem2rpm_name.inspect} reported by gem2rpm has no row in licences.csv" unless fedora_gem

  fedora_name = fedora_gem['gem_name']
  fedora_license = fedora_gem['fedora_license']
  fedora_gem_license = fedora_gem['gem_license']

  str =
    if fedora_license == fedora_gem_license && fedora_license == gem2rpm_license
      mit = fedora_license.to_s.match?(/MIT/) # the license may be nil
      gem_glob = "rubygem-#{fedora_name}/#{fedora_name}*.gem"

      detail =
        if mit && gem2rpm_license_file && !gem2rpm_license_file.empty?
          licensee_mit(Dir[gem_glob].min, gem2rpm_license_file)
        elsif mit
          'inspection required, MIT without licensefile detected'
        elsif %w[BSD-2-Clause BSD-3-Clause Apache-2.0].include?(fedora_license)
          'Valid SPDX ID, no intervention required.'
        else
          begin
            # Alternative check: ThreadWorker.execute("license-validate #{fedora_license}")
            licensee_general(Dir[gem_glob].min, gem2rpm_license_file)
          rescue RuntimeError => e
            # Report the problem and emit the line without a detail column.
            puts e.message
            nil
          end
        end

      ['fedora matches gem2rpm', detail].compact.join(';')
    else
      'Fedora does not match gem2rpm;Inspection needed'
    end

  "#{str};#{fedora_name};#{fedora_license};#{gem2rpm_license}"
end

puts ret.sort