diff --git a/app/routines/exercises/tag/spreadsheet.rb b/app/routines/exercises/tag/spreadsheet.rb index be110803..cf8ef72b 100644 --- a/app/routines/exercises/tag/spreadsheet.rb +++ b/app/routines/exercises/tag/spreadsheet.rb @@ -1,6 +1,6 @@ # Tags Exercises based on a spreadsheet # Row format: -# - Exercise UID +# - Exercise ID or Nickname # - Tags... module Exercises module Tag @@ -11,27 +11,36 @@ class Spreadsheet include RowParser include ::Exercises::Tagger - def exec(filename:, skip_first_row: true) + def exec(filename:) Rails.logger.info { "Filename: #{filename}" } - row_offset = skip_first_row ? 1 : 0 + initialized = false + + query_field = :number record_failures do |failures| - ProcessSpreadsheet.call(filename: filename, offset: row_offset) do |row, row_index| + ProcessSpreadsheet.call(filename: filename, headers: :downcase) do |headers, row, row_index| + unless initialized + query_field = :nickname if headers[0].include? 'nickname' + initialized = true + end + values = row.compact next if values.size < 2 - exercise_numbers = values.first.split(',').map(&:to_i) + exercise_numbers_or_nicknames = values.first.split(',') exercises = Exercise.joins(publication: :publication_group) - .where(publication: {publication_group: {number: exercise_numbers}}) + .where(publication: { + publication_group: { query_field => exercise_numbers_or_nicknames } + }) .preload(:tags, publication: :publication_group) .latest - not_found_numbers = exercise_numbers - exercises.map(&:number) + not_found_numbers_or_nicknames = exercise_numbers_or_nicknames - exercises.map(&query_field) Rails.logger.warn do - "WARNING: Couldn't find any Exercises with numbers #{not_found_numbers.join(', ')}" - end unless not_found_numbers.empty? + "WARNING: Couldn't find any Exercises with #{query_field}(s) #{not_found_numbers_or_nicknames.join(', ')}" + end unless not_found_numbers_or_nicknames.empty? tags = values.slice(1..-1).flat_map { |value| value.split(',') } diff --git a/app/routines/process_spreadsheet.rb b/app/routines/process_spreadsheet.rb index 9c21071d..8b300e08 100644 --- a/app/routines/process_spreadsheet.rb +++ b/app/routines/process_spreadsheet.rb @@ -5,21 +5,23 @@ class ProcessSpreadsheet # "headers" can be either false to disable headers, # or a method name to normalize the headers, like :downcase - def exec(filename:, offset: 1, pad_cells: true, headers: false, &block) + def exec(filename:, offset: 1, pad_xlsx: true, headers: false, &block) raise ArgumentError, 'A block must be provided' if block.nil? if File.extname(filename) == '.csv' klass = Roo::CSV method = :each + options = {} else klass = Roo::Excelx method = :each_row_streaming + options = { pad_cells: pad_xlsx } end args = [] - pad_to_size = 0 if pad_cells - klass.new(filename).public_send(method, pad_cells: pad_cells).each_with_index do |row, row_index| - normalized_row = row.map { |cell| cell&.value&.to_s&.strip } + pad_to_size = 0 if pad_xlsx + klass.new(filename).public_send(method, **options).each_with_index do |row, row_index| + normalized_row = row.map { |cell| (cell.respond_to?(:value) ? cell.value : cell)&.to_s&.strip } if headers && row_index == 0 header_row = normalized_row @@ -27,7 +29,7 @@ def exec(filename:, offset: 1, pad_cells: true, headers: false, &block) header.send(headers) unless header.nil? end if [String, Symbol].include?(headers.class) args << header_row - elsif pad_cells + elsif pad_xlsx normalized_row += [nil] * (pad_to_size - row.length) if pad_to_size > row.length pad_to_size = row.length end diff --git a/lib/tasks/exercises/tag.rake b/lib/tasks/exercises/tag.rake index 5bca1f43..0fe79cbc 100644 --- a/lib/tasks/exercises/tag.rake +++ b/lib/tasks/exercises/tag.rake @@ -20,12 +20,11 @@ namespace :exercises do end # Tags exercises using a spreadsheet - # Arguments are, in order: - # filename, [skip_first_row] + # Argument is filename # Example: rake exercises:tag:spreadsheet[tags.xlsx] # will tag exercises based on tags.xlsx desc 'tags exercises using a spreadsheet' - task :spreadsheet, [:filename, :skip_first_row] => :environment do |t, args| + task :spreadsheet, [:filename] => :environment do |t, args| # Output import logging info to the console (except in the test environment) original_logger = Rails.logger @@ -83,38 +82,52 @@ namespace :exercises do Rails.logger.info { "Processing \"#{args[:filename]}\"" } output_filename = "#{book.slug}.csv" + + initialized = false + chapter_index = nil - exercise_id_index = nil + exercise_id_or_nickname_index = nil CSV.open(output_filename, 'w') do |csv| - csv << [ 'Exercise UID', 'Tags...' ] - ProcessSpreadsheet.call(filename: args[:filename], headers: :downcase) do |headers, row, index| - chapter_index ||= headers.index { |header| header&.include? 'chapter' } - page_index ||= headers.index { |header| header&.include?('page') || header&.include?('module') } - chapter_uuid_by_page_uuid ||= {} - if chapter_index.nil? - raise ArgumentError, 'Could not find Chapter, Page or Module column' if page_index.nil? - chapters.each do |chapter| - chapter.parts.each { |page| chapter_uuid_by_page_uuid[page.uuid] = chapter.uuid } + unless initialized + chapter_index ||= headers.index { |header| header&.include? 'chapter' } + page_index ||= headers.index { |header| header&.include?('page') || header&.include?('module') } + chapter_uuid_by_page_uuid ||= {} + if chapter_index.nil? + raise ArgumentError, 'Could not find Chapter, Page or Module column' if page_index.nil? + chapters.each do |chapter| + chapter.parts.each { |page| chapter_uuid_by_page_uuid[page.uuid] = chapter.uuid } + end + end + + exercise_id_or_nickname_index ||= headers.index do |header| + header&.include?('assessment') || header&.include?('exercise') + end + if exercise_id_or_nickname_index.nil? + exercise_id_or_nickname_index ||= headers.index { |header| header&.include?('nickname') } + + raise ArgumentError, 'Could not find "Assessment ID" or "Nickname" columns' \ + if exercise_id_or_nickname_index.nil? + + csv << [ 'Exercise Nickname', 'Tags...' ] + else + csv << [ 'Exercise ID', 'Tags...' ] end - end - exercise_id_index ||= headers.index do |header| - header&.include?('assessment') || header&.include?('exercise') + initialized = true end - raise ArgumentError, 'Could not find Assessment ID column' if exercise_id_index.nil? - if row[exercise_id_index].blank? + if row[exercise_id_or_nickname_index].blank? Rails.logger.info { "Skipped row #{index + 1} due to no Exercise ID" } next end chapter = chapter_index.nil? ? chapter_uuid_by_page_uuid[row[page_index]] : row[chapter_index] # The value in the Chapter column may be a UUID or a chapter number - chapter_uuid = chapter_uuids.include?(chapter) ? chapter : chapter_uuids[Integer(chapter) - 1] + chapter_uuid = chapter_uuids.include?(chapter) ? chapter : chapter_uuids[Float(chapter).to_i - 1] csv << [ - row[exercise_id_index], + row[exercise_id_or_nickname_index], "assessment:practice:https://openstax.org/orn/book:subbook/#{ args[:book_uuid]}:#{chapter_uuid}" ]