diff --git a/app/jobs/file_upload_job.rb b/app/jobs/file_upload_job.rb index 028ee74f..56660ce6 100644 --- a/app/jobs/file_upload_job.rb +++ b/app/jobs/file_upload_job.rb @@ -1,7 +1,7 @@ class FileUploadJob < ApplicationJob # Consider removing concurrency limits due to SolidQueue blocking issues # or use a more specific key to avoid blocking all jobs for a language - limits_concurrency to: 3, key: ->(_language_id, content_id, _content_type) { "hard-limit" } + limits_concurrency to: 3, key: ->(*args) { "hard-limit" } retry_on AzureFileShares::Errors::ApiError, wait: :exponentially_longer, attempts: 3 retry_on Timeout::Error, wait: :exponentially_longer, attempts: 2 @@ -12,34 +12,34 @@ class FileUploadJob < ApplicationJob Rails.logger.error "Suggestion: Check provider names for invalid characters if Azure API errors" end - def perform(language_id, content_id, content_type, share = ENV["AZURE_STORAGE_SHARE_NAME"]) + def perform(language_id, file_id, provider_id = nil, share = ENV["AZURE_STORAGE_SHARE_NAME"]) @language = Language.find(language_id) - @processor = LanguageContentProcessor.new(language) @share = share + @file_id = file_id.to_sym + @processor = LanguageContentProcessor.new(language) - send_provider_content(content_id) if content_type == "provider" - send_language_content(content_id.to_sym) if content_type == "file" + send_provider_content(provider_id) if provider_id.present? + send_language_content if provider_id.blank? end private - attr_reader :language, :processor, :share + attr_reader :language, :file_id, :share, :processor def send_provider_content(provider_id) provider = language.providers.find(provider_id) - return unless provider - - processor.provider_files.each do |file| - FileWorker.new( - share:, - name: file.name[provider], - path: file.path, - file: file.content[provider], - ).send - end + file = processor.provider_files[file_id] + return unless provider && file + + FileWorker.new( + share:, + name: file.name[provider], + path: file.path, + file: file.content[provider], + ).send end - def send_language_content(file_id) + def send_language_content file = processor.language_files[file_id] return unless file diff --git a/app/services/csv_generator/base.rb b/app/services/csv_generator/base.rb index 93d2373d..6f24fa21 100644 --- a/app/services/csv_generator/base.rb +++ b/app/services/csv_generator/base.rb @@ -1,4 +1,9 @@ class CsvGenerator::Base + def initialize(source, **args) + @source = source + @args = args + end + def perform CSV.generate(row_sep: "\n") do |csv| csv << headers @@ -7,4 +12,18 @@ def perform end end end + + private + + attr_reader :source, :args + + def topics_collection + return source.topics if provider? + + source.topics + end + + def language = language? ? source : args.fetch(:language) + def language? = source.is_a?(Language) + def provider? = source.is_a?(Provider) end diff --git a/app/services/csv_generator/files.rb b/app/services/csv_generator/files.rb index 5c524604..ee11606d 100644 --- a/app/services/csv_generator/files.rb +++ b/app/services/csv_generator/files.rb @@ -1,19 +1,12 @@ class CsvGenerator::Files < CsvGenerator::Base - def initialize(language, **args) - @language = language - @args = args - end - private - attr_reader :language, :args - def headers %w[FileID TopicID FileName FileType FileSize] end def scope - language.topics.active + topics_collection.active .flat_map do |topic| topic.documents.map do |doc| [ diff --git a/app/services/csv_generator/tag_details.rb b/app/services/csv_generator/tag_details.rb index 70d54696..17cdd9d3 100644 --- a/app/services/csv_generator/tag_details.rb +++ b/app/services/csv_generator/tag_details.rb @@ -1,19 +1,12 @@ class CsvGenerator::TagDetails < CsvGenerator::Base - def initialize(language, **args) - @language = language - @args = args - end - private - attr_reader :language, :args - def headers %w[TagID Tag] end def scope - language.topics.active.includes(:tags) + topics_collection.active.includes(:tags) .flat_map { |topic| topic.tags_on(language.code.to_sym) } .uniq .map do |tag| diff --git a/app/services/csv_generator/topic_authors.rb b/app/services/csv_generator/topic_authors.rb index bf85f007..58b4c919 100644 --- a/app/services/csv_generator/topic_authors.rb +++ b/app/services/csv_generator/topic_authors.rb @@ -1,18 +1,11 @@ class CsvGenerator::TopicAuthors < CsvGenerator::Base - def initialize(language, **args) - @language = language - @args = args - end - private - attr_reader :language, :args - def headers %w[TopicID AuthorID] end def scope - language.topics.active.map { |topic| [ topic.id, 0 ] } + topics_collection.active.map { |topic| [ topic.id, 0 ] } end end diff --git a/app/services/csv_generator/topic_tags.rb b/app/services/csv_generator/topic_tags.rb index 7b6768c0..872b135b 100644 --- a/app/services/csv_generator/topic_tags.rb +++ b/app/services/csv_generator/topic_tags.rb @@ -1,19 +1,12 @@ class CsvGenerator::TopicTags < CsvGenerator::Base - def initialize(language, **args) - @language = language - @args = args - end - private - attr_reader :language, :args - def headers %w[TopicID TagID] end def scope - language.topics.active.includes(:tags) + topics_collection.active.includes(:tags) .flat_map do |topic| topic.tags_on(language.code.to_sym).map do |tag| [ diff --git a/app/services/csv_generator/topics.rb b/app/services/csv_generator/topics.rb index af87ce61..1afc8f46 100644 --- a/app/services/csv_generator/topics.rb +++ b/app/services/csv_generator/topics.rb @@ -1,19 +1,12 @@ class CsvGenerator::Topics < CsvGenerator::Base - def initialize(language, **args) - @language = language - @args = args - end - private - attr_reader :language, :args - def headers %w[TopicID TopicName TopicVolume TopicIssue TopicYear TopicMonth ContentProvider] end def scope - language.topics.active.includes(:provider) + topics_collection.active.includes(:provider) .map do |topic| [ topic.id, diff --git a/app/services/language_content_processor.rb b/app/services/language_content_processor.rb index da59634e..b575aaf8 100644 --- a/app/services/language_content_processor.rb +++ b/app/services/language_content_processor.rb @@ -12,13 +12,38 @@ def perform # this is needed to avoid loading all files into memory at once # Field 'name' is a lambda to allow dynamic naming based on the provider def provider_files - [ - FileToUpload.new( + { + single_provider: FileToUpload.new( content: ->(provider) { XmlGenerator::SingleProvider.new(provider).perform }, name: ->(provider) { "#{language.file_storage_prefix}#{provider.name.parameterize}.xml" }, path: "#{language.file_storage_prefix}CMES-Pi/assets/XML", ), - ] + files: FileToUpload.new( + content: ->(provider) { CsvGenerator::Files.new(provider).perform }, + name: ->(provider) { "#{language.file_storage_prefix}#{provider.name.parameterize}-file.csv" }, + path: "#{language.file_storage_prefix}CMES-v2/assets/csv", + ), + topics: FileToUpload.new( + content: ->(provider) { CsvGenerator::Topics.new(provider).perform }, + name: ->(provider) { "#{language.file_storage_prefix}#{provider.name.parameterize}-topic.csv" }, + path: "#{language.file_storage_prefix}CMES-v2/assets/csv", + ), + tag_details: FileToUpload.new( + content: ->(provider) { CsvGenerator::TagDetails.new(provider, language:).perform }, + name: ->(provider) { "#{language.file_storage_prefix}#{provider.name.parameterize}-tag.csv" }, + path: "#{language.file_storage_prefix}CMES-v2/assets/csv", + ), + topic_tags: FileToUpload.new( + content: ->(provider) { CsvGenerator::TopicTags.new(provider, language:).perform }, + name: ->(provider) { "#{language.file_storage_prefix}#{provider.name.parameterize}-topic-tag.csv" }, + path: "#{language.file_storage_prefix}CMES-v2/assets/csv", + ), + topic_authors: FileToUpload.new( + content: ->(provider) { CsvGenerator::TopicAuthors.new(provider).perform }, + name: ->(provider) { "#{language.file_storage_prefix}#{provider.name.parameterize}-topic-author.csv" }, + path: "#{language.file_storage_prefix}CMES-v2/assets/csv", + ), + } end # Field 'content' is a lambda to allow lazy evaluation @@ -65,7 +90,7 @@ def language_files name: "#{language.file_storage_prefix}TopicTag.csv", path: "#{language.file_storage_prefix}CMES-v2/assets/csv", ), - topic_authors: FileToUpload.new( + topic_authors: FileToUpload.new( content: ->(language) { CsvGenerator::TopicAuthors.new(language).perform }, name: "#{language.file_storage_prefix}TopicAuthor.csv", path: "#{language.file_storage_prefix}CMES-v2/assets/csv", @@ -79,11 +104,13 @@ def language_files def process_language_content! language_files.keys.each do |file_id| - FileUploadJob.perform_later(language.id, file_id.to_s, "file") + FileUploadJob.perform_later(language.id, file_id.to_s) end language.providers.distinct.find_each do |provider| - FileUploadJob.perform_later(language.id, provider.id, "provider") + provider_files.keys.each do |file_id| + FileUploadJob.perform_later(language.id, file_id.to_s, provider.id) + end end end end diff --git a/spec/jobs/file_upload_job_spec.rb b/spec/jobs/file_upload_job_spec.rb index 198446d8..d820a43b 100644 --- a/spec/jobs/file_upload_job_spec.rb +++ b/spec/jobs/file_upload_job_spec.rb @@ -19,7 +19,7 @@ file: file.content[language], ) - described_class.perform_now(language.id, file_id.to_s, "file") + described_class.perform_now(language.id, file_id.to_s) end end end @@ -30,42 +30,31 @@ before { create(:topic, :tagged, language:, provider:) } it "processes specific file" do - expect(FileWorker).to receive(:new).with( - share: ENV["AZURE_STORAGE_SHARE_NAME"], - name: "#{language.file_storage_prefix}test-provider.xml", - path: "#{language.file_storage_prefix}CMES-Pi/assets/XML", - file: XmlGenerator::SingleProvider.new(provider).perform, - ) - - described_class.perform_now(language.id, provider.id, "provider") - end - - context "when provider name contains /" do - let(:provider) { create(:provider, name: "Test/Provider") } - - it "replaces / with - in the file name" do + processor.provider_files.each do |file_id, file| expect(FileWorker).to receive(:new).with( share: ENV["AZURE_STORAGE_SHARE_NAME"], - name: "#{language.file_storage_prefix}test-provider.xml", - path: "#{language.file_storage_prefix}CMES-Pi/assets/XML", - file: XmlGenerator::SingleProvider.new(provider).perform, + name: file.name[provider], + path: file.path, + file: file.content[provider], ) - described_class.perform_now(language.id, provider.id, "provider") + described_class.perform_now(language.id, file_id.to_s, provider.id) end + end - context "when provider name contains /" do - let(:provider) { create(:provider, name: "WHO/Guidelines") } + context "when provider name contains /" do + let(:provider) { create(:provider, name: "Test/Provider") } - it "replaces / with - in the file name" do + it "replaces / with - in the file name" do + processor.provider_files.each do |file_id, file| expect(FileWorker).to receive(:new).with( share: ENV["AZURE_STORAGE_SHARE_NAME"], - name: "#{language.file_storage_prefix}who-guidelines.xml", - path: "#{language.file_storage_prefix}CMES-Pi/assets/XML", - file: XmlGenerator::SingleProvider.new(provider).perform, + name: file.name[provider], + path: file.path, + file: file.content[provider], ) - described_class.perform_now(language.id, provider.id, "provider") + described_class.perform_now(language.id, file_id.to_s, provider.id) end end end diff --git a/spec/services/csv_generator/files_spec.rb b/spec/services/csv_generator/files_spec.rb index a5ad333d..fa254989 100644 --- a/spec/services/csv_generator/files_spec.rb +++ b/spec/services/csv_generator/files_spec.rb @@ -1,9 +1,11 @@ require "rails_helper" RSpec.describe CsvGenerator::Files do - subject { described_class.new(language) } + subject { described_class.new(source, **args) } let(:language) { create(:language) } + let(:source) { language } + let(:args) { {} } let(:header) { "FileID,TopicID,FileName,FileType,FileSize\n" } it "generates empty csv" do @@ -41,6 +43,15 @@ expect(subject.perform).to eq(data) end end + + context "when generated for provider" do + let(:source) { topic.provider } + let(:args) { { language: } } + + it "generates csv with documents info" do + expect(subject.perform).to eq(data) + end + end end context "when topic exists but archived" do diff --git a/spec/services/csv_generator/tag_details_spec.rb b/spec/services/csv_generator/tag_details_spec.rb index e5f79681..e771b51c 100644 --- a/spec/services/csv_generator/tag_details_spec.rb +++ b/spec/services/csv_generator/tag_details_spec.rb @@ -1,9 +1,11 @@ require "rails_helper" RSpec.describe CsvGenerator::TagDetails do - subject { described_class.new(language) } + subject { described_class.new(source, **args) } let(:language) { create(:language) } + let(:source) { language } + let(:args) { {} } let(:header) { "TagID,Tag\n" } it "generates empty csv" do @@ -39,6 +41,15 @@ expect(subject.perform).to eq(data) end end + + context "when generated for provider" do + let(:source) { topic.provider } + let(:args) { { language: } } + + it "generates csv with documents info" do + expect(subject.perform).to eq(data) + end + end end context "when topic exists but archived" do diff --git a/spec/services/csv_generator/topic_authors_spec.rb b/spec/services/csv_generator/topic_authors_spec.rb index 518f7a16..b22e9405 100644 --- a/spec/services/csv_generator/topic_authors_spec.rb +++ b/spec/services/csv_generator/topic_authors_spec.rb @@ -1,9 +1,11 @@ require "rails_helper" RSpec.describe CsvGenerator::TopicAuthors do - subject { described_class.new(language) } + subject { described_class.new(source, **args) } let(:language) { create(:language) } + let(:source) { language } + let(:args) { {} } let(:header) { "TopicID,AuthorID\n" } it "generates empty csv" do @@ -21,6 +23,15 @@ it "generates csv with topics info" do expect(subject.perform).to eq(data) end + + context "when generated for provider" do + let(:source) { topic.provider } + let(:args) { { language: } } + + it "generates csv with documents info" do + expect(subject.perform).to eq(data) + end + end end context "when topic exists but archived" do diff --git a/spec/services/csv_generator/topic_tags_spec.rb b/spec/services/csv_generator/topic_tags_spec.rb index b2c9a439..09f5ac6d 100644 --- a/spec/services/csv_generator/topic_tags_spec.rb +++ b/spec/services/csv_generator/topic_tags_spec.rb @@ -1,9 +1,11 @@ require "rails_helper" RSpec.describe CsvGenerator::TopicTags do - subject { described_class.new(language) } + subject { described_class.new(source, **args) } let(:language) { create(:language) } + let(:source) { language } + let(:args) { {} } let(:header) { "TopicID,TagID\n" } it "generates empty csv" do @@ -23,6 +25,15 @@ it "generates csv with topic tag info" do expect(subject.perform).to eq(data) end + + context "when generated for provider" do + let(:source) { topic.provider } + let(:args) { { language: } } + + it "generates csv with documents info" do + expect(subject.perform).to eq(data) + end + end end context "when topic exists but archived" do diff --git a/spec/services/csv_generator/topics_spec.rb b/spec/services/csv_generator/topics_spec.rb index f8359ca1..f47a51b3 100644 --- a/spec/services/csv_generator/topics_spec.rb +++ b/spec/services/csv_generator/topics_spec.rb @@ -1,9 +1,11 @@ require "rails_helper" RSpec.describe CsvGenerator::Topics do - subject { described_class.new(language) } + subject { described_class.new(source, **args) } let(:language) { create(:language) } + let(:source) { language } + let(:args) { {} } let(:header) { "TopicID,TopicName,TopicVolume,TopicIssue,TopicYear,TopicMonth,ContentProvider\n" } it "generates empty csv" do @@ -21,6 +23,15 @@ it "generates csv with topics info" do expect(subject.perform).to eq(data) end + + context "when generated for provider" do + let(:source) { topic.provider } + let(:args) { { language: } } + + it "generates csv with documents info" do + expect(subject.perform).to eq(data) + end + end end context "when topic exists but archived" do diff --git a/spec/services/language_content_processor_spec.rb b/spec/services/language_content_processor_spec.rb index d176cf15..102292f8 100644 --- a/spec/services/language_content_processor_spec.rb +++ b/spec/services/language_content_processor_spec.rb @@ -15,20 +15,27 @@ end it "processes content for every language" do - files_number = language.providers.size + 9 # 2 xml files for all providers, 1 xml file for single provider, 2 text files for tags, 5 csv files + # 2 xml files for all providers, 1 xml file for single provider, 2 text files for tags, 5 csv files = 9 + # per provider 6 files (1 xml and 5 csv) + files_number = language.providers.size * 6 + 9 subject.perform expect(FileUploadJob).to have_received(:perform_later).exactly(files_number).times - expect(FileUploadJob).to have_received(:perform_later).with(language.id, "all_providers", "file") - expect(FileUploadJob).to have_received(:perform_later).with(language.id, "all_providers_recent", "file") - expect(FileUploadJob).to have_received(:perform_later).with(language.id, "tags", "file") - expect(FileUploadJob).to have_received(:perform_later).with(language.id, "tags_and_title", "file") - expect(FileUploadJob).to have_received(:perform_later).with(language.id, "files", "file") - expect(FileUploadJob).to have_received(:perform_later).with(language.id, "topics", "file") - expect(FileUploadJob).to have_received(:perform_later).with(language.id, "tag_details", "file") - expect(FileUploadJob).to have_received(:perform_later).with(language.id, "topic_tags", "file") - expect(FileUploadJob).to have_received(:perform_later).with(language.id, "topic_authors", "file") - expect(FileUploadJob).to have_received(:perform_later).with(language.id, provider.id, "provider") + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "all_providers") + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "all_providers_recent") + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "tags") + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "tags_and_title") + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "files") + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "topics") + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "tag_details") + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "topic_tags") + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "topic_authors") + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "single_provider", provider.id) + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "files", provider.id) + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "topics", provider.id) + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "tag_details", provider.id) + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "topic_tags", provider.id) + expect(FileUploadJob).to have_received(:perform_later).with(language.id, "topic_authors", provider.id) end end