Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
23 changes: 11 additions & 12 deletions app/services/language_content_processor.rb
Original file line number Diff line number Diff line change
Expand Up @@ -7,14 +7,13 @@ def initialize(language, share = ENV["AZURE_STORAGE_SHARE_NAME"])
# Entry point of the processor: delegates to process_language_content!
# and returns whatever that call returns.
def perform = process_language_content!

# Field 'content' is a lambda to allow lazy evaluation
# this is needed to avoid loading all files into memory at once
# Field 'name' is a lambda to allow dynamic naming based on the provider
def provider_files
[
FileToUpload.new(
content: ->(provider) { XmlGenerator::SingleProvider.new(provider).perform },
content: ->(provider) { LanguageTopicsXmlGenerator.new(@language, provider: provider).perform },
name: ->(provider) { "#{language.file_storage_prefix}#{provider.name.parameterize}.xml" },
path: "#{language.file_storage_prefix}CMES-Pi/assets/XML",
),
Expand All @@ -25,16 +24,16 @@ def provider_files
# this is needed to avoid loading all files into memory at once
def language_files
{
# all_providers: FileToUpload.new(
# content: ->(language) { XmlGenerator::AllProviders.new(language).perform },
# name: "#{language.file_storage_prefix}Server_XML.xml",
# path: "#{language.file_storage_prefix}CMES-Pi/assets/XML",
# ),
# all_providers_recent: FileToUpload.new(
# content: ->(language) { XmlGenerator::AllProviders.new(language, recent: true).perform },
# name: "#{language.file_storage_prefix}New_Uploads_Server_XML.xml",
# path: "#{language.file_storage_prefix}CMES-Pi/assets/XML",
# ),
all_providers: FileToUpload.new(
content: ->(language) { LanguageTopicsXmlGenerator.new(language).perform },
name: "#{language.file_storage_prefix}Server_XML.xml",
path: "#{language.file_storage_prefix}CMES-Pi/assets/XML",
),
all_providers_recent: FileToUpload.new(
content: ->(language) { LanguageTopicsXmlGenerator.new(language, recent: true).perform },
name: "#{language.file_storage_prefix}New_Uploads_Server_XML.xml",
path: "#{language.file_storage_prefix}CMES-Pi/assets/XML",
),
tags: FileToUpload.new(
content: ->(language) { TextGenerator::Tags.new(language).perform },
name: "#{language.file_storage_prefix}tags.txt",
Expand Down
96 changes: 96 additions & 0 deletions app/services/language_topics_xml_generator.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
class LanguageTopicsXmlGenerator
# Stores the language, the optional provider, and any remaining keyword
# options (topics_scope reads :recent from them) on the instance.
def initialize(language, provider: nil, **args)
  @language, @provider, @args = language, provider, args
end

# Builds the XML document: a <cmes> root containing one element per
# provider (as produced by provider_xml), wrapped in a version "1.0"
# document. Returns the result of Ox.dump for that document.
def perform
  root = Ox::Element.new("cmes")
  grouped_by_provider.each { |provider, topics| root << provider_xml(provider, topics) }

  document = Ox::Document.new(version: "1.0")
  document << root
  Ox.dump(document)
end

private

attr_reader :language, :provider, :args

# Buckets the topics returned by topics_scope under their provider.
def grouped_by_provider
  topics_scope.group_by { |topic| topic.provider }
end

def provider_xml(provider, topics)
Ox::Element.new("content_provider").tap do |provider_element|
provider_element[:name] = provider.name
build_year_nodes(provider_element, topics)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Like this decomposition into several methods!

end
end

# Appends one element (built by year_xml) to parent_element for every
# publication year present in topics, newest year first.
def build_year_nodes(parent_element, topics)
  topics_by_year = topics.group_by { |topic| topic.published_at.year }
  # Years are unique hash keys, so ascending sort + reverse == descending.
  topics_by_year.sort_by(&:first).reverse.each do |year, yearly_topics|
    parent_element << year_xml(year, yearly_topics)
  end
end

# Builds a <topic_year> element whose :year attribute is the year as a
# string, with one child per month (built by month_xml). Months are keyed
# by "%m_%B" labels (e.g. "01_January") and emitted in ascending label order.
def year_xml(year, topics_in_year)
  year_element = Ox::Element.new("topic_year")
  year_element[:year] = year.to_s

  topics_by_month = topics_in_year.group_by { |topic| topic.published_at.strftime("%m_%B") }
  topics_by_month.keys.sort.each do |label|
    year_element << month_xml(label, topics_by_month[label])
  end

  year_element
end

# Builds a <topic_month> element labelled with month_label and appends one
# child (built by topic_xml) per topic, in the order given.
def month_xml(month_label, topics_in_month)
  month_element = Ox::Element.new("topic_month")
  month_element[:month] = month_label
  topics_in_month.each_with_object(month_element) do |topic, element|
    element << topic_xml(topic)
  end
end

# Builds the <title> element for a single topic. The :name attribute is the
# topic title; children are, in order: topic_id, counter (always "0"),
# topic_volume (publication year), topic_issue (publication month),
# the files element from files_xml, a topic_author wrapper holding a
# single-space topic_author_1, and topic_tags (comma-separated tag list).
def topic_xml(topic)
  # Helper for the common "element containing only text/one child" shape.
  leaf = ->(tag, content) { Ox::Element.new(tag) << content }

  title_element = Ox::Element.new("title")
  title_element[:name] = topic.title

  children = [
    leaf.call("topic_id", topic.id.to_s),
    leaf.call("counter", "0"),
    leaf.call("topic_volume", topic.published_at.year.to_s),
    leaf.call("topic_issue", topic.published_at.month.to_s),
    files_xml(topic.documents),
    leaf.call("topic_author", leaf.call("topic_author_1", " ")),
    leaf.call("topic_tags", topic.current_tags_list.join(", ")),
  ]
  children.each { |child| title_element << child }

  title_element
end

# Builds the <topic_files> element (attribute files="Files") listing every
# document whose content type is not "video/mp4". Each remaining document
# becomes a <file_name_N> child (N counted from 1 after filtering) carrying
# its byte size as the :file_size attribute and its filename as text.
def files_xml(documents)
  files = Ox::Element.new("topic_files")
  files[:files] = "Files"

  non_video = documents.reject { |document| document.content_type == "video/mp4" }
  non_video.each.with_index(1) do |document, position|
    entry = Ox::Element.new("file_name_#{position}")
    entry[:file_size] = document.byte_size
    entry << document.filename.to_s
    files << entry
  end

  files
end

# Returns the topic query used to build the XML: the provider's topics when
# a provider was given (otherwise Topic), restricted to the current
# language, optionally limited to topics published after 1.month.ago when
# the :recent option is truthy, with provider, tags and document blobs
# eager-loaded and ordered by published_at descending.
def topics_scope
  base =
    if @provider
      @provider.topics
    else
      Topic
    end
  recent_only = args.fetch(:recent, false)

  filtered = base.where(language_id: language.id)
  filtered = filtered.where("published_at > ?", 1.month.ago) if recent_only

  filtered
    .includes(:provider, { taggings: :tag }, { documents_attachments: :blob })
    .order(published_at: :desc)
end
end
Binary file added spec/fixtures/files/report.pdf
Binary file not shown.
1 change: 1 addition & 0 deletions spec/fixtures/files/sample.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
sample text
1 change: 1 addition & 0 deletions spec/fixtures/files/video.mp4
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
video content
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we add a small video file instead?

87 changes: 39 additions & 48 deletions spec/jobs/file_upload_job_spec.rb
Original file line number Diff line number Diff line change
@@ -1,73 +1,64 @@
require "rails_helper"

RSpec.describe FileUploadJob, type: :job do
let(:language) { create(:language) }
let(:processor) { LanguageContentProcessor.new(language) }
# The only collaborator that should be stubbed is the one performing the
# external action (the file upload). We want to test the full integration
# with the real LanguageContentProcessor and its dependent generators.
before do
allow(FileWorker).to receive(:new).and_return(instance_double(FileWorker, send: true))
end

describe "#perform" do
before do
allow(FileWorker).to receive(:new).and_return(instance_double(FileWorker, send: true))
end
let!(:language) { create(:language) }

context "when language specific file" do
it "processes specific file" do
processor.language_files.each do |file_id, file|
expect(FileWorker).to receive(:new).with(
share: ENV["AZURE_STORAGE_SHARE_NAME"],
name: file.name,
path: file.path,
file: file.content[language],
)

described_class.perform_now(language.id, file_id.to_s, "file")
end
end
end
context "when processing a language-specific file" do
it "correctly looks up the file definition and generates the content" do
# Create data to ensure the generator produces content.
create(:topic, language: language)
file_id = :all_providers_recent

context "when provider specific file" do
let(:provider) { create(:provider, name: "Test Provider") }
# Dynamically determine the expected output from the real objects.
processor = LanguageContentProcessor.new(language)
expected_file_definition = processor.language_files[file_id]
expected_content = LanguageTopicsXmlGenerator.new(language, recent: true).perform

before { create(:topic, :tagged, language:, provider:) }

it "processes specific file" do
expect(FileWorker).to receive(:new).with(
share: ENV["AZURE_STORAGE_SHARE_NAME"],
name: "#{language.file_storage_prefix}test-provider.xml",
path: "#{language.file_storage_prefix}CMES-Pi/assets/XML",
file: XmlGenerator::SingleProvider.new(provider).perform,
name: expected_file_definition.name,
path: expected_file_definition.path,
file: expected_content
)

described_class.perform_now(language.id, provider.id, "provider")
described_class.perform_now(language.id, file_id.to_s, "file")
end
end

context "when processing a provider-specific file" do
# This single example replaces the three previous, repetitive tests.
# It verifies that the job correctly handles provider name parameterization
# by testing multiple cases in a data-driven way.
it "generates the correct parameterized filename for various provider names" do
test_cases = {
"Test Provider" => "#{language.file_storage_prefix}test-provider.xml",
"Test/Provider" => "#{language.file_storage_prefix}test-provider.xml",
"WHO/Guidelines" => "#{language.file_storage_prefix}who-guidelines.xml",
}

test_cases.each do |provider_name, expected_filename|
provider = create(:provider, name: provider_name)
create(:topic, :tagged, language: language, provider: provider)

context "when provider name contains /" do
let(:provider) { create(:provider, name: "Test/Provider") }
expected_content = LanguageTopicsXmlGenerator.new(language, provider: provider).perform

it "replaces / with - in the file name" do
expect(FileWorker).to receive(:new).with(
share: ENV["AZURE_STORAGE_SHARE_NAME"],
name: "#{language.file_storage_prefix}test-provider.xml",
name: expected_filename,
path: "#{language.file_storage_prefix}CMES-Pi/assets/XML",
file: XmlGenerator::SingleProvider.new(provider).perform,
file: expected_content
)

described_class.perform_now(language.id, provider.id, "provider")
end

context "when provider name contains /" do
let(:provider) { create(:provider, name: "WHO/Guidelines") }

it "replaces / with - in the file name" do
expect(FileWorker).to receive(:new).with(
share: ENV["AZURE_STORAGE_SHARE_NAME"],
name: "#{language.file_storage_prefix}who-guidelines.xml",
path: "#{language.file_storage_prefix}CMES-Pi/assets/XML",
file: XmlGenerator::SingleProvider.new(provider).perform,
)

described_class.perform_now(language.id, provider.id, "provider")
end
end
end
end
end
Expand Down
7 changes: 4 additions & 3 deletions spec/services/language_content_processor_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,14 @@
end

it "processes content for every language" do
files_number = language.providers.size + 6 # 2 xml files for all providers, 1 xml file for single provider, 2 text files for tags, 5 csv files
files_number = language.providers.size + 9 # 2 xml files for all providers, 2 text files for tags, 5 csv files

subject.perform

expect(FileUploadJob).to have_received(:perform_later).exactly(files_number).times

# expect(FileUploadJob).to have_received(:perform_later).with(language.id, "all_providers", "file")
# expect(FileUploadJob).to have_received(:perform_later).with(language.id, "all_providers_recent", "file")
expect(FileUploadJob).to have_received(:perform_later).with(language.id, "all_providers", "file")
expect(FileUploadJob).to have_received(:perform_later).with(language.id, "all_providers_recent", "file")
expect(FileUploadJob).to have_received(:perform_later).with(language.id, "tags", "file")
expect(FileUploadJob).to have_received(:perform_later).with(language.id, "tags_and_title", "file")
expect(FileUploadJob).to have_received(:perform_later).with(language.id, "files", "file")
Expand Down
93 changes: 93 additions & 0 deletions spec/services/language_topics_xml_generator_spec.rb
Original file line number Diff line number Diff line change
@@ -0,0 +1,93 @@
require "rails_helper"

RSpec.describe LanguageTopicsXmlGenerator do
let(:language) { Language.find_by(name: "en") }

# This spec provides a high-confidence check that the refactored service
# is a safe replacement for the legacy implementation by asserting that
# their XML outputs are semantically identical.
it "produces an XML output identical to the corrected legacy generator" do
# Arrange: Build a consistent data set for both generators.
XmlTestDataBuilder.xml_scenario
.for_language(name: "en")
.for_provider(name: "Health Corp")
.with_topic(
title: "Topic A - Jan 2023",
published_at: Date.new(2023, 1, 15),
documents: [
{ filename: "report.pdf", content_type: "application/pdf" },
{ filename: "video.mp4", content_type: "video/mp4" },
],
tags: [ "flu", "vaccine" ]
)
.with_topic(
title: "Topic C - Feb 2022",
published_at: Date.new(2022, 2, 10),
tags: [ "diabetes", "research" ]
)
.for_provider(name: "Wellness Inc")
.with_topic(
title: "Topic D - Jan 2023",
published_at: Date.new(2023, 1, 5)
)
.build!

# Patch the single buggy method in the legacy implementation for this test.
# This allows us to use the actual legacy classes but with the critical
# language-scoping logic fixed, ensuring a valid comparison.
# allow_any_instance_of(XmlGenerator::SingleProvider).to receive(:topic_scope) do |instance, provider|
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need this commented code here?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

no

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If this all works, we can just merge everything into a single spec file for the new class.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can we remove this commented code then?

And why can't we join them all right now?

# # Re-implement the method with the correct logic. The `instance` passed
# # here is the XmlGenerator::AllProviders object, which holds the context.
# language = instance.instance_variable_get(:@language)
# args = instance.instance_variable_get(:@args)

# scope = provider.topics.where(language_id: language.id)
# scope = scope.where("published_at > ?", 1.month.ago) if args.fetch(:recent, false)
# scope
# .select(:id, :title, :published_at, :language_id, :provider_id)
# .includes(:language, { taggings: :tag }, { documents_attachments: :blob })
# .order(published_at: :desc)
# end


# Act: Generate XML from both the new and legacy services.
new_xml = LanguageTopicsXmlGenerator.new(language).perform
legacy_xml = XmlGenerator::AllProviders.new(language).perform

# Assert: Parse and normalize both XML outputs to ensure they are identical.
# Comparing parsed documents is more robust than string comparison as it
# ignores insignificant whitespace and attribute ordering differences.
new_doc = Nokogiri::XML(new_xml) { |config| config.noblanks }
legacy_doc = Nokogiri::XML(legacy_xml) { |config| config.noblanks }

expect(new_doc.to_xml).to eq(legacy_doc.to_xml)
end
context "when the :recent option is true" do
let(:generator) { described_class.new(language, recent: true) }

before do
XmlTestDataBuilder.xml_scenario
.for_language(name: "en")
.for_provider(name: "Health Corp")
.with_topic(
title: "Recent Topic",
published_at: 2.weeks.ago
)
.with_topic(
title: "Old Topic",
published_at: 2.months.ago
)
.build!
end

it "includes only topics published within the last month" do
doc = Nokogiri::XML(generator.perform)

recent_topic_node = doc.at_xpath("//title[@name='Recent Topic']")
old_topic_node = doc.at_xpath("//title[@name='Old Topic']")

expect(recent_topic_node).not_to be_nil
expect(old_topic_node).to be_nil
end
end
end
Loading
Loading