Skip to content

Commit ae6d7d2

Browse files
committed
Consolidate documentLoader calls to loadRemoteDocument, and re-implement JSON::LD::API::RemoteDocument.
1 parent 5f0f0b8 commit ae6d7d2

File tree

13 files changed

+218
-262
lines changed

13 files changed

+218
-262
lines changed

README.md

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -226,22 +226,22 @@ require 'json/ld'
226226
]
227227
```
228228
## Use a custom Document Loader
229-
In some cases, the built-in document loader {JSON::LD::API.documentLoader} is inadequate; for example, when using `http://schema.org` as a remote context, it will be re-loaded every time.
229+
In some cases, the built-in document loader {JSON::LD::API.documentLoader} is inadequate; for example, when `http://schema.org` is used as a remote context, it will be re-loaded each time it is used (however, see [json-ld-preloaded](https://rubygems.org/gems/json-ld-preloaded) for a way to avoid this).
230230

231231
All entry points into {JSON::LD::API} accept a `:documentLoader` option, which provides an alternative method to use when loading remote documents. For example:
232232
```ruby
233-
def load_document_local(url, options={}, &block)
234-
if RDF::URI(url, canonicalize: true) == RDF::URI('http://schema.org/')
235-
remote_document = JSON::LD::API::RemoteDocument.new(url, File.read("etc/schema.org.jsonld"))
236-
return block_given? ? yield(remote_document) : remote_document
237-
else
238-
JSON::LD::API.documentLoader(url, options, &block)
239-
end
240-
end
233+
def load_document_local(url, options={}, &block)
234+
if RDF::URI(url, canonicalize: true) == RDF::URI('http://schema.org/')
235+
remote_document = JSON::LD::API::RemoteDocument.new(url, File.read("etc/schema.org.jsonld"))
236+
return block_given? ? yield(remote_document) : remote_document
237+
else
238+
JSON::LD::API.documentLoader(url, options, &block)
239+
end
240+
end
241241
```
242242
Then, when performing something like expansion:
243243
```ruby
244-
JSON::LD::API.expand(input, documentLoader: load_document_local)
244+
JSON::LD::API.expand(input, documentLoader: load_document_local)
245245
```
246246

247247
## Preloading contexts

lib/json/ld/api.rb

Lines changed: 150 additions & 92 deletions
Large diffs are not rendered by default.

lib/json/ld/context.rb

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -304,7 +304,6 @@ def initialize(**options)
304304
@base = @doc_base = RDF::URI(options[:base]).dup
305305
@doc_base.canonicalize! if options[:canonicalize]
306306
end
307-
options[:documentLoader] ||= JSON::LD::API.method(:documentLoader)
308307
@processingMode ||= options[:processingMode]
309308
@term_definitions = {}
310309
@iri_to_term = {
@@ -482,20 +481,15 @@ def parse(local_context, remote_contexts: [], from_term: nil)
482481

483482
# Load context document, if it is a string
484483
begin
485-
context_opts = @options.dup
484+
context_opts = @options.merge(
485+
profile: 'http://www.w3.org/ns/json-ld#context',
486+
requestProfile: 'http://www.w3.org/ns/json-ld#context',
487+
base: nil)
486488
context_opts.delete(:headers)
487-
@options[:documentLoader].call(context.to_s, context_opts) do |remote_doc|
489+
JSON::LD::API.loadRemoteDocument(context.to_s, context_opts) do |remote_doc|
488490
# 3.2.5) Dereference context. If the dereferenced document has no top-level JSON object with an @context member, an invalid remote context has been detected and processing is aborted; otherwise, set context to the value of that member.
489-
jo = if remote_doc.content_type == 'text/html'
490-
API.load_html(remote_doc.document, url: context.to_s, profile: 'http://www.w3.org/ns/json-ld#context')
491-
elsif remote_doc.document.is_a?(String)
492-
MultiJson.load(remote_doc.document)
493-
else
494-
remote_doc.document
495-
end
496-
497-
raise JsonLdError::InvalidRemoteContext, "#{context}" unless jo.is_a?(Hash) && jo.has_key?('@context')
498-
context = jo['@context']
491+
raise JsonLdError::InvalidRemoteContext, "#{context}" unless remote_doc.document.is_a?(Hash) && remote_doc.document.has_key?('@context')
492+
context = remote_doc.document['@context']
499493
end
500494
rescue JsonLdError::LoadingDocumentFailed => e
501495
#log_debug("parse") {"Failed to retrieve @context from remote document at #{context_no_base.context_base.inspect}: #{e.message}"}

lib/json/ld/extensions.rb

Lines changed: 0 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -40,23 +40,6 @@ def valid_extended?
4040
value.is_a?(String)
4141
end
4242
end
43-
44-
module Util
45-
module File
46-
# Add contextUrl accessor
47-
class RemoteDocument
48-
# @return [String]
49-
# The URL of a remote context as specified by an HTTP Link header with rel=`http://www.w3.org/ns/json-ld#context`
50-
attr_accessor :contextUrl
51-
52-
# @return [String, Array<Hash>, Hash]
53-
# The retrieved document, either as raw text or parsed JSON
54-
def document
55-
@document ||= self.read
56-
end
57-
end
58-
end
59-
end
6043
end
6144

6245
class Array

lib/json/ld/html/nokogiri.rb

Lines changed: 3 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -128,10 +128,10 @@ def method_missing(method, *args)
128128
# Initializes the underlying XML library.
129129
#
130130
# @param [Hash{Symbol => Object}] options
131-
# @return [void]
131+
# @return [NodeProxy] of root element
132132
def initialize_html(input, options = {})
133133
require 'nokogiri' unless defined?(::Nokogiri)
134-
@doc = case input
134+
doc = case input
135135
when ::Nokogiri::HTML::Document, ::Nokogiri::XML::Document
136136
input
137137
else
@@ -143,37 +143,8 @@ def initialize_html(input, options = {})
143143
::Nokogiri::HTML.parse(input, base_uri.to_s, 'utf-8')
144144
end
145145
end
146-
end
147-
148-
# Accessor methods to mask native elements & attributes
149-
150-
##
151-
# Return proxy for document root
152-
def root
153-
@root ||= NodeProxy.new(@doc.root) if @doc && @doc.root
154-
end
155-
156-
##
157-
# Document errors
158-
def doc_errors
159-
# FIXME: Nokogiri version 1.5 thinks many HTML5 elements are invalid, so just ignore all Tag errors.
160-
# Nokogumbo might make this simpler
161-
if @host_language == :html5
162-
@doc.errors.reject {|e| e.to_s =~ /The doctype must be the first token in the document/}
163-
else
164-
@doc.errors.reject {|e| e.to_s =~ /(?:Tag \w+ invalid)|(?:Missing attribute name)/}
165-
end
166-
end
167146

168-
##
169-
# Find value of document base
170-
#
171-
# @param [String] base Existing base from URI or :base_uri
172-
# @return [String]
173-
def doc_base(base)
174-
# find if the document has a base element
175-
base_el = @doc.at_css("html>head>base")
176-
base.join(base_el.attribute("href").to_s.split("#").first) if base_el
147+
NodeProxy.new(doc.root) if doc && doc.root
177148
end
178149
end
179150
end

lib/json/ld/html/rexml.rb

Lines changed: 3 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -168,41 +168,18 @@ def method_missing(method, *args)
168168
# Initializes the underlying XML library.
169169
#
170170
# @param [Hash{Symbol => Object}] options
171-
# @return [void]
171+
# @return [NodeProxy] of document root
172172
def initialize_html(input, options = {})
173173
require 'rexml/document' unless defined?(::REXML)
174-
@doc = case input
174+
doc = case input
175175
when ::REXML::Document
176176
input
177177
else
178178
# Only parse as XML, no HTML mode
179179
::REXML::Document.new(input.respond_to?(:read) ? input.read : input.to_s)
180180
end
181-
end
182-
183-
# Accessor methods to mask native elements & attributes
184-
185-
##
186-
# Return proxy for document root
187-
def root
188-
@root ||= NodeProxy.new(@doc.root) if @doc && @doc.root
189-
end
190181

191-
##
192-
# Document errors
193-
def doc_errors
194-
[]
195-
end
196-
197-
##
198-
# Find value of document base
199-
#
200-
# @param [String] base Existing base from URI or :base_uri
201-
# @return [String]
202-
def doc_base(base)
203-
# find if the document has a base element
204-
base_el = ::REXML::XPath.first(@doc, "/html/head/base") rescue nil
205-
base.join(base_el.attribute("href").to_s.split("#").first) if base_el
182+
NodeProxy.new(doc.root) if doc && doc.root
206183
end
207184
end
208185
end

spec/api_spec.rb

Lines changed: 11 additions & 44 deletions
Original file line numberDiff line numberDiff line change
@@ -9,56 +9,32 @@
99
describe "#initialize" do
1010
context "with string input" do
1111
let(:context) do
12-
RDF::Util::File::RemoteDocument.new(%q({
12+
JSON::LD::API::RemoteDocument.new(%q({
1313
"@context": {
1414
"xsd": "http://www.w3.org/2001/XMLSchema#",
1515
"name": "http://xmlns.com/foaf/0.1/name",
1616
"homepage": {"@id": "http://xmlns.com/foaf/0.1/homepage", "@type": "@id"},
1717
"avatar": {"@id": "http://xmlns.com/foaf/0.1/avatar", "@type": "@id"}
1818
}
19-
}), base_uri: "http://example.com/context")
19+
}),
20+
documentUrl: "http://example.com/context",
21+
contentType: 'application/ld+json'
22+
)
2023
end
2124
let(:remote_doc) do
22-
d = RDF::Util::File::RemoteDocument.new(%q({
23-
"@id": "",
24-
"name": "foo"
25-
}), base_uri: "http://example.com/foo")
26-
d.contextUrl = "http://example.com/context"
27-
d
25+
JSON::LD::API::RemoteDocument.new(%q({"@id": "", "name": "foo"}),
26+
documentUrl: "http://example.com/foo",
27+
contentType: 'application/ld+json',
28+
contextUrl: "http://example.com/context"
29+
)
2830
end
2931

3032
it "loads document with loader and loads context" do
31-
expect(described_class).to receive(:documentLoader).with("http://example.com/foo", anything).and_return(remote_doc)
33+
expect(described_class).to receive(:documentLoader).with("http://example.com/foo", anything).and_yield(remote_doc)
3234
expect(described_class).to receive(:documentLoader).with("http://example.com/context", anything).and_yield(context)
3335
described_class.new("http://example.com/foo", nil)
3436
end
3537
end
36-
37-
context "with RDF::Util::File::RemoteDoc input" do
38-
let(:context) do
39-
RDF::Util::File::RemoteDocument.new(%q({
40-
"@context": {
41-
"xsd": "http://www.w3.org/2001/XMLSchema#",
42-
"name": "http://xmlns.com/foaf/0.1/name",
43-
"homepage": {"@id": "http://xmlns.com/foaf/0.1/homepage", "@type": "@id"},
44-
"avatar": {"@id": "http://xmlns.com/foaf/0.1/avatar", "@type": "@id"}
45-
}
46-
}), base_uri: "http://example.com/context")
47-
end
48-
let(:remote_doc) do
49-
RDF::Util::File::RemoteDocument.new(%q({"@id": "", "name": "foo"}),
50-
headers: {
51-
content_type: 'application/json',
52-
link: %(<http://example.com/context>; rel="#{JSON::LD::JSON_LD_NS}context"; type="application/ld+json")
53-
}
54-
)
55-
end
56-
57-
it "processes document and retrieves linked context" do
58-
expect(described_class).to receive(:documentLoader).with("http://example.com/context", anything).and_yield(context)
59-
described_class.new(remote_doc, nil)
60-
end
61-
end
6238
end
6339

6440
context "when validating", pending: ("JRuby support for jsonlint" if RUBY_ENGINE == "jruby") do
@@ -105,13 +81,4 @@
10581
end
10682
end
10783
end
108-
109-
# This class is deprecated
110-
describe JSON::LD::API::RemoteDocument do
111-
it "creates with deprecation" do
112-
expect {
113-
described_class.new("http://example.com", "foo")
114-
}.to write('[DEPRECATION]').to(:error)
115-
end
116-
end
11784
end

spec/compact_spec.rb

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -514,7 +514,9 @@
514514

515515
context "context as reference" do
516516
let(:remote_doc) do
517-
RDF::Util::File::RemoteDocument.new(%q({"@context": {"b": "http://example.com/b"}}), base_uri: "http://example.com/context")
517+
JSON::LD::API::RemoteDocument.new(
518+
%q({"@context": {"b": "http://example.com/b"}}),
519+
documentUrl: "http://example.com/context")
518520
end
519521
it "uses referenced context" do
520522
input = ::JSON.parse %({
@@ -2699,6 +2701,7 @@
26992701

27002702
def run_compact(params)
27012703
input, output, context = params[:input], params[:output], params[:context]
2704+
params[:base] ||= nil
27022705
context ||= output # Since it will have the context
27032706
input = ::JSON.parse(input) if input.is_a?(String)
27042707
output = ::JSON.parse(output) if output.is_a?(String)

spec/conneg_spec.rb

Lines changed: 14 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -40,16 +40,16 @@
4040

4141
describe "#call" do
4242
let(:schema_context) {
43-
RDF::Util::File::RemoteDocument.new(%q({
43+
JSON::LD::API::RemoteDocument.new(%q({
4444
"@context": {
4545
"@vocab": "http://schema.org/",
4646
"id": "@id",
4747
"type": "@type"
4848
}
49-
}), base_uri: "http://schema.org")
49+
}), documentUrl: "http://schema.org")
5050
}
5151
let(:frame) {
52-
RDF::Util::File::RemoteDocument.new(%q({
52+
JSON::LD::API::RemoteDocument.new(%q({
5353
"@context": {
5454
"dc": "http://purl.org/dc/elements/1.1/",
5555
"ex": "http://example.org/vocab#"
@@ -61,21 +61,21 @@
6161
"@type": "ex:Chapter"
6262
}
6363
}
64-
}), base_uri: "http://conneg.example.com/frame")
64+
}), documentUrl: "http://conneg.example.com/frame")
6565
}
6666
let(:context) {
67-
RDF::Util::File::RemoteDocument.new(%q({
67+
JSON::LD::API::RemoteDocument.new(%q({
6868
"@context": {
6969
"dc": "http://purl.org/dc/elements/1.1/",
7070
"ex": "http://example.org/vocab#"
7171
}
72-
}), base_uri: "http://conneg.example.com/context")
72+
}), documentUrl: "http://conneg.example.com/context")
7373
}
7474

7575
before(:each) do
7676
allow(JSON::LD::API).to receive(:documentLoader).with("http://schema.org", any_args).and_yield(schema_context)
7777
allow(JSON::LD::API).to receive(:documentLoader).with("http://conneg.example.com/context", any_args).and_yield(context)
78-
allow(JSON::LD::API).to receive(:documentLoader).with("http://conneg.example.com/frame", any_args).and_return(frame)
78+
allow(JSON::LD::API).to receive(:documentLoader).with("http://conneg.example.com/frame", any_args).and_yield(frame)
7979
end
8080

8181
context "with text result" do
@@ -235,16 +235,16 @@
235235

236236
describe "#call" do
237237
let(:schema_context) {
238-
RDF::Util::File::RemoteDocument.new(%q({
238+
JSON::LD::API::RemoteDocument.new(%q({
239239
"@context": {
240240
"@vocab": "http://schema.org/",
241241
"id": "@id",
242242
"type": "@type"
243243
}
244-
}), base_uri: "http://schema.org")
244+
}), documentUrl: "http://schema.org")
245245
}
246246
let(:frame) {
247-
RDF::Util::File::RemoteDocument.new(%q({
247+
JSON::LD::API::RemoteDocument.new(%q({
248248
"@context": {
249249
"dc": "http://purl.org/dc/elements/1.1/",
250250
"ex": "http://example.org/vocab#"
@@ -256,21 +256,21 @@
256256
"@type": "ex:Chapter"
257257
}
258258
}
259-
}), base_uri: "http://conneg.example.com/frame")
259+
}), documentUrl: "http://conneg.example.com/frame")
260260
}
261261
let(:context) {
262-
RDF::Util::File::RemoteDocument.new(%q({
262+
JSON::LD::API::RemoteDocument.new(%q({
263263
"@context": {
264264
"dc": "http://purl.org/dc/elements/1.1/",
265265
"ex": "http://example.org/vocab#"
266266
}
267-
}), base_uri: "http://conneg.example.com/context")
267+
}), documentUrl: "http://conneg.example.com/context")
268268
}
269269

270270
before(:each) do
271271
allow(JSON::LD::API).to receive(:documentLoader).with("http://schema.org", any_args).and_yield(schema_context)
272272
allow(JSON::LD::API).to receive(:documentLoader).with("http://conneg.example.com/context", any_args).and_yield(context)
273-
allow(JSON::LD::API).to receive(:documentLoader).with("http://conneg.example.com/frame", any_args).and_return(frame)
273+
allow(JSON::LD::API).to receive(:documentLoader).with("http://conneg.example.com/frame", any_args).and_yield(frame)
274274
end
275275

276276
{

0 commit comments

Comments
 (0)