Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .travis.fuseki_install_optional.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

set -v

uri="http://archive.apache.org/dist/jena/binaries/apache-jena-fuseki-2.4.0.tar.gz"
uri="http://archive.apache.org/dist/jena/binaries/apache-jena-fuseki-3.9.0.tar.gz"

if wget "$uri" &&
tar -zxf *jena*fuseki*.tar.gz &&
Expand Down
3 changes: 2 additions & 1 deletion .travis.yml
Original file line number Diff line number Diff line change
Expand Up @@ -9,9 +9,9 @@ git:
depth: 3

python:
- 3.5
- 3.6
- 3.7
- 3.8

jobs:
include:
Expand All @@ -24,6 +24,7 @@ before_install:

install:
- pip install --default-timeout 60 -r requirements.txt
- pip install --default-timeout 60 -r requirements.dev.txt
- pip install --default-timeout 60 coverage coveralls nose-timer && export HAS_COVERALLS=1
- python setup.py install

Expand Down
3 changes: 3 additions & 0 deletions CONTRIBUTORS
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
Aaron Swartz
Andrew Eland
Andrew Kuchling
Ashley Sommer
Arve Knudsen
Chimezie Ogbuji
Daniel Krech
Expand All @@ -18,7 +19,9 @@ Kendall Clark
Leandro López
Lucio Torre
Michel Pelletier
Natanael Arndt
Nacho Barrientos Arias
Nicholas J. Car
Niklas Lindström
Phil Dawes
Phillip Pearson
Expand Down
2 changes: 1 addition & 1 deletion LICENSE
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
LICENSE AGREEMENT FOR RDFLIB
------------------------------------------------
Copyright (c) 2002-2017, RDFLib Team
Copyright (c) 2002-2020, RDFLib Team
See CONTRIBUTORS and http://github.com/RDFLib/rdflib
All rights reserved.

Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@ Please see the list for all packages/repositories here:
## Versions

* `5.x.y` supports Python 2.7 and 3.4+ and is [mostly backwards compatible with 4.2.2](https://rdflib.readthedocs.io/en/stable/upgrade4to5.html). Only bug fixes will be applied.
* `6.x.y` is the next major release which will support Python 3.5+. (Current master branch)
* `6.x.y` is the next major release which will support Python 3.6+. (Current master branch)


## Installation
Expand Down
2 changes: 1 addition & 1 deletion docs/developers.rst
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,7 @@ Compatibility

RDFLib 5.x.y tries to be compatible with python versions 2.7, 3.4, 3.5, 3.6, 3.7.

The current master branch (which will be released as 6.0.0) will only support Python 3.5 and newer.
The current master branch (which will be released as 6.0.0) will only support Python 3.6 and newer.


Releasing
Expand Down
40 changes: 27 additions & 13 deletions examples/sparqlstore_example.py
Original file line number Diff line number Diff line change
@@ -1,16 +1,15 @@
"""
A simple example showing how to use the SPARQLStore
Simple examples showing how to use the SPARQLStore
"""

import locale
from rdflib import Graph, URIRef, Namespace
from rdflib.plugins.stores.sparqlstore import SPARQLStore

if __name__ == "__main__":

dbo = Namespace("http://dbpedia.org/ontology/")

# using a Graph with the Store type string set to "SPARQLStore"
# EXAMPLE 1: using a Graph with the Store type string set to "SPARQLStore"
graph = Graph("SPARQLStore", identifier="http://dbpedia.org")
graph.open("http://dbpedia.org/sparql")

Expand All @@ -22,13 +21,28 @@
).replace(",", ".")
)

# using a SPARQLStore object directly
s = SPARQLStore(endpoint="http://dbpedia.org/sparql")
s.open(None)
pop = graph.value(
URIRef("http://dbpedia.org/resource/Brisbane"), dbo.populationTotal
)
print(
"According to DBPedia, Brisbane has a population of "
"{0:,}".format(int(pop), ",d")
)
# EXAMPLE 2: using a SPARQLStore object directly
st = SPARQLStore(query_endpoint="http://dbpedia.org/sparql")

for p in st.objects(URIRef("http://dbpedia.org/resource/Brisbane"), dbo.populationTotal):
print(
"According to DBPedia, Brisbane has a population of "
"{0:,}".format(int(pop), ",d")
)

# EXAMPLE 3: doing RDFlib triple navigation using SPARQLStore as a Graph()
graph = Graph("SPARQLStore", identifier="http://dbpedia.org")
graph.open("http://dbpedia.org/sparql")
# we are asking DBPedia for 3 skos:Concept instances
count = 0
from rdflib.namespace import RDF, SKOS
for s in graph.subjects(predicate=RDF.type, object=SKOS.Concept):
count += 1
print(s)
if count >= 3:
break

# EXAMPLE 4: using a SPARQL endpoint that requires Basic HTTP authentication
# NOTE: this example won't run since the endpoint isn't live (or real)
s = SPARQLStore(query_endpoint="http://fake-sparql-endpoint.com/repository/x", auth=("my_username", "my_password"))
# do normal Graph things
16 changes: 8 additions & 8 deletions rdflib/plugins/sparql/evaluate.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,9 @@
import collections
import itertools
import re
import requests
from urllib.request import urlopen, Request
from urllib.parse import urlencode
import json as j
from pyparsing import ParseException

from rdflib import Variable, Graph, BNode, URIRef, Literal
Expand Down Expand Up @@ -313,13 +315,11 @@ def evalServiceQuery(ctx, part):
}
# GET is easier to cache so prefer that if the query is not to long
if len(service_query) < 600:
response = requests.get(service_url, params=query_settings, headers=headers)
response = urlopen(Request(service_url + "?" + urlencode(query_settings), headers=headers))
else:
response = requests.post(
service_url, params=query_settings, headers=headers
)
if response.status_code == 200:
json = response.json()
response = urlopen(Request(service_url, data=urlencode(query_settings).encode(), headers=headers))
if response.status == 200:
json = j.loads(response.read())
variables = res["vars_"] = json["head"]["vars"]
# or just return the bindings?
res = json["results"]["bindings"]
Expand All @@ -329,7 +329,7 @@ def evalServiceQuery(ctx, part):
yield bound
else:
raise Exception(
"Service: %s responded with code: %s", service_url, response.status_code
"Service: %s responded with code: %s", service_url, response.status
)


Expand Down
77 changes: 36 additions & 41 deletions rdflib/plugins/stores/sparqlconnector.py
Original file line number Diff line number Diff line change
@@ -1,12 +1,13 @@
import logging
import threading
import requests

import os
from urllib.request import urlopen, Request
from urllib.parse import urlencode
from urllib.error import HTTPError, URLError
import base64

from io import BytesIO

from rdflib.query import Result
from rdflib import BNode

log = logging.getLogger(__name__)

Expand All @@ -26,7 +27,6 @@ class SPARQLConnectorException(Exception):


class SPARQLConnector(object):

"""
this class deals with nitty gritty details of talking to a SPARQL server
"""
Expand All @@ -37,29 +37,26 @@ def __init__(
update_endpoint=None,
returnFormat="xml",
method="GET",
auth=None,
**kwargs
):
"""
Any additional keyword arguments will be passed to requests, and can be used to setup timesouts, basic auth, etc.
auth, if present, must be a tuple of (username, password) used for Basic Authentication

Any additional keyword arguments will be passed to the request, and can be used to set up timeouts etc.
"""

self.returnFormat = returnFormat
self.query_endpoint = query_endpoint
self.update_endpoint = update_endpoint
self.kwargs = kwargs
self.method = method

# it is recommended to have one session object per thread/process. This assures that is the case.
# https://github.com/kennethreitz/requests/issues/1871

self._session = threading.local()

@property
def session(self):
k = "session_%d" % os.getpid()
self._session.__dict__.setdefault(k, requests.Session())
log.debug("Session %s %s", os.getpid(), id(self._session.__dict__[k]))
return self._session.__dict__[k]
if auth is not None:
assert type(auth) == tuple, "auth must be a tuple"
assert len(auth) == 2, "auth must be a tuple (user, password)"
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In my view it is not good practice to use assert in production code, so it should not appear in a library.
The Python reference states:

The current code generator emits no code for an assert statement when optimization is requested at compile time.
https://docs.python.org/3/reference/simple_stmts.html#assert

So I think it should be replaced by an if check that raises an exception.

base64string = base64.b64encode(bytes('%s:%s' % auth, 'ascii'))
self.kwargs.setdefault("headers", {})
self.kwargs["headers"].update({"Authorization": "Basic %s" % base64string.decode('utf-8')})

@property
def method(self):
Expand All @@ -72,19 +69,18 @@ def method(self, method):

self._method = method

def query(self, query, default_graph=None):

def query(self, query, default_graph: str = None, named_graph: str = None):
if not self.query_endpoint:
raise SPARQLConnectorException("Query endpoint not set!")

params = {"query": query}
if default_graph:
# this test ensures we don't have a useless (BNode) default graph URI, which calls to Graph().query() will add
if default_graph is not None and type(default_graph) != BNode:
params["default-graph-uri"] = default_graph

headers = {"Accept": _response_mime_types[self.returnFormat]}

args = dict(self.kwargs)
args.update(url=self.query_endpoint)

# merge params/headers dicts
args.setdefault("params", {})
Expand All @@ -94,47 +90,46 @@ def query(self, query, default_graph=None):

if self.method == "GET":
args["params"].update(params)
qsa = "?" + urlencode(args["params"])
try:
res = urlopen(Request(self.query_endpoint + qsa, headers=args["headers"]))
except Exception as e:
raise ValueError("You did something wrong formulating either the URI or your SPARQL query")
elif self.method == "POST":
args["headers"].update({"Content-Type": "application/sparql-query"})
args["data"] = params
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why don't we need params in args["data"] anymore?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The only parameter allowed for a POST SPARQL query is the query itself, as per the spec, so we don't allow any others. I've catered for auth on secure endpoints with a separate auth param.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

We still need to submit the default-graph-uri and named-graph-uri parameters, so params is still required.

Also I think we should support both application/x-www-form-urlencoded and application/sparql-query content-types. But I'm unsure how we would determine which content-type is supported by the store we are speaking with.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The params attribute is still required or at least the information about the default-graph.

If you consider the following:

Load some context aware quad store (e.g. the QuitStore) with the following data:

graph <http://example.org/> {
    <http://example.org/ExampleInstance> a <http://example.org/Example>
}
graph <http://othergraph.org/> {
    <http://example.org/OtherInstance> a <http://example.org/Example>
}

The query endpoint is assumed to be available at http://localhost:5000/sparql

class SPARQLStoreQuitStoreTestCase(unittest.TestCase):
    store_name = "SPARQLStore"
    path = "http://localhost:5000/sparql"
    create = False

    def setUp(self):
        store = SPARQLStore(query_endpoint=self.path, method="POST")
        self.conjunctivegraph = ConjunctiveGraph(store=store)

    def tearDown(self):
        self.conjunctivegraph.close()

    def test_Query(self):
        query = "select distinct ?inst where {?inst a <http://example.org/Example>}"
        graph = self.conjunctivegraph.get_context(URIRef("http://example.org/"))
        res = graph.query(query, initNs={})
        assert len(res) == 1, len(res)
        for i in res:
            assert type(i[0]) == URIRef, i[0].n3()
            assert i[0] == URIRef("http://example.org/ExampleInstance"), i[0].n3()

The query is executed as a POST request but does not convey the information about the default graph.

This issue is a combination of removing this line and sending the wrong Content-Type.
Actually, what the request was doing before this change and the merge of #1022 was sending a proper POST request with an application/x-www-form-urlencoded content type according to the SPARQL 1.1 Protocol (query via URL-encoded POST; https://www.w3.org/TR/2013/REC-sparql11-protocol-20130321/#query-operation).

try:
res = urlopen(Request(self.query_endpoint, data=query.encode(), headers=args["headers"]))
except HTTPError as e:
return e.code, str(e), None
else:
raise SPARQLConnectorException("Unknown method %s" % self.method)

res = self.session.request(self.method, **args)

res.raise_for_status()

return Result.parse(
BytesIO(res.content), content_type=res.headers["Content-type"]
BytesIO(res.read()), content_type=res.headers["Content-Type"].split(";")[0]
)

def update(self, update, default_graph=None):
def update(self, query, default_graph: str = None, named_graph: str = None):
if not self.update_endpoint:
raise SPARQLConnectorException("Query endpoint not set!")

params = {}

if default_graph:
if default_graph is not None:
params["using-graph-uri"] = default_graph

if named_graph is not None:
params["using-named-graph-uri"] = default_graph

headers = {
"Accept": _response_mime_types[self.returnFormat],
"Content-Type": "application/sparql-update",
}

args = dict(self.kwargs)

args.update(url=self.update_endpoint, data=update.encode("utf-8"))
args = dict(self.kwargs) # other QSAs

# merge params/headers dicts
args.setdefault("params", {})
args["params"].update(params)
args.setdefault("headers", {})
args["headers"].update(headers)

res = self.session.post(**args)

res.raise_for_status()

def close(self):
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why do we remove close() method?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because the Store is actually stateless with each operation and there's nothing to close. I originally dropped open() too but it's needed if the Store is called by Graph("SPARQLStore") as opposed to SPARQLStore()

self.session.close()
qsa = "?" + urlencode(args["params"])
res = urlopen(Request(self.update_endpoint + qsa, data=query.encode(), headers=args["headers"]))
Loading