Merge remote-tracking branch 'origin' into Statnett-255

nelly-hateva · nelly-hateva · commit bf1d7efbffd0 · 2025-11-27T09:18:45.000+02:00
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -1,12 +1,17 @@
-Next release
+1.4.0-rc1
 ============
 
 * [#255](https://github.com/statnett/Talk2PowerSystem_PM/issues/255): OBO auth flow for Cognite
-* [#255](https://github.com/statnett/Talk2PowerSystem_PM/issues/255): Update the version of `cognite-sdk` from `7.86.0` to `7.89.0`
+* [#255](https://github.com/statnett/Talk2PowerSystem_PM/issues/255): Update the version of `cognite-sdk` from `7.88.0` to `7.89.0`
+
+1.3.0-rc1
+============
+
 * [#256](https://github.com/statnett/Talk2PowerSystem_PM/issues/256): Update the queries for the ontologies and datasets information served from the `__about` endpoint
 * [#251](https://github.com/statnett/Talk2PowerSystem_PM/issues/251): Change N-Shot tool configuration to default to the base GraphDB
-* [#251](https://github.com/statnett/Talk2PowerSystem_PM/issues/251): Change N-Shot tool configuration SPARQL query template
+* [#276](https://github.com/statnett/Talk2PowerSystem_PM/issues/276): Change N-Shot tool configuration SPARQL query template, so that it outputs unique SPARQL queries
 * [#254](https://github.com/statnett/Talk2PowerSystem_PM/issues/254): Update the version of `ttyg` from `1.9.3` to `1.10.0`, so that the chat bot can run without admin access to GraphDB
+* [#254](https://github.com/statnett/Talk2PowerSystem_PM/issues/254): Update the version of `cognite-sdk` from `7.86.0` to `7.88.0`
 
 1.2.0-rc4
 ============
diff --git a/README.adoc b/README.adoc
@@ -98,7 +98,8 @@ Steps to create an official release (from the main branch):
 
 . `conda activate Talk2PowerSystemLLM`
 . `poetry version <major|minor|patch>`.
-. `git add pyproject.toml`
+. Update `CHANGELOG.md`
+. `git add pyproject.toml CHANGELOG.md`
 . `git commit -m "Bumping version from <previous-version> to <current-version>"`
 . `git push -u origin main`
 . Create a release from https://github.com/statnett/Talk2PowerSystem_LLM/releases[the GitHub interface]. The tag and the release title must match the version from poetry!
@@ -115,7 +116,8 @@ For example, if the current version is `1.2.0-rc1`, the next version must be `1.
 If the current version is a development version, then the next pre-release version must follow the semantic versioning convention on how to increment the major,
 minor and patch parts of the version and add `-rc1` at the end.
 For example, if the current version is `1.1.0-dev0` and the next release will be a major one, the next version must be `2.0.0-rc1`.
-. `git add pyproject.toml`
+. Update `CHANGELOG.md`
+. `git add pyproject.toml CHANGELOG.md`
 . `git commit -m "Bumping version from <previous-version> to <current-version>"`
 . `git push -u origin main`
 . Create a release from https://github.com/statnett/Talk2PowerSystem_LLM/releases[the GitHub interface]. The tag and the release title must match the version from poetry!
diff --git a/config/dev+retrieval.yaml b/config/dev+retrieval.yaml
@@ -32,18 +32,22 @@ tools:
       PREFIX retr: <http://www.ontotext.com/connectors/retrieval#>
       PREFIX retr-index: <http://www.ontotext.com/connectors/retrieval/instance#>
       PREFIX qa: <https://www.statnett.no/Talk2PowerSystem/qa#>
-      SELECT ?question ?query {{
-          [] a retr-index:{connector_name} ;
-            retr:query "{query}" ;
-            retr:limit {limit} ;
-            retr:entities ?entity .
-          ?entity retr:score ?score;
-            qa:question ?question.
-          ?template qa:paraphrase ?entity;
-            qa:querySparql ?query.
-          FILTER (?score > {score})
+      SELECT (REPLACE(GROUP_CONCAT(?q; separator="@"), "(.*?)@.*", "$1") AS ?question) ?query {{
+          SELECT ?q ?query ?score {{
+              [] a retr-index:{connector_name} ;
+                  retr:query "{query}" ;
+                  retr:limit 100;
+                  retr:entities ?entity .
+              ?entity retr:score ?score;
+                  qa:question ?q.
+              ?template qa:paraphrase ?entity;
+                  qa:querySparql ?query.
+              FILTER (?score > {score})
+          }}
+          ORDER BY DESC(?score)
       }}
-      ORDER BY DESC(?score)
+      GROUP BY ?query
+      LIMIT {limit}
 llm:
   azure_endpoint: "https://statnett.openai.azure.com/"
   model: "gpt-4.1"
diff --git a/config/ontology/cim-subset-pretty.ttl b/config/ontology/cim-subset-pretty.ttl
diff --git a/docs/AgentConfig.md b/docs/AgentConfig.md
@@ -38,18 +38,22 @@ tools:
       PREFIX retr: <http://www.ontotext.com/connectors/retrieval#>
       PREFIX retr-index: <http://www.ontotext.com/connectors/retrieval/instance#>
       PREFIX qa: <https://www.statnett.no/Talk2PowerSystem/qa#>
-      SELECT ?question ?query {{
-          [] a retr-index:{connector_name} ;
-            retr:query "{query}" ;
-            retr:limit {limit} ;
-            retr:entities ?entity .
-          ?entity retr:score ?score;
-            qa:question ?question.
-          ?template qa:paraphrase ?entity;
-            qa:querySparql ?query.
-          FILTER (?score > {score})
+      SELECT (REPLACE(GROUP_CONCAT(?q; separator="@"), "(.*?)@.*", "$1") AS ?question) ?query {{
+          SELECT ?q ?query ?score {{
+              [] a retr-index:{connector_name} ;
+                  retr:query "{query}" ;
+                  retr:limit 100;
+                  retr:entities ?entity .
+              ?entity retr:score ?score;
+                  qa:question ?q.
+              ?template qa:paraphrase ?entity;
+                  qa:querySparql ?query.
+              FILTER (?score > {score})
+          }}
+          ORDER BY DESC(?score)
       }}
-      ORDER BY DESC(?score)
+      GROUP BY ?query
+      LIMIT {limit}
   cognite:
     base_url: https://statnett.cognitedata.com
     client_name: talk2powersystem
@@ -146,11 +150,11 @@ LIMIT {limit}
 - `tools.cognite.project` - OPTIONAL, DEFAULT=`prod` - Cognite Data Fusion project name.
 One of `dev1`, `dev2`, `dev3`, `test`, `prod` according to [CDF access from RNDP](https://github.com/statnett/Talk2PowerSystem_PM/wiki/CDF-access-from-RNDP).
 - `tools.cognite.client_name` - OPTIONAL, DEFAULT=`talk2powersystem` - Name of the client for logging purposes.
-- `tools.cognite.interactive_client_id` - OPTIONAL - If provided, interactive authentication is used (local run of Jupyter Notebook).
+- `tools.cognite.interactive_client_id` - OPTIONAL - If provided, interactive authentication is used (when you run the backend app with uvicorn, or the Jupyter Notebook, on a dev machine).
   Otherwise, `tools.cognite.token_file_path` or `tools.cognite.client_secret` must be provided.
 - `tools.cognite.tenant_id` - REQUIRED iff `tools.cognite.interactive_client_id` is present - Azure tenant ID. For example, `a8d61462-f252-44b2-bf6a-d7231960c041`.
-- `tools.cognite.token_file_path` - OPTIONAL - Full path on the disk to the cognite token file (run of Jupyter Notebook on RNDP). For example, `/var/run/secrets/microsoft.com/entra/cognite`.
-* `tools.cognite.client_secret` - OPTIONAL - Client secret for the Cognite confidential application (running from the backend app).
+- `tools.cognite.token_file_path` - OPTIONAL - Full path on the disk to the Cognite token file (used when you run the Jupyter Notebook on RNDP). For example, `/var/run/secrets/microsoft.com/entra/cognite`.
+- `tools.cognite.client_secret` - OPTIONAL - Client secret for the Cognite confidential application (used for the backend app running on RNDP).
 
 ## `llm`
 
diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "Talk2PowerSystemLLM"
-version = "1.2.0-rc4"
+version = "1.4.0-rc1"
 description = "Talk to Power System LLM"
 authors = []
 readme = "README.adoc"
@@ -51,5 +51,4 @@ build-backend = "poetry.core.masonry.api"
 [project.scripts]
 evaluation = 'talk2powersystemllm.scripts.run_evaluation:main'
 qa_dataset2rdf = 'talk2powersystemllm.scripts.qa_dataset2rdf:main'
-find_min_retrieval_limit_on_dev = 'talk2powersystemllm.scripts.find_min_retrieval_limit_on_dev:main'
 benchmark_graphdb_ttyg = 'talk2powersystemllm.scripts.benchmark_graphdb_ttyg:main'
diff --git a/src/jupyter_notebooks/Talk2PowerSystem.ipynb b/src/jupyter_notebooks/Talk2PowerSystem.ipynb
@@ -111,7 +111,7 @@
     "\n",
     "\n",
     "conf = RunnableConfig(configurable={\"thread_id\": \"thread-123\"})\n",
-    "messages = {\"messages\": [(\"user\", \"List timeseries\")]}\n",
+    "messages = {\"messages\": [(\"user\", \"List all transformers within substation OSLO.\")]}\n",
     "last_message_id = run_agent(agent, messages, conf)"
    ]
   },
diff --git a/src/talk2powersystemllm/app/trouble.md b/src/talk2powersystemllm/app/trouble.md
@@ -1345,7 +1345,7 @@ Sample Response Body:
       },
       "sample_sparql_queries": {
         "enabled": true,
-        "sparql_query_template": "PREFIX retr: <http://www.ontotext.com/connectors/retrieval#>\nPREFIX retr-index: <http://www.ontotext.com/connectors/retrieval/instance#>\nPREFIX qa: <https://www.statnett.no/Talk2PowerSystem/qa#>\nSELECT ?question ?query {{\n    [] a retr-index:{connector_name} ;\n      retr:query \"{query}\" ;\n      retr:limit {limit} ;\n      retr:entities ?entity .\n    ?entity retr:score ?score;\n      qa:question ?question.\n    ?template qa:paraphrase ?entity;\n      qa:querySparql ?query.\n    FILTER (?score > {score})\n}}\nORDER BY DESC(?score)\n",
+        "sparql_query_template": "PREFIX retr: <http://www.ontotext.com/connectors/retrieval#>\nPREFIX retr-index: <http://www.ontotext.com/connectors/retrieval/instance#>\nPREFIX qa: <https://www.statnett.no/Talk2PowerSystem/qa#>\nSELECT (REPLACE(GROUP_CONCAT(?q; separator=\"@\"), \"(.*?)@.*\", \"$1\") AS ?question) ?query {{\n    SELECT ?q ?query ?score {{\n        [] a retr-index:{connector_name} ;\n            retr:query \"{query}\" ;\n            retr:limit 100;\n            retr:entities ?entity .\n        ?entity retr:score ?score;\n            qa:question ?q.\n        ?template qa:paraphrase ?entity;\n            qa:querySparql ?query.\n        FILTER (?score > {score})\n    }}\n    ORDER BY DESC(?score)\n}}\nGROUP BY ?query\nLIMIT {limit}\n",
         "connector_name": "qa_dataset"
       },
       "retrieve_data_points": {
diff --git a/src/talk2powersystemllm/qa_dataset/min_retrieval_limit.py b/src/talk2powersystemllm/qa_dataset/min_retrieval_limit.py
diff --git a/src/talk2powersystemllm/qa_dataset/qa_dataset2rdf.py b/src/talk2powersystemllm/qa_dataset/qa_dataset2rdf.py
@@ -64,7 +64,8 @@ def build_qa_dataset_graph(split):
             paraphrase_iri = URIRef(f"Paraphrase_{template['template_id']}_{n}", base_ns)
             graph.add((paraphrase_iri, RDF.type, qa_dataset_ns.Paraphrase))
             graph.add((template_iri, qa_dataset_ns.paraphrase, paraphrase_iri))
-            graph.add((paraphrase_iri, qa_dataset_ns.question, Literal(transform_paraphrase(paraphrase))))
-            verify_unique_placeholders(transform_paraphrase(paraphrase))
+            transformed_paraphrase = transform_paraphrase(paraphrase)
+            verify_unique_placeholders(transformed_paraphrase)
+            graph.add((paraphrase_iri, qa_dataset_ns.question, Literal(transformed_paraphrase)))
 
     return graph
diff --git a/src/talk2powersystemllm/scripts/find_min_retrieval_limit_on_dev.py b/src/talk2powersystemllm/scripts/find_min_retrieval_limit_on_dev.py

Original file line number	Diff line number	Diff line change
`@@ -111,7 +111,7 @@`
`111`	`111`	`"\n",`
`112`	`112`	`"\n",`
`113`	`113`	`"conf = RunnableConfig(configurable={\"thread_id\": \"thread-123\"})\n",`
`114`		`- "messages = {\"messages\": [(\"user\", \"List timeseries\")]}\n",`
	`114`	`+ "messages = {\"messages\": [(\"user\", \"List all transformers within substation OSLO.\")]}\n",`
`115`	`115`	`"last_message_id = run_agent(agent, messages, conf)"`
`116`	`116`	`]`
`117`	`117`	`},`