diff --git a/code_generator_TopCPToolkit/boot.sh b/code_generator_TopCPToolkit/boot.sh old mode 100644 new mode 100755 diff --git a/code_generator_TopCPToolkit/servicex/TopCP_code_generator/__init__.py b/code_generator_TopCPToolkit/servicex/TopCP_code_generator/__init__.py index 90ddf5c40..bdbeb82c2 100644 --- a/code_generator_TopCPToolkit/servicex/TopCP_code_generator/__init__.py +++ b/code_generator_TopCPToolkit/servicex/TopCP_code_generator/__init__.py @@ -27,11 +27,21 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +import os +import json import servicex_codegen from servicex.TopCP_code_generator.request_translator import TopCPTranslator def create_app(test_config=None, provided_translator=None): + allowed_images_json = os.environ.get("TOPCP_ALLOWED_IMAGES") + + if allowed_images_json: + allowed_images = json.loads(allowed_images_json) + assert isinstance(allowed_images, list) + for item in allowed_images: + assert isinstance(item, str) + return servicex_codegen.create_app( test_config, provided_translator=( diff --git a/code_generator_TopCPToolkit/servicex/TopCP_code_generator/query_translate.py b/code_generator_TopCPToolkit/servicex/TopCP_code_generator/query_translate.py index ac7ff9ac3..925c64d71 100644 --- a/code_generator_TopCPToolkit/servicex/TopCP_code_generator/query_translate.py +++ b/code_generator_TopCPToolkit/servicex/TopCP_code_generator/query_translate.py @@ -1,4 +1,3 @@ -import json import os options = { @@ -36,12 +35,15 @@ "ifTrue": ["--no-filter"], "ifFalse": None, }, + "docker_image": { + "properType": str, + "properTypeString": "string", + "optional": True, + }, } -def generate_files_from_query(query, query_file_path): - jquery = json.loads(query) - +def generate_files_from_query(jquery: dict, query_file_path): runTopCommand = [ "runTop_el.py", "-i", @@ -52,16 +54,19 @@ def generate_files_from_query(query, query_file_path): "customConfig", ] - # ensure all keys are specified + # ensure all required keys are specified for key in options: if key not in jquery: + # Skip optional parameters + if options[key].get("optional", False): + continue raise ValueError( key + " must be specified. May be type None or ", options[key]["properTypeString"], ) for key in jquery: - # ensure only aviable options are allowed + # ensure only available options are allowed if key not in options: raise KeyError( key + " is not implemented. Available keys: " + str(options.keys()) @@ -78,7 +83,7 @@ def generate_files_from_query(query, query_file_path): ) # check for reco.yaml, parton.yaml and particle.yaml files - if isinstance(jquery[key], str): + if isinstance(jquery[key], str) and "fileName" in options[key]: with open( os.path.join(query_file_path, options[key]["fileName"]), "w" ) as file: diff --git a/code_generator_TopCPToolkit/servicex/TopCP_code_generator/request_translator.py b/code_generator_TopCPToolkit/servicex/TopCP_code_generator/request_translator.py index 76ebcf0c8..46348a893 100644 --- a/code_generator_TopCPToolkit/servicex/TopCP_code_generator/request_translator.py +++ b/code_generator_TopCPToolkit/servicex/TopCP_code_generator/request_translator.py @@ -26,7 +26,9 @@ # OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import os +import json import shutil + from . import query_translate from servicex_codegen.code_generator import ( CodeGenerator, @@ -35,6 +37,24 @@ ) +def validate_custom_docker_image(image_name: str) -> bool: + allowed_images_json = os.environ.get("TOPCP_ALLOWED_IMAGES") + + if not allowed_images_json: + raise GenerateCodeException("Custom Docker images are not allowed.") + + try: + allowed_prefixes = json.loads(allowed_images_json) + for prefix in allowed_prefixes: + if image_name.startswith(prefix): + return True + + raise GenerateCodeException(f"Custom Docker image '{image_name}' not allowed.") + + except json.JSONDecodeError: + raise GenerateCodeException("TopCP allowed images are improperly configured.") + + class TopCPTranslator(CodeGenerator): # Generate the code. Ignoring caching for now def generate_code(self, query, cache_path: str): @@ -62,11 +82,20 @@ def generate_code(self, query, cache_path: str): capabilities_path = os.environ.get( "CAPABILITIES_PATH", "/home/servicex/transformer_capabilities.json" ) + + jquery = json.loads(query) + query_translate.generate_files_from_query(jquery, query_file_path) + shutil.copyfile( capabilities_path, os.path.join(query_file_path, "transformer_capabilities.json"), ) - query_translate.generate_files_from_query(query, query_file_path) + results = GeneratedFileResult(_hash, query_file_path) + + if "docker_image" in jquery: + docker_image = jquery["docker_image"] + validate_custom_docker_image(docker_image) + results.image = docker_image - return GeneratedFileResult(_hash, query_file_path) + return results diff --git a/code_generator_TopCPToolkit/servicex/templates/transform_single_file.py b/code_generator_TopCPToolkit/servicex/templates/transform_single_file.py index 264b1c51f..7efe01a6d 100644 --- a/code_generator_TopCPToolkit/servicex/templates/transform_single_file.py +++ b/code_generator_TopCPToolkit/servicex/templates/transform_single_file.py @@ -12,21 +12,23 @@ def transform_single_file(file_path: str, output_path: Path, output_format: str) # create input.txt file for event loop and insert file_path as only line with open("input.txt", "w") as f: f.write(file_path) - # move reco.yaml, parton.yaml and particle.yaml if they exit to CONFIG_LOC loacation + + # move reco.yaml, parton.yaml and particle.yaml if they exist to CONFIG_LOC location + config_loc = os.environ.get("CONFIG_LOC", os.getcwd()) if os.path.exists("/generated/reco.yaml"): shutil.copyfile( "/generated/reco.yaml", - os.path.join(os.environ.get("CONFIG_LOC"), "reco.yaml"), + os.path.join(config_loc, "reco.yaml"), ) if os.path.exists("/generated/parton.yaml"): shutil.copyfile( "/generated/parton.yaml", - os.path.join(os.environ.get("CONFIG_LOC"), "parton.yaml"), + os.path.join(config_loc, "parton.yaml"), ) if os.path.exists("/generated/particle.yaml"): shutil.copyfile( "/generated/particle.yaml", - os.path.join(os.environ.get("CONFIG_LOC"), "particle.yaml"), + os.path.join(config_loc, "particle.yaml"), ) generated_transformer.runTop_el() diff --git a/code_generator_TopCPToolkit/tests/test_src.py b/code_generator_TopCPToolkit/tests/test_src.py index edc0bcd94..ae157b6bb 100755 --- a/code_generator_TopCPToolkit/tests/test_src.py +++ b/code_generator_TopCPToolkit/tests/test_src.py @@ -88,6 +88,110 @@ def test_generate_code(): translator.generate_code(query, tmpdirname) +def test_generate_code_with_custom_docker_image(): + os.environ["TEMPLATE_PATH"] = "servicex/templates/transform_single_file.py" + os.environ["CAPABILITIES_PATH"] = "transformer_capabilities.json" + os.environ["TOPCP_ALLOWED_IMAGES"] = '["sslhep/custom_image:"]' + + with tempfile.TemporaryDirectory() as tmpdirname: + translator = TopCPTranslator() + query = ( + '{"reco": "CommonServices:\\n systematicsHistogram: \'listOfSystematics\'\\n\\n' + "PileupReweighting: {}\\n\\nEventCleaning:\\n runEventCleaning: False\\n" + " runGRL: False\\n\\nElectrons:\\n - containerName: 'AnaElectrons'\\n" + " crackVeto: True\\n IFFClassification: {}\\n WorkingPoint:\\n" + " - selectionName: 'loose'\\n identificationWP: 'TightLH'\\n" + " isolationWP: 'NonIso'\\n noEffSF: True\\n" + " - selectionName: 'tight'\\n identificationWP: 'TightLH'\\n" + " isolationWP: 'Tight_VarRad'\\n noEffSF: True\\n" + " PtEtaSelection:\\n minPt: 25000.0\\n maxEta: 2.47\\n" + " useClusterEta: True\\n\\n" + "# After configuring each container, many variables will be saved automatically.\\n" + "Output:\\n treeName: 'reco'\\n vars: []\\n metVars: []\\n containers:\\n" + " # Format should follow: ':'\\n" + " el_: 'AnaElectrons'\\n '': 'EventInfo'\\n commands:\\n" + " # Turn output branches on and off with 'enable' and 'disable'\\n\\n" + 'AddConfigBlocks: []\\n", "parton": null, "particle": null, "max_events": 100, ' + '"no_systematics": true, "no_filter": false, ' + '"docker_image": "sslhep/custom_image:test"}' + ) + + expected_hash = "f30db9cc91520d3fc08cffd95b072634" + result = translator.generate_code(query, tmpdirname) + + # is the generated code at least syntactically valid Python? + try: + exec( + open(os.path.join(result.output_dir, "generated_transformer.py")).read() + ) + except SyntaxError: + pytest.fail("Generated Python is not valid code") + + assert result.hash == expected_hash + assert result.image == "sslhep/custom_image:test" + assert result.output_dir == os.path.join(tmpdirname, expected_hash) + + +def test_generate_code_fails_with_unknown_selection_key(): + os.environ["TEMPLATE_PATH"] = "servicex/templates/transform_single_file.py" + os.environ["CAPABILITIES_PATH"] = "transformer_capabilities.json" + + with tempfile.TemporaryDirectory() as tmpdirname: + translator = TopCPTranslator() + query = ( + '{"reco": "CommonServices:\\n systematicsHistogram: \'listOfSystematics\'\\n\\n' + "PileupReweighting: {}\\n\\nEventCleaning:\\n runEventCleaning: False\\n" + " runGRL: False\\n\\nElectrons:\\n - containerName: 'AnaElectrons'\\n" + " crackVeto: True\\n IFFClassification: {}\\n WorkingPoint:\\n" + " - selectionName: 'loose'\\n identificationWP: 'TightLH'\\n" + " isolationWP: 'NonIso'\\n noEffSF: True\\n" + " - selectionName: 'tight'\\n identificationWP: 'TightLH'\\n" + " isolationWP: 'Tight_VarRad'\\n noEffSF: True\\n" + " PtEtaSelection:\\n minPt: 25000.0\\n maxEta: 2.47\\n" + " useClusterEta: True\\n\\n" + "# After configuring each container, many variables will be saved automatically.\\n" + "Output:\\n treeName: 'reco'\\n vars: []\\n metVars: []\\n containers:\\n" + " # Format should follow: ':'\\n" + " el_: 'AnaElectrons'\\n '': 'EventInfo'\\n commands:\\n" + " # Turn output branches on and off with 'enable' and 'disable'\\n\\n" + 'AddConfigBlocks: []\\n", "parton": null, "particle": null, "max_events": 100, ' + '"no_systematics": true, "no_filter": false, "unknown_key": "unknown_value"}' + ) + + with pytest.raises(KeyError): + translator.generate_code(query, tmpdirname) + + +def test_generate_code_fails_with_missing_required_selection_key(): + os.environ["TEMPLATE_PATH"] = "servicex/templates/transform_single_file.py" + os.environ["CAPABILITIES_PATH"] = "transformer_capabilities.json" + + with tempfile.TemporaryDirectory() as tmpdirname: + translator = TopCPTranslator() + query = ( + '{"reco": "CommonServices:\\n systematicsHistogram: \'listOfSystematics\'\\n\\n' + "PileupReweighting: {}\\n\\nEventCleaning:\\n runEventCleaning: False\\n" + " runGRL: False\\n\\nElectrons:\\n - containerName: 'AnaElectrons'\\n" + " crackVeto: True\\n IFFClassification: {}\\n WorkingPoint:\\n" + " - selectionName: 'loose'\\n identificationWP: 'TightLH'\\n" + " isolationWP: 'NonIso'\\n noEffSF: True\\n" + " - selectionName: 'tight'\\n identificationWP: 'TightLH'\\n" + " isolationWP: 'Tight_VarRad'\\n noEffSF: True\\n" + " PtEtaSelection:\\n minPt: 25000.0\\n maxEta: 2.47\\n" + " useClusterEta: True\\n\\n" + "# After configuring each container, many variables will be saved automatically.\\n" + "Output:\\n treeName: 'reco'\\n vars: []\\n metVars: []\\n containers:\\n" + " # Format should follow: ':'\\n" + " el_: 'AnaElectrons'\\n '': 'EventInfo'\\n commands:\\n" + " # Turn output branches on and off with 'enable' and 'disable'\\n\\n" + 'AddConfigBlocks: []\\n", "parton": null, "particle": null, "max_events": 100, ' + '"no_systematics": true}' + ) + + with pytest.raises(ValueError): + translator.generate_code(query, tmpdirname) + + def test_app(): import servicex.TopCP_code_generator diff --git a/code_generator_TopCPToolkit/tests/test_validate_custom_docker_image.py b/code_generator_TopCPToolkit/tests/test_validate_custom_docker_image.py new file mode 100644 index 000000000..4320e5f2c --- /dev/null +++ b/code_generator_TopCPToolkit/tests/test_validate_custom_docker_image.py @@ -0,0 +1,59 @@ +import pytest +from pytest import MonkeyPatch +from servicex.TopCP_code_generator.request_translator import ( + validate_custom_docker_image, +) +from servicex_codegen.code_generator import GenerateCodeException + + +class TestValidateCustomDockerImage: + """Tests for the validate_custom_docker_image function""" + + def test_validate_with_matching_prefix(self, monkeypatch: MonkeyPatch): + monkeypatch.setenv( + "TOPCP_ALLOWED_IMAGES", '["sslhep/servicex_science_image_topcp:"]' + ) + result = validate_custom_docker_image( + "sslhep/servicex_science_image_topcp:2.17.0" + ) + assert result is True + + def test_validate_with_multiple_prefixes(self, monkeypatch: MonkeyPatch): + """Test validation with multiple allowed prefixes""" + monkeypatch.setenv( + "TOPCP_ALLOWED_IMAGES", + '["sslhep/servicex_science_image_topcp:", "docker.io/ssl-hep/"]', + ) + assert ( + validate_custom_docker_image("sslhep/servicex_science_image_topcp:latest") + is True + ) + assert validate_custom_docker_image("docker.io/ssl-hep/custom:v1") is True + + def test_validate_with_no_matching_prefix(self, monkeypatch: MonkeyPatch): + """Test validation fails when image doesn't match any allowed prefix""" + monkeypatch.setenv( + "TOPCP_ALLOWED_IMAGES", '["sslhep/servicex_science_image_topcp:"]' + ) + with pytest.raises(GenerateCodeException, match="not allowed"): + validate_custom_docker_image("unauthorized/image:latest") + + def test_validate_with_no_env_variable(self, monkeypatch: MonkeyPatch): + """Test validation fails when TOPCP_ALLOWED_IMAGES is not set""" + monkeypatch.delenv("TOPCP_ALLOWED_IMAGES") + with pytest.raises( + GenerateCodeException, match="Custom Docker images are not allowed." + ): + validate_custom_docker_image("sslhep/servicex_science_image_topcp:2.17.0") + + def test_validate_with_invalid_json(self, monkeypatch: MonkeyPatch): + """Test validation fails with invalid JSON in env variable""" + monkeypatch.setenv("TOPCP_ALLOWED_IMAGES", "not-valid-json") + with pytest.raises(GenerateCodeException, match="improperly configured"): + validate_custom_docker_image("sslhep/servicex_science_image_topcp:2.17.0") + + def test_validate_with_empty_list(self, monkeypatch: MonkeyPatch): + """Test validation fails when allowed list is empty""" + monkeypatch.setenv("TOPCP_ALLOWED_IMAGES", "[]") + with pytest.raises(GenerateCodeException, match="not allowed"): + validate_custom_docker_image("sslhep/servicex_science_image_topcp:2.17.0") diff --git a/docs/deployment/reference.md b/docs/deployment/reference.md index 77e62c00c..cee82e634 100644 --- a/docs/deployment/reference.md +++ b/docs/deployment/reference.md @@ -74,6 +74,13 @@ parameters for the [rabbitMQ](https://github.com/bitnami/charts/tree/master/bitn | `codegen.cmssw-5-3-32.enabled` | Deploy the CMS AOD code generator? - also all of the code gen settings above are available | true | | `codegen.atlasr21.enabled` | Deploy the ATLAS FuncADL Release 21 code generator? - also all of the code gen settings above are available | true | | `codegen.atlasr22.enabled` | Deploy the ATLAS FuncADL Release 22 code generator? - also all of the code gen settings above are available | true | +| `codegen.topcp.enabled` | Deploy the TopCP code generator? | true | +| `codegen.topcp.image` | TopCP ode generator image | `sslhep/servicex_code_gen_topcp` | +| `codegen.topcp.pullPolicy` | TopCP code generator image pull policy | Always | +| `codegen.topcp.tag` | TopCP code generator image tag | develop | +| `codegen.topcp.defaultScienceContainerImage` | The default image used by a TopCP transformer container | sslhep/servicex_science_image_topcp | +| `codegen.topcp.defaultScienceContainerTag` | The default tag used by a TopCP transformer container | 2.17.0-25.2.45 | +| `codegen.topcp.allowedImages` | A list of strings, of which one must be a valid prefix of a submitted custom TopCP docker image | - "sslhep/servicex_science_image_topcp:" | | `codegen.python.enabled` | Deploy the python uproot code generator? - also all of the code gen settings, above are available | true | | `x509Secrets.image` | X509 Secret Service image name | `sslhep/x509-secrets` | | `x509Secrets.tag` | X509 Secret Service image tag | `latest` | diff --git a/helm/servicex/templates/codegen/deployment.yaml b/helm/servicex/templates/codegen/deployment.yaml index 97d225ae9..09a8d8228 100644 --- a/helm/servicex/templates/codegen/deployment.yaml +++ b/helm/servicex/templates/codegen/deployment.yaml @@ -22,6 +22,10 @@ spec: env: - name: INSTANCE_NAME value: {{ $.Release.Name }} + {{- if eq $codeGenName "topcp" }} + - name: TOPCP_ALLOWED_IMAGES + value: {{ $.Values.codeGen.topcp.allowedImages | toJson | quote }} + {{- end }} - name: TRANSFORMER_SCIENCE_IMAGE value: {{ .defaultScienceContainerImage }}:{{ .defaultScienceContainerTag }} {{- $compressionAlgorithm := .compressionAlgorithm | default "ZSTD" }} diff --git a/helm/servicex/values.yaml b/helm/servicex/values.yaml index 877df1c03..c7abd00f3 100644 --- a/helm/servicex/values.yaml +++ b/helm/servicex/values.yaml @@ -108,6 +108,8 @@ codeGen: tag: develop defaultScienceContainerImage: sslhep/servicex_science_image_topcp defaultScienceContainerTag: 2.17.0-25.2.45 + allowedImages: + - "sslhep/servicex_science_image_topcp:" didFinder: CERNOpenData: @@ -175,7 +177,6 @@ postgresql: image: repository: bitnamilegacy/os-shell tag: 12-debian-12-r51 - global: postgresql: auth: @@ -200,7 +201,6 @@ rabbitmq: image: repository: bitnamilegacy/os-shell tag: 12-debian-12-r51 - extraConfiguration: |- consumer_timeout = 3600000 diff --git a/servicex_app/servicex_app_test/resources/transformation/test_submit.py b/servicex_app/servicex_app_test/resources/transformation/test_submit.py index 4eba86c1b..79662b398 100644 --- a/servicex_app/servicex_app_test/resources/transformation/test_submit.py +++ b/servicex_app/servicex_app_test/resources/transformation/test_submit.py @@ -41,7 +41,6 @@ class TestSubmitTransformationRequest(ResourceTestBase): - @staticmethod def _generate_transformation_request(**kwargs): request = { @@ -432,7 +431,6 @@ def test_submit_transformation_request_no_docker_check( code_gen_service=mock_codegen, ) with client.application.app_context(): - request = self._generate_transformation_request() response = client.post( "/servicex/transformation", json=request, headers=self.fake_header() @@ -560,7 +558,6 @@ def test_submit_transformation_auth_enabled( extra_config={"ENABLE_AUTH": True}, code_gen_service=mock_codegen ) with client.application.app_context(): - response = client.post( "/servicex/transformation", json=self._generate_transformation_request(),