Skip to content

Commit be39a92

Browse files
authored
Duplicate samples generate an error (#207)
Note: distinct samples with duplicate IDs (provided or derived) are disambigutated. Duplicate samples, i.e. those that are the result of a copy/paste, are the only ones that generate an error. Fix for #201
1 parent 3703e7f commit be39a92

File tree

5 files changed

+56
-14
lines changed

5 files changed

+56
-14
lines changed

packages/gapic-generator/gapic/generator/generator.py

Lines changed: 19 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,7 @@
2121
from collections import (OrderedDict, defaultdict)
2222
from gapic.samplegen_utils.utils import (
2323
coerce_response_name, is_valid_sample_cfg)
24-
from gapic.samplegen_utils.types import InvalidConfig
24+
from gapic.samplegen_utils.types import (InvalidConfig, DuplicateSample)
2525
from gapic.samplegen import (manifest, samplegen)
2626
from gapic.generator import options
2727
from gapic.generator import formatter
@@ -118,7 +118,12 @@ def _generate_samples_and_manifest(
118118
Returns:
119119
Dict[str, CodeGeneratorResponse.File]: A dict mapping filepath to rendered file.
120120
"""
121-
id_to_samples: DefaultDict[str, List[Any]] = defaultdict(list)
121+
# The two-layer data structure lets us do two things:
122+
# * detect duplicate samples, which is an error
123+
# * detect distinct samples with the same ID, which are disambiguated
124+
id_to_hash_to_spec: DefaultDict[str, Dict[str, Any]] = defaultdict(
125+
dict)
126+
122127
for config_fpath in self._sample_configs:
123128
with open(config_fpath) as f:
124129
configs = yaml.safe_load_all(f.read())
@@ -137,23 +142,27 @@ def _generate_samples_and_manifest(
137142
#
138143
# Ideally the sample author should pick a descriptive, unique ID,
139144
# but this may be impractical and can be error-prone.
145+
spec_hash = sha256(str(spec).encode('utf8')).hexdigest()[:8]
140146
sample_id = (spec.get("id")
141147
or spec.get("region_tag")
142-
or sha256(str(spec).encode('utf8')).hexdigest()[:8])
143-
148+
or spec_hash)
144149
spec["id"] = sample_id
145-
id_to_samples[sample_id].append(spec)
150+
151+
hash_to_spec = id_to_hash_to_spec[sample_id]
152+
if spec_hash in hash_to_spec:
153+
raise DuplicateSample(
154+
f"Duplicate samplegen spec found: {spec}")
155+
156+
hash_to_spec[spec_hash] = spec
146157

147158
out_dir = "samples"
148159
fpath_to_spec_and_rendered = {}
149-
for samples in id_to_samples.values():
150-
for spec in samples:
151-
id_is_unique = len(samples) == 1
160+
for hash_to_spec in id_to_hash_to_spec.values():
161+
for spec_hash, spec in hash_to_spec.items():
162+
id_is_unique = len(hash_to_spec) == 1
152163
# The ID is used to generate the file name and by sample tester
153164
# to link filenames to invoked samples. It must be globally unique.
154165
if not id_is_unique:
155-
spec_hash = sha256(
156-
str(spec).encode('utf8')).hexdigest()[:8]
157166
spec["id"] += f"_{spec_hash}"
158167

159168
sample = samplegen.generate_sample(

packages/gapic-generator/gapic/samplegen_utils/types.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,10 @@ class InvalidSampleFpath(SampleError):
8080
pass
8181

8282

83+
class DuplicateSample(SampleError):
84+
pass
85+
86+
8387
class CallingForm(Enum):
8488
Request = auto()
8589
RequestPaged = auto()

packages/gapic-generator/gapic/samplegen_utils/utils.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -67,7 +67,7 @@ def parse_version(version_str: str) -> Tuple[int, ...]:
6767
# Yaml may return a dict, a list, or a str
6868
isinstance(doc, dict)
6969
and doc.get("type") == VALID_CONFIG_TYPE
70-
and parse_version(doc.get(version_token, "")) >= MIN_SCHEMA_VERSION
70+
and parse_version(doc.get(version_token, "")) >= min_version
7171
and doc.get("samples")
7272
)
7373

packages/gapic-generator/tests/unit/generator/test_generator.py

Lines changed: 31 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -433,7 +433,7 @@ def test_samplegen_id_disambiguation(mock_gmtime, mock_generate_sample, fs):
433433
content="\n",
434434
),
435435
CodeGeneratorResponse.File(
436-
name="samples/squid_sample_c8014108.py",
436+
name="samples/squid_sample_55051b38.py",
437437
content="\n",
438438
),
439439
CodeGeneratorResponse.File(
@@ -457,8 +457,8 @@ def test_samplegen_id_disambiguation(mock_gmtime, mock_generate_sample, fs):
457457
path: '{base_path}/squid_sample_91a465c6.py'
458458
region_tag: humboldt_tag
459459
- <<: *python
460-
sample: squid_sample_c8014108
461-
path: '{base_path}/squid_sample_c8014108.py'
460+
sample: squid_sample_55051b38
461+
path: '{base_path}/squid_sample_55051b38.py'
462462
region_tag: squid_sample
463463
- <<: *python
464464
sample: 157884ee
@@ -471,6 +471,34 @@ def test_samplegen_id_disambiguation(mock_gmtime, mock_generate_sample, fs):
471471
assert actual_response == expected_response
472472

473473

474+
def test_generator_duplicate_samples(fs):
475+
config_fpath = "samples.yaml"
476+
fs.create_file(
477+
config_fpath,
478+
contents=dedent(
479+
'''
480+
# Note: the samples are duplicates.
481+
type: com.google.api.codegen.SampleConfigProto
482+
schema_version: 1.2.0
483+
samples:
484+
- id: squid_sample
485+
region_tag: humboldt_tag
486+
rpc: get_squid
487+
- id: squid_sample
488+
region_tag: humboldt_tag
489+
rpc: get_squid
490+
'''
491+
)
492+
)
493+
494+
generator = make_generator('samples=samples.yaml')
495+
generator._env.loader = jinja2.DictLoader({'sample.py.j2': ''})
496+
api_schema = make_api(naming=naming.Naming(name='Mollusc', version='v6'))
497+
498+
with pytest.raises(types.DuplicateSample):
499+
generator.get_response(api_schema=api_schema)
500+
501+
474502
def make_generator(opts_str: str = '') -> generator.Generator:
475503
return generator.Generator(options.Options.build(opts_str))
476504

packages/gapic-generator/tests/unit/samplegen/test_integration.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -273,6 +273,7 @@ def test_generate_sample_config_partial_config(fs):
273273
contents=dedent(
274274
'''
275275
---
276+
# Note: not a valid config because of the type.
276277
type: com.google.api.codegen.SampleConfigPronto
277278
schema_version: 1.2.0
278279
samples:

0 commit comments

Comments
 (0)