Skip to content

[mypyc] Implement dict copy primitive #9721

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 1 commit into from
Feb 20, 2021
Merged

Conversation

vsakkas
Copy link
Contributor

@vsakkas vsakkas commented Nov 14, 2020

Description

Implements dict copy primitive for improved performance.

Related ticket: mypyc/mypyc#644

Test Plan

Ensure that copying both an empty and a non-empty dict works as expected, meaning that the original and the copied dicts contain the same key-value pairs.

Generated IR

The following script was used:

d = {'a': 1, 'b': 2, 'c': 3}
c = d.copy()
print(c)

Master branch:

def __top_level__():
    r0, r1 :: object
    r2 :: bit
    r3 :: str
    r4 :: object
    r5, r6, r7 :: str
    r8, r9, r10 :: object
    r11, r12 :: dict
    r13 :: str
    r14 :: int32
    r15 :: bit
    r16 :: dict
    r17 :: str
    r18 :: object
    r19 :: dict
    r20 :: str
    r21 :: object
    r22, r23 :: dict
    r24 :: str
    r25 :: int32
    r26 :: bit
    r27 :: dict
    r28 :: str
    r29 :: object
    r30 :: dict
    r31 :: object
    r32 :: str
    r33, r34 :: object
    r35 :: None
L0:
    r0 = builtins :: module
    r1 = load_address _Py_NoneStruct
    r2 = r0 != r1
    if r2 goto L3 else goto L1 :: bool
L1:
    r3 = load_global CPyStatic_unicode_0 :: static  ('builtins')
    r4 = PyImport_Import(r3)
    if is_error(r4) goto L15 (error at <module>:-1) else goto L2
L2:
    builtins = r4 :: module
    dec_ref r4
L3:
    r5 = load_global CPyStatic_unicode_1 :: static  ('a')
    r6 = load_global CPyStatic_unicode_2 :: static  ('b')
    r7 = load_global CPyStatic_unicode_3 :: static  ('c')
    r8 = box(short_int, 2)
    r9 = box(short_int, 4)
    r10 = box(short_int, 6)
    r11 = CPyDict_Build(3, r5, r8, r6, r9, r7, r10)
    dec_ref r8
    dec_ref r9
    dec_ref r10
    if is_error(r11) goto L15 (error at <module>:1) else goto L4
L4:
    r12 = program.globals :: static
    r13 = load_global CPyStatic_unicode_4 :: static  ('d')
    r14 = CPyDict_SetItem(r12, r13, r11)
    dec_ref r11
    r15 = r14 >= 0 :: signed
    if not r15 goto L15 (error at <module>:1) else goto L5 :: bool
L5:
    r16 = program.globals :: static
    r17 = load_global CPyStatic_unicode_4 :: static  ('d')
    r18 = CPyDict_GetItem(r16, r17)
    if is_error(r18) goto L15 (error at <module>:2) else goto L6
L6:
    r19 = cast(dict, r18)
    if is_error(r19) goto L15 (error at <module>:2) else goto L7
L7:
    r20 = load_global CPyStatic_unicode_5 :: static  ('copy')
    r21 = CPyObject_CallMethodObjArgs(r19, r20, 0)
    dec_ref r19
    if is_error(r21) goto L15 (error at <module>:2) else goto L8
L8:
    r22 = cast(dict, r21)
    if is_error(r22) goto L15 (error at <module>:2) else goto L9
L9:
    r23 = program.globals :: static
    r24 = load_global CPyStatic_unicode_3 :: static  ('c')
    r25 = CPyDict_SetItem(r23, r24, r22)
    dec_ref r22
    r26 = r25 >= 0 :: signed
    if not r26 goto L15 (error at <module>:2) else goto L10 :: bool
L10:
    r27 = program.globals :: static
    r28 = load_global CPyStatic_unicode_3 :: static  ('c')
    r29 = CPyDict_GetItem(r27, r28)
    if is_error(r29) goto L15 (error at <module>:3) else goto L11
L11:
    r30 = cast(dict, r29)
    if is_error(r30) goto L15 (error at <module>:3) else goto L12
L12:
    r31 = builtins :: module
    r32 = load_global CPyStatic_unicode_6 :: static  ('print')
    r33 = CPyObject_GetAttr(r31, r32)
    if is_error(r33) goto L16 (error at <module>:3) else goto L13
L13:
    r34 = PyObject_CallFunctionObjArgs(r33, r30, 0)
    dec_ref r33
    dec_ref r30
    if is_error(r34) goto L15 (error at <module>:3) else goto L17
L14:
    return 1
L15:
    r35 = <error> :: None
    return r35
L16:
    dec_ref r30
    goto L15
L17:
    dec_ref r34
    goto L14

PR:

def __top_level__():
    r0, r1 :: object
    r2 :: bit
    r3 :: str
    r4 :: object
    r5, r6, r7 :: str
    r8, r9, r10 :: object
    r11, r12 :: dict
    r13 :: str
    r14 :: int32
    r15 :: bit
    r16 :: dict
    r17 :: str
    r18 :: object
    r19, r20, r21 :: dict
    r22 :: str
    r23 :: int32
    r24 :: bit
    r25 :: dict
    r26 :: str
    r27 :: object
    r28 :: dict
    r29 :: object
    r30 :: str
    r31, r32 :: object
    r33 :: None
L0:
    r0 = builtins :: module
    r1 = load_address _Py_NoneStruct
    r2 = r0 != r1
    if r2 goto L3 else goto L1 :: bool
L1:
    r3 = load_global CPyStatic_unicode_0 :: static  ('builtins')
    r4 = PyImport_Import(r3)
    if is_error(r4) goto L14 (error at <module>:-1) else goto L2
L2:
    builtins = r4 :: module
    dec_ref r4
L3:
    r5 = load_global CPyStatic_unicode_1 :: static  ('a')
    r6 = load_global CPyStatic_unicode_2 :: static  ('b')
    r7 = load_global CPyStatic_unicode_3 :: static  ('c')
    r8 = box(short_int, 2)
    r9 = box(short_int, 4)
    r10 = box(short_int, 6)
    r11 = CPyDict_Build(3, r5, r8, r6, r9, r7, r10)
    dec_ref r8
    dec_ref r9
    dec_ref r10
    if is_error(r11) goto L14 (error at <module>:1) else goto L4
L4:
    r12 = program.globals :: static
    r13 = load_global CPyStatic_unicode_4 :: static  ('d')
    r14 = CPyDict_SetItem(r12, r13, r11)
    dec_ref r11
    r15 = r14 >= 0 :: signed
    if not r15 goto L14 (error at <module>:1) else goto L5 :: bool
L5:
    r16 = program.globals :: static
    r17 = load_global CPyStatic_unicode_4 :: static  ('d')
    r18 = CPyDict_GetItem(r16, r17)
    if is_error(r18) goto L14 (error at <module>:2) else goto L6
L6:
    r19 = cast(dict, r18)
    if is_error(r19) goto L14 (error at <module>:2) else goto L7
L7:
    r20 = CPyDict_Copy(r19)
    dec_ref r19
    if is_error(r20) goto L14 (error at <module>:2) else goto L8
L8:
    r21 = program.globals :: static
    r22 = load_global CPyStatic_unicode_3 :: static  ('c')
    r23 = CPyDict_SetItem(r21, r22, r20)
    dec_ref r20
    r24 = r23 >= 0 :: signed
    if not r24 goto L14 (error at <module>:2) else goto L9 :: bool
L9:
    r25 = program.globals :: static
    r26 = load_global CPyStatic_unicode_3 :: static  ('c')
    r27 = CPyDict_GetItem(r25, r26)
    if is_error(r27) goto L14 (error at <module>:3) else goto L10
L10:
    r28 = cast(dict, r27)
    if is_error(r28) goto L14 (error at <module>:3) else goto L11
L11:
    r29 = builtins :: module
    r30 = load_global CPyStatic_unicode_5 :: static  ('print')
    r31 = CPyObject_GetAttr(r29, r30)
    if is_error(r31) goto L15 (error at <module>:3) else goto L12
L12:
    r32 = PyObject_CallFunctionObjArgs(r31, r28, 0)
    dec_ref r31
    dec_ref r28
    if is_error(r32) goto L14 (error at <module>:3) else goto L16
L13:
    return 1
L14:
    r33 = <error> :: None
    return r33
L15:
    dec_ref r28
    goto L14
L16:
    dec_ref r32
    goto L13

Performance

Sample script:

d = {'a': 1, 'b': 2, 'c': 3}
for i in range(100000):
    c = d.copy()

Master branch: 1.09x speedup
PR: 1.27x speedup

@vsakkas vsakkas changed the title Implement dict copy primitive [mypyc] Implement dict copy primitive Nov 14, 2020
@vsakkas vsakkas force-pushed the dict-copy branch 2 times, most recently from 0524209 to 2a4280d Compare November 14, 2020 17:24
@vsakkas
Copy link
Contributor Author

vsakkas commented Nov 14, 2020

It seems that a test case (testTypedDictClassInheritanceWithTotalArgument), which is (seemingly) unrelated to this PR, is failing:

Expected:
  main:9: note: Revealed type is 'TypedDict('__main__.C', {'x': builtins.int, 'y'?: builtins.int, 'z': builtins.str})' (diff)
Actual:
  main:9: note: Revealed type is 'TypedDict('__main__.C', {'y'?: builtins.int, 'x': builtins.int, 'z': builtins.str})' (diff)

Any pointers as to why this could be happening?

@vsakkas vsakkas force-pushed the dict-copy branch 2 times, most recently from 9e93f51 to 78f3590 Compare November 17, 2020 20:57
Copy link
Collaborator

@TH3CHARLie TH3CHARLie left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good! Thanks! Please wait for another round of review from @JukkaL, and then this can be merged.

@vsakkas
Copy link
Contributor Author

vsakkas commented Nov 18, 2020

Results from https://github.com/mypyc/mypyc-benchmarks:

Master:

running dict_copy
..........
interpreted: 0.296086s (avg of 5 iterations; stdev 1.1%)
compiled:    0.185336s (avg of 5 iterations; stdev 0.67%)

compiled is 1.598x faster

PR:

running dict_copy
..........
interpreted: 0.300922s (avg of 5 iterations; stdev 0.42%)
compiled:    0.131764s (avg of 5 iterations; stdev 0.91%)

compiled is 2.284x faster

Copy link
Collaborator

@JukkaL JukkaL left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Looks good, thanks! This is a nice performance improvement for a pretty common operation.

Left some suggestions to improve test coverage (they are recommended but not necessary).

d = {}
assert d.copy() == d
d = {'a': 1, 'b': 2}
assert d.copy() == d
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Ideas for additional checks:

  • Test that the result is not the same object (assert d.copy() is not d).
  • Test that this works with DefaultDict objects when the annotated type is Dict[...], to check for the non-exact code path here.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Added some additional checks. Let me know if it looks ok.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just noticed that these tests are in driver.py, which doesn't get compiled, so these aren't actually testing anything useful. The recommended way of writing tests is to avoid driver.py altogether so that everything gets compiled, in part because it's easy to accidentally have an essentially no-op test case. Unfortunately, most of the existing test cases use driver.py.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The test code still needs to be moved away from driver.py. It would probably be easier to create a new test case that doesn't define driver.py at all (see the mypyc developer docs for how to do this).

@vsakkas vsakkas force-pushed the dict-copy branch 3 times, most recently from 61f3fd0 to 2bc1a82 Compare November 20, 2020 21:27
d = {}
assert d.copy() == d
d = {'a': 1, 'b': 2}
assert d.copy() == d
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just noticed that these tests are in driver.py, which doesn't get compiled, so these aren't actually testing anything useful. The recommended way of writing tests is to avoid driver.py altogether so that everything gets compiled, in part because it's easy to accidentally have an essentially no-op test case. Unfortunately, most of the existing test cases use driver.py.

d = {}
assert d.copy() == d
d = {'a': 1, 'b': 2}
assert d.copy() == d
assert d.copy() is not d
d = defaultdict(int)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that this will infer defaultdict as the type, so the new primitive will not be used. Instead you can do something like this to override the inferred type:

dd: Dict[str, int] = defaultdict(int)

Copy link
Contributor Author

@vsakkas vsakkas Nov 22, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

If I understand correctly, would adding some functions for creating the dictionaries and copying them under something like [case testDictCopy] and then calling those functions and using assert under driver.py resolve this issue?

d = {}
assert d.copy() == d
d = {'a': 1, 'b': 2}
assert d.copy() == d
assert d.copy() is not d
d = defaultdict(int)
assert d.copy() == d
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Use a non-empty default dict instance, and check that the type of the return value is correct (assert isinstance(d.copy(), defaultdict)).

[case testDictCopy]
from typing import Dict
def f(d: Dict[int, int]) -> Dict[int, int]:
    return d.copy() # type: ignore
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add copy() to the test stubs in mypyc/test-data/fixtures/ir.py and remove # type: ignore afterwards? Using # type: ignore can be problematic, as it can hide legitimate errors.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I remember encountering some issues on CI when removing the # type: ignore that did not show up locally, so I ended up adding it as a temporary workaround. Removed it anyway and added copy() to mypyc/test-data/fixtures/ir.py.

assert d.copy() is not d # type: ignore
dd: Dict[str, int] = defaultdict(int)
dd['a'] = 1
assert dd.copy() == dd # type: ignore
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It would be good to get rid of these # type: ignore comments as well (see above).

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed.

@TH3CHARLie
Copy link
Collaborator

TH3CHARLie commented Feb 17, 2021

@vsakkas Any followup on this? It's pretty close to be merged.

@vakaras
Copy link
Contributor

vakaras commented Feb 17, 2021

@vakaras Any followup on this? It's pretty close to be merged.

@vsakkas ^^

@TH3CHARLie
Copy link
Collaborator

@vakaras Any followup on this? It's pretty close to be merged.

@vsakkas ^^

Sorry for the misspell!

@vsakkas
Copy link
Contributor Author

vsakkas commented Feb 17, 2021

@TH3CHARLie I'm really sorry for the delay! Things came up and I never got to work on this. Thanks for reminding me.

Copy link
Collaborator

@TH3CHARLie TH3CHARLie left a comment

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

looks good!

Please avoid using force-push next time; it ruins the commit history and makes review difficult.

@TH3CHARLie TH3CHARLie merged commit 497556f into python:master Feb 20, 2021
@vsakkas vsakkas deleted the dict-copy branch February 20, 2021 08:35
sthagen added a commit to sthagen/python-mypy that referenced this pull request Feb 20, 2021
Implement dict copy primitive (python#9721)
JukkaL pushed a commit that referenced this pull request Mar 8, 2021
Here are a few updates/fixes after a read through the mypyc docs.

For the corresponding primitive method updates, see:

* str.replace: #10088
* dict.copy: #9721
* dict.clear: #9724
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

Successfully merging this pull request may close these issues.

4 participants