Skip to content

DOC: Fixed example & description for pandas.cut #20069

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 20 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
20 commits
Select commit Hold shift + click to select a range
8a90d6d
Reworked doc string for pandas.cut
ikoevska Mar 8, 2018
690dbc5
Fixed example and extended descr
ikoevska Mar 8, 2018
00f35fb
DOC: Fixed example & description for pandas.cut
ikoevska Mar 8, 2018
f277f15
Merge branch 'patch-1' of https://github.com/ikoevska/pandas into pat…
ikoevska Mar 8, 2018
54df8d3
DOC: Fixed issues with panda.cut after flake8
ikoevska Mar 9, 2018
747501a
DOC: Improve docstring for pandas.Index.repeat (#19985)
alysivji Mar 9, 2018
9119d07
Temporary github PR template for sprint (#20055)
jorisvandenbossche Mar 9, 2018
c730d08
DOC: Update Kurt Docstr (#20044)
WillAyd Mar 9, 2018
cc1b934
BUG: Retain timezone dtype with cut and qcut (#19890)
mroeschke Mar 9, 2018
731d971
Fix typo in apply.py (#20058)
mroeschke Mar 9, 2018
7c14e4f
DOC: Add syntax highlighting to SAS code blocks in comparison_with_sa…
kylebarron Mar 9, 2018
ed96567
TST: series/indexing tests parametrization + moving test methods (#20…
almaleksia Mar 10, 2018
bd31f71
Added 'displayed_only' option to 'read_html' (#20047)
WillAyd Mar 10, 2018
da6f827
Refactored GroupBy ASVs (#20043)
WillAyd Mar 10, 2018
52cffa3
Cythonized GroupBy pct_change (#19919)
WillAyd Mar 10, 2018
4131149
DOC: Extend docstring pandas core index to_frame method (#20036)
stijnvanhoey Mar 10, 2018
2e6b4b1
Reworked doc string for pandas.cut
ikoevska Mar 8, 2018
1d392e4
Fixed example and extended descr
ikoevska Mar 8, 2018
2387be9
DOC: Fixed issues with panda.cut after flake8
ikoevska Mar 9, 2018
db337c1
Merge branch 'patch-1' of https://github.com/ikoevska/pandas into pat…
ikoevska Mar 10, 2018
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions .github/PULL_REQUEST_TEMPLATE.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,27 @@
Checklist for the pandas documentation sprint (ignore this if you are doing
an unrelated PR):

- [ ] PR title is "DOC: update the <your-function-or-method> docstring"
- [ ] The validation script passes: `scripts/validate_docstrings.py <your-function-or-method>`
- [ ] The PEP8 style check passes: `git diff upstream/master -u -- "*.py" | flake8 --diff`
- [ ] The html version looks good: `python doc/make.py --single <your-function-or-method>`
- [ ] It has been proofread for language by another sprint participant

Please include the output of the validation script below between the "```" ticks:

```
# paste output of "scripts/validate_docstrings.py <your-function-or-method>" here
# between the "```" (remove this comment, but keep the "```")

```

If the validation script still gives errors, but you think there is a good reason
to deviate in this case (and there are certainly such cases), please state this
explicitly.


Checklist for other PRs (remove this part if you are doing a PR for the pandas documentation sprint):

- [ ] closes #xxxx
- [ ] tests added / passed
- [ ] passes `git diff upstream/master -u -- "*.py" | flake8 --diff`
Expand Down
137 changes: 58 additions & 79 deletions asv_bench/benchmarks/groupby.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,10 @@
method_blacklist = {
'object': {'median', 'prod', 'sem', 'cumsum', 'sum', 'cummin', 'mean',
'max', 'skew', 'cumprod', 'cummax', 'rank', 'pct_change', 'min',
'var', 'mad', 'describe', 'std'}
'var', 'mad', 'describe', 'std'},
'datetime': {'median', 'prod', 'sem', 'cumsum', 'sum', 'mean', 'skew',
'cumprod', 'cummax', 'pct_change', 'var', 'mad', 'describe',
'std'}
}


Expand Down Expand Up @@ -90,45 +93,6 @@ def time_series_groups(self, data, key):
self.ser.groupby(self.ser).groups


class FirstLast(object):
    """Time ``first``, ``last`` and ``nth`` on a frame grouped by ``key``.

    The benchmark is parametrised over the dtype of the ``values`` column.
    The datetime and object cases use one row per key; the float cases use
    ten rows per key, shuffled, with two of every three values set to NaN.
    """

    goal_time = 0.2

    params = ['float32', 'float64', 'datetime', 'object']
    param_names = ['dtype']

    def setup(self, dtype):
        """Build ``self.df`` with a ``values`` and a ``key`` column."""
        n_rows = 10**5
        if dtype == 'object':
            frame = DataFrame({'values': ['foo'] * n_rows,
                               'key': range(n_rows)})
        elif dtype == 'datetime':
            # with datetimes (GH7555)
            stamps = date_range('1/1/2011', periods=n_rows, freq='s')
            frame = DataFrame({'values': stamps, 'key': range(n_rows)})
        else:
            groups = np.arange(n_rows / 10).repeat(10)
            column = Series(np.random.randn(len(groups)), dtype=dtype)
            # Only every third value (positions 2, 5, 8, ...) stays non-NaN.
            column[::3] = np.nan
            column[1::3] = np.nan
            shuffled = groups.take(np.random.permutation(len(groups)))
            frame = DataFrame({'values': column, 'key': shuffled})
        self.df = frame

    def time_groupby_first(self, dtype):
        """Time ``first()`` on the frame grouped by ``key``."""
        grouped = self.df.groupby('key')
        grouped.first()

    def time_groupby_last(self, dtype):
        """Time ``last()`` on the frame grouped by ``key``."""
        grouped = self.df.groupby('key')
        grouped.last()

    def time_groupby_nth_all(self, dtype):
        """Time ``nth(0, dropna='all')`` on the frame grouped by ``key``."""
        grouped = self.df.groupby('key')
        grouped.nth(0, dropna='all')

    def time_groupby_nth_none(self, dtype):
        """Time ``nth(0)`` without ``dropna`` on the grouped frame."""
        grouped = self.df.groupby('key')
        grouped.nth(0)


class GroupManyLabels(object):

goal_time = 0.2
Expand All @@ -149,39 +113,40 @@ class Nth(object):

goal_time = 0.2

def setup_cache(self):
df = DataFrame(np.random.randint(1, 100, (10000, 2)))
df.iloc[1, 1] = np.nan
return df

def time_frame_nth_any(self, df):
df.groupby(0).nth(0, dropna='any')

def time_frame_nth(self, df):
df.groupby(0).nth(0)

param_names = ['dtype']
params = ['float32', 'float64', 'datetime', 'object']

def time_series_nth_any(self, df):
df[1].groupby(df[0]).nth(0, dropna='any')
def setup(self, dtype):
N = 10**5
# with datetimes (GH7555)
if dtype == 'datetime':
values = date_range('1/1/2011', periods=N, freq='s')
elif dtype == 'object':
values = ['foo'] * N
else:
values = np.arange(N).astype(dtype)

def time_series_nth(self, df):
df[1].groupby(df[0]).nth(0)
key = np.arange(N)
self.df = DataFrame({'key': key, 'values': values})
self.df.iloc[1, 1] = np.nan # insert missing data

def time_frame_nth_any(self, dtype):
self.df.groupby('key').nth(0, dropna='any')

class NthObject(object):
def time_groupby_nth_all(self, dtype):
self.df.groupby('key').nth(0, dropna='all')

goal_time = 0.2
def time_frame_nth(self, dtype):
self.df.groupby('key').nth(0)

def setup_cache(self):
df = DataFrame(np.random.randint(1, 100, (10000,)), columns=['g'])
df['obj'] = ['a'] * 5000 + ['b'] * 5000
return df
def time_series_nth_any(self, dtype):
self.df['values'].groupby(self.df['key']).nth(0, dropna='any')

def time_nth(self, df):
df.groupby('g').nth(5)
def time_series_nth_all(self, dtype):
    """Time ``nth(0, dropna='all')`` on ``values`` grouped by ``key``.

    This is the Series counterpart of the frame benchmark
    ``time_groupby_nth_all``. It used to carry that same name, so this
    later definition replaced the frame variant in the class namespace
    and the frame case was never timed. The new name follows the sibling
    ``time_series_nth_any`` / ``time_series_nth`` benchmarks.

    Parameters
    ----------
    dtype : str
        Benchmark parameter; unused here because ``setup`` has already
        built ``self.df`` for it.
    """
    self.df['values'].groupby(self.df['key']).nth(0, dropna='all')

def time_nth_last(self, df):
df.groupby('g').last()
def time_series_nth(self, dtype):
self.df['values'].groupby(self.df['key']).nth(0)


class DateAttributes(object):
Expand Down Expand Up @@ -243,7 +208,7 @@ def time_multi_count(self, df):
df.groupby(['key1', 'key2']).count()


class CountInt(object):
class CountMultiInt(object):

goal_time = 0.2

Expand All @@ -255,18 +220,18 @@ def setup_cache(self):
'ints2': np.random.randint(0, 1000, size=n)})
return df

def time_int_count(self, df):
def time_multi_int_count(self, df):
df.groupby(['key1', 'key2']).count()

def time_int_nunique(self, df):
def time_multi_int_nunique(self, df):
df.groupby(['key1', 'key2']).nunique()


class AggFunctions(object):

goal_time = 0.2

def setup_cache(self):
def setup_cache():
N = 10**5
fac1 = np.array(['A', 'B', 'C'], dtype='O')
fac2 = np.array(['one', 'two'], dtype='O')
Expand Down Expand Up @@ -361,9 +326,6 @@ def setup(self):
def time_multi_size(self):
self.df.groupby(['key1', 'key2']).size()

def time_dt_size(self):
self.df.groupby(['dates']).size()

def time_dt_timegrouper_size(self):
with warnings.catch_warnings(record=True):
self.df.groupby(TimeGrouper(key='dates', freq='M')).size()
Expand All @@ -376,15 +338,16 @@ class GroupByMethods(object):

goal_time = 0.2

param_names = ['dtype', 'method']
params = [['int', 'float', 'object'],
param_names = ['dtype', 'method', 'application']
params = [['int', 'float', 'object', 'datetime'],
['all', 'any', 'bfill', 'count', 'cumcount', 'cummax', 'cummin',
'cumprod', 'cumsum', 'describe', 'ffill', 'first', 'head',
'last', 'mad', 'max', 'min', 'median', 'mean', 'nunique',
'pct_change', 'prod', 'rank', 'sem', 'shift', 'size', 'skew',
'std', 'sum', 'tail', 'unique', 'value_counts', 'var']]
'std', 'sum', 'tail', 'unique', 'value_counts', 'var'],
['direct', 'transformation']]

def setup(self, dtype, method):
def setup(self, dtype, method, application):
if method in method_blacklist.get(dtype, {}):
raise NotImplementedError # skip benchmark
ngroups = 1000
Expand All @@ -398,12 +361,28 @@ def setup(self, dtype, method):
np.random.random(ngroups) * 10.0])
elif dtype == 'object':
key = ['foo'] * size
elif dtype == 'datetime':
key = date_range('1/1/2011', periods=size, freq='s')

df = DataFrame({'values': values, 'key': key})
self.df_groupby_method = getattr(df.groupby('key')['values'], method)

def time_method(self, dtype, method):
self.df_groupby_method()
# The ``application`` parameter takes the values 'direct' and
# 'transformation'. The previous comparison against 'transform' never
# matched, so the "transformation" runs silently timed the direct call.
if application == 'transformation':
    if method == 'describe':
        raise NotImplementedError  # skip benchmark

    # NOTE(review): other entries in the method list may not be valid
    # ``transform`` targets either -- confirm and skip them here if so.
    self.as_group_method = lambda: df.groupby(
        'key')['values'].transform(method)
    self.as_field_method = lambda: df.groupby(
        'values')['key'].transform(method)
else:
    # Direct application: bind the groupby method itself so that the
    # timed call does not include building the groupby object.
    self.as_group_method = getattr(df.groupby('key')['values'], method)
    self.as_field_method = getattr(df.groupby('values')['key'], method)

def time_dtype_as_group(self, dtype, method, application):
    """Time the callable stored on the instance as ``as_group_method``.

    ``setup`` builds that callable from a groupby on ``key`` that
    operates on the ``values`` column. The three benchmark parameters
    are unused here.
    """
    run_benchmark = self.as_group_method
    run_benchmark()

def time_dtype_as_field(self, dtype, method, application):
    """Time the callable stored on the instance as ``as_field_method``.

    ``setup`` builds that callable from a groupby on ``values`` that
    operates on the ``key`` column. The three benchmark parameters are
    unused here.
    """
    run_benchmark = self.as_field_method
    run_benchmark()


class Float32(object):
Expand Down
Loading