|
8 | 8 | import pyjviz |
9 | 9 |
|
10 | 10 | if __name__ == "__main__": |
11 | | - |
12 | 11 | if 1: |
13 | 12 | url = "https://github.com/pyjanitor-devs/pyjanitor/blob/dev/examples/notebooks/dirty_data.xlsx?raw=true" |
14 | | - dirty = pd.read_excel(url, engine = 'openpyxl') |
| 13 | + dirty = pd.read_excel(url, engine="openpyxl") |
15 | 14 | else: |
16 | 15 | dirty = pd.read_excel("../data/dirty_data.xlsx") |
17 | | - |
| 16 | + |
18 | 17 | print(dirty) |
19 | 18 |
|
20 | 19 | with pyjviz.CB("from_dirty_to_clean") as c: |
21 | | - clean = (dirty |
22 | | - .clean_names() |
23 | | - .dropna(axis='columns', how='all') |
24 | | - .dropna(axis='rows', how='all') |
25 | | - .rename(columns={"%_allocated": "percent_allocated", "full_time_": "full_time"}) |
26 | | - .assign(certification = lambda df: df.certification.combine_first(df.certification_1)) |
27 | | - .drop(columns='certification_1')) |
28 | | - clean = clean.assign(hire_date1 = clean.hire_date) # , hire_date = pd.to_datetime(clean.hire_date, unit='D', origin='1899-12-30')) |
29 | | - |
| 20 | + clean = ( |
| 21 | + dirty.clean_names() |
| 22 | + .dropna(axis="columns", how="all") |
| 23 | + .dropna(axis="rows", how="all") |
| 24 | + .rename( |
| 25 | + columns={ |
| 26 | + "%_allocated": "percent_allocated", |
| 27 | + "full_time_": "full_time", |
| 28 | + } |
| 29 | + ) |
| 30 | + .assign( |
| 31 | + certification=lambda df: df.certification.combine_first( |
| 32 | + df.certification_1 |
| 33 | + ) |
| 34 | + ) |
| 35 | + .drop(columns="certification_1") |
| 36 | + ) |
| 37 | + clean = clean.assign( |
| 38 | + hire_date1=clean.hire_date |
| 39 | + ) # , hire_date = pd.to_datetime(clean.hire_date, unit='D', origin='1899-12-30')) |
| 40 | + |
30 | 41 | print(clean) |
31 | 42 |
|
32 | | - pyjviz.save_dot(vertical = True, show_objects = False) |
| 43 | + pyjviz.save_dot(vertical=True, show_objects=False) |
0 commit comments