Skip to content

Commit b230002

Browse files
authored
Replace usage of deprecated DataFrame.append method (#495)
* Replace deprecated Pandas append method * update changelog --------- Co-authored-by: Michael Chin <[email protected]>
1 parent 74ed9a0 commit b230002

File tree

3 files changed

+53
-19
lines changed

3 files changed

+53
-19
lines changed

ChangeLog.md

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -11,6 +11,7 @@ Starting with v1.31.6, this file will contain a record of major features and upd
1111
- Added support for setting `%graph_notebook_vis_options` from a variable ([Link to PR](https://github.com/aws/graph-notebook/pull/487))
1212
- Pinned JupyterLab<4.x to fix Python 3.8/3.10 builds ([Link to PR](https://github.com/aws/graph-notebook/pull/490))
1313
- Changed datatype of "amount" from String to numeric for "Transaction" vertices in Fraud Graph sample notebook ([Link to PR](https://github.com/aws/graph-notebook/pull/489))
14+
- Replaced usages of deprecated `DataFrame.append` method in ML samples ([Link to PR](https://github.com/aws/graph-notebook/pull/495))
1415

1516
## Release 3.8.1 (April 17, 2023)
1617
- Reinstate Python 3.7 support for compatibility with legacy AL1 Neptune Notebooks ([Link to PR](https://github.com/aws/graph-notebook/pull/479))

src/graph_notebook/notebooks/04-Machine-Learning/Sample-Applications/01-People-Analytics/People-Analytics-using-Neptune-ML.ipynb

Lines changed: 44 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,12 @@
332332
"role_dept_list = []\n",
333333
"role_field_list = []\n",
334334
"\n",
335+
"edge_emp_dept_rows_list = [edge_emp_dept]\n",
336+
"edge_emp_role_rows_list = [edge_emp_role]\n",
337+
"edge_emp_field_rows_list = [edge_emp_field]\n",
338+
"edge_role_dept_rows_list = [edge_role_dept]\n",
339+
"edge_role_field_rows_list = [edge_role_field]\n",
340+
"\n",
335341
"for index, row in df.iterrows():\n",
336342
" emp = row['EmployeeNumber']\n",
337343
" emp_id = emp_map[emp]\n",
@@ -341,30 +347,52 @@
341347
" field_id = field_map[field]\n",
342348
" dept = row['Department']\n",
343349
" dept_id = dept_map[dept]\n",
344-
" \n",
345-
" edge_emp_dept = edge_emp_dept.append({'~id': uuid.uuid4(), '~from': emp_id, \n",
346-
" '~to': dept_id, \n",
347-
" '~label': 'works_in'}, ignore_index=True)\n",
348-
" edge_emp_role = edge_emp_role.append({'~id': uuid.uuid4(), '~from': emp_id, \n",
349-
" '~to': role_id, \n",
350-
" '~label': 'works_as'}, ignore_index=True)\n",
351-
" edge_emp_field = edge_emp_field.append({'~id': uuid.uuid4(), '~from': emp_id, \n",
352-
" '~to': field_id, \n",
353-
" '~label': 'has_education_level'}, ignore_index=True)\n",
350+
"\n",
351+
" edge_emp_dept_row_df = pd.DataFrame.from_dict({'~id': uuid.uuid4(),\n",
352+
" '~from': emp_id,\n",
353+
" '~to': dept_id,\n",
354+
" '~label': 'works_in'},\n",
355+
" orient='index').T\n",
356+
" edge_emp_dept_rows_list.append(edge_emp_dept_row_df)\n",
357+
" edge_emp_role_row_df = pd.DataFrame.from_dict({'~id': uuid.uuid4(),\n",
358+
" '~from': emp_id,\n",
359+
" '~to': role_id,\n",
360+
" '~label': 'works_as'},\n",
361+
" orient='index').T\n",
362+
" edge_emp_role_rows_list.append(edge_emp_role_row_df)\n",
363+
" edge_emp_field_row_df = pd.DataFrame.from_dict({'~id': uuid.uuid4(),\n",
364+
" '~from': emp_id,\n",
365+
" '~to': field_id,\n",
366+
" '~label': 'has_education_level'},\n",
367+
" orient='index').T\n",
368+
" edge_emp_field_rows_list.append(edge_emp_field_row_df)\n",
354369
" \n",
355370
" role_dept = f\"{role_id}-{dept_id}\"\n",
356371
" role_field = f\"{role_id}-{field_id}\"\n",
357372
" if role_dept not in role_dept_list:\n",
358-
" edge_role_dept = edge_role_dept.append({'~id': uuid.uuid4(), '~from': role_id, \n",
359-
" '~to': dept_id, \n",
360-
" '~label': 'part_of'}, ignore_index=True)\n",
373+
" edge_role_dept_row_df = pd.DataFrame.from_dict({'~id': uuid.uuid4(),\n",
374+
" '~from': role_id,\n",
375+
" '~to': dept_id,\n",
376+
" '~label': 'part_of'},\n",
377+
" orient='index').T\n",
378+
" edge_role_dept_rows_list.append(edge_role_dept_row_df)\n",
379+
" #edge_role_dept = pd.concat([edge_role_dept, edge_role_dept_row_df], ignore_index=True)\n",
361380
" role_dept_list.append(role_dept)\n",
362381
" if role_field not in role_field_list:\n",
363-
" edge_role_field = edge_role_field.append({'~id': uuid.uuid4(), '~from': role_id, \n",
364-
" '~to': field_id, \n",
365-
" '~label': 'requires'}, ignore_index=True)\n",
382+
" edge_role_field_row_df = pd.DataFrame.from_dict({'~id': uuid.uuid4(), '~from': role_id,\n",
383+
" '~to': field_id,\n",
384+
" '~label': 'requires'},\n",
385+
" orient='index').T\n",
386+
" edge_role_field_rows_list.append(edge_role_field_row_df)\n",
366387
" role_field_list.append(role_field)\n",
367388
" edge_cnt = edge_cnt + 1\n",
389+
"\n",
390+
"edge_emp_dept = pd.concat(edge_emp_dept_rows_list, ignore_index=True)\n",
391+
"edge_emp_role = pd.concat(edge_emp_role_rows_list, ignore_index=True)\n",
392+
"edge_emp_field = pd.concat(edge_emp_field_rows_list, ignore_index=True)\n",
393+
"edge_role_dept = pd.concat(edge_role_dept_rows_list, ignore_index=True)\n",
394+
"edge_role_field = pd.concat(edge_role_field_rows_list, ignore_index=True)\n",
395+
"\n",
368396
"edge_df = pd.concat([edge_emp_dept, edge_emp_role, edge_emp_field, edge_role_dept, edge_role_field])\n",
369397
"edge_df.to_csv(os.path.join(output_folder, 'edge.csv'), index=False)\n",
370398
"\n",

src/graph_notebook/notebooks/04-Machine-Learning/neptune_ml_utils.py

Lines changed: 8 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -360,18 +360,23 @@ def __process_movies_genres(self):
360360
genre_df['name'] = genre_df['~id']
361361
genre_df.to_csv(os.path.join(self.formatted_directory,
362362
'genre_vertex.csv'), index=False)
363+
genres_edge_df_rows_list = [genres_edges_df]
363364

364365
# Loop through all the movies and pull out the genres
365366
for index, row in movie_genre_df.iterrows():
366367
genre_lst = []
367368
for g in genres:
368369
if row[g] == 1:
369-
genres_edges_df = genres_edges_df.append(
370-
{'~id': f"{row['~id']}-included_in-{g}", '~label': 'included_in',
371-
'~from': row['~id'], '~to': g}, ignore_index=True)
370+
row_as_df = pd.DataFrame.from_dict({'~id': f"{row['~id']}-included_in-{g}",
371+
'~label': 'included_in',
372+
'~from': row['~id'],
373+
'~to': g},
374+
orient='index').T
375+
genres_edge_df_rows_list.append(row_as_df)
372376
genre_lst.append(g)
373377
movies_df.loc[index, 'genre:String[]'] = ';'.join(genre_lst)
374378

379+
genres_edges_df = pd.concat(genres_edge_df_rows_list, ignore_index=True)
375380
# rename the release date column to specify the data type
376381
movies_df['release_date:Date'] = movies_df['release_date']
377382
# Drop the genre columns as well as the unneeded release date columns

0 commit comments

Comments
 (0)