Skip to content

Commit d6fa3bf

Browse files
add case to handle indented .json parsing (#175) (#389)
1 parent ff8d5cf commit d6fa3bf

File tree

1 file changed

+14
-7
lines changed

1 file changed

+14
-7
lines changed

openai/validators.py

Lines changed: 14 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -526,14 +526,21 @@ def read_any_format(fname, fields=["prompt", "completion"]):
526 526
else:
527 527
pass # this is what we expect for a .jsonl file
528 528
elif fname.lower().endswith(".json"):
529-
df = pd.read_json(fname, lines=True, dtype=str).fillna("")
530-
if len(df) == 1:
531-
# this is what we expect for a .json file
529+
try:
530+
# to handle case where .json file is actually a .jsonl file
531+
df = pd.read_json(fname, lines=True, dtype=str).fillna("")
532+
if len(df) == 1:
533+
# this code path corresponds to a .json file that has one line
534+
df = pd.read_json(fname, dtype=str).fillna("")
535+
else:
536+
# this is NOT what we expect for a .json file
537+
immediate_msg = "\n- Your JSON file appears to be in a JSONL format. Your file will be converted to JSONL format"
538+
necessary_msg = (
539+
"Your format `JSON` will be converted to `JSONL`"
540+
)
541+
except ValueError:
542+
# this code path corresponds to a .json file that has multiple lines (i.e. it is indented)
532 543
df = pd.read_json(fname, dtype=str).fillna("")
533-
else:
534-
# this is NOT what we expect for a .json file
535-
immediate_msg = "\n- Your JSON file appears to be in a JSONL format. Your file will be converted to JSONL format"
536-
necessary_msg = "Your format `JSON` will be converted to `JSONL`"
537 544
else:
538 545
error_msg = "Your file must have one of the following extensions: .CSV, .TSV, .XLSX, .TXT, .JSON or .JSONL"
539 546
if "." in fname:

0 commit comments

Comments
 (0)