Skip to content

Conversation

Yashsethi24
Copy link

Hey! Excellent work.
Could you please share the code you used to extract the image annotations?

@vladamisici
Copy link

vladamisici commented May 23, 2025

Hey! Excellent work. Could you please share the code you used to extract the image annotations?

Hi. Here's a snippet for that:

#!/usr/bin/env python3
"""
Extract per-image annotation lists from a COCO-format PubLayNet JSON file.

Usage:
    python publaynet_annotations_extractor.py --input path/to/train.json [--output annotations.json] [--limit 100]
"""
import json
import argparse
from pathlib import Path

from pycocotools.coco import COCO

# PubLayNet annotations are COCO-format, but the category ids stand for
# document layout labels. Ids start at 1 and follow the order listed here.
CATEGORY_NAMES = dict(
    enumerate(("text", "title", "list", "table", "figure"), start=1)
)

def _non_negative_int(value):
    """Convert an argparse string to an int, rejecting negative numbers."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 0:
        raise argparse.ArgumentTypeError(f"must be non-negative, got {number}")
    return number


def parse_args(argv=None):
    """
    Parse the command-line options of the annotation extractor.

    Args:
        argv: Sequence of argument strings to parse. When ``None`` (the
            default) argparse falls back to ``sys.argv[1:]``, so existing
            zero-argument calls keep working.

    Returns:
        An ``argparse.Namespace`` with:
          - input: path string of the annotation JSON (required)
          - output: path string of the JSON file to write
          - limit: non-negative int, or ``None`` when not given

    A missing ``--input`` or a negative/non-integer ``--limit`` makes
    argparse print an error and exit with status 2.
    """
    parser = argparse.ArgumentParser(
        description="Load PubLayNet annotations and extract per-image annotation lists."
    )
    parser.add_argument(
        "-i", "--input", required=True,
        help="Path to the COCO-format PubLayNet JSON (train.json, val.json, or test.json)"
    )
    parser.add_argument(
        "-o", "--output", default="publaynet_image_annotations.json",
        help="Where to save the output JSON mapping"
    )
    parser.add_argument(
        # A negative N would otherwise end up as a slice bound downstream and
        # silently drop images from the end instead of limiting the count.
        "-l", "--limit", type=_non_negative_int,
        help="Process only the first N images (for quick testing)"
    )
    return parser.parse_args(argv)

def load_coco_annotations(json_path: Path) -> COCO:
    """
    Build a COCO object from the annotation file at *json_path*.

    Raises:
        FileNotFoundError: If *json_path* does not exist.
    """
    if json_path.exists():
        # COCO is given the path as a plain string rather than a Path object.
        return COCO(str(json_path))
    raise FileNotFoundError(f"Annotation file not found: {json_path}")

def gather_image_annotations(coco: COCO, limit: int = None) -> dict:
    """
    Return a dict mapping image filenames to their list of annotations.

    Args:
        coco: Loaded COCO object to read image and annotation records from.
        limit: Maximum number of images to process, counted from the start
            of ``coco.getImgIds()``. ``None`` (the default) processes every
            image; ``0`` yields an empty mapping.

    Returns:
        A dict keyed by each image's ``file_name`` (``<id_N>`` is used when
        the image record has no ``file_name``). Each value is a list with
        one dict per annotation:
          - bbox: [x, y, width, height]
          - category: human-readable name ("unknown" for unmapped ids)
          - area: float (0 when the annotation has no area)
          - iscrowd: 0 or 1 (0 when the annotation has no iscrowd)

    Raises:
        ValueError: If *limit* is negative.
    """
    image_ids = coco.getImgIds()
    # Compare against None explicitly: a plain truthiness test would treat
    # limit=0 as "no limit" and process the whole dataset.
    if limit is not None:
        if limit < 0:
            # A negative slice bound would drop images from the end rather
            # than cap the count, so reject it instead of guessing.
            raise ValueError(f"limit must be non-negative, got {limit}")
        image_ids = image_ids[:limit]

    records = {}
    for img_id in image_ids:
        info = coco.loadImgs(img_id)[0]
        filename = info.get("file_name", f"<id_{img_id}>")

        anns = coco.loadAnns(coco.getAnnIds(imgIds=img_id))
        records[filename] = [
            {
                "bbox": ann["bbox"],
                "category": CATEGORY_NAMES.get(ann["category_id"], "unknown"),
                "area": ann.get("area", 0),
                "iscrowd": ann.get("iscrowd", 0),
            }
            for ann in anns
        ]

    return records

def save_annotations(data: dict, out_path: Path) -> None:
    """
    Serialize *data* as 2-space-indented JSON into *out_path* (UTF-8).

    The file is created or overwritten, and a one-line summary with the
    number of top-level entries is printed afterwards.
    """
    image_count = len(data)
    with open(out_path, mode="w", encoding="utf-8") as handle:
        json.dump(data, handle, indent=2)
    print(f"Saved annotations for {image_count} images to {out_path}")

def main():
    """
    Run the extractor: parse the CLI options, load the annotation file,
    collect the per-image annotations and write them to the output JSON.
    """
    cli = parse_args()
    input_path, output_path = Path(cli.input), Path(cli.output)

    print(f"Loading annotations from {input_path}...")
    dataset = load_coco_annotations(input_path)

    print("Gathering per-image annotation data...")
    per_image = gather_image_annotations(dataset, limit=cli.limit)
    save_annotations(per_image, output_path)


# Only run the pipeline when executed as a script, not on import.
if __name__ == "__main__":
    main()

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

Projects

None yet

Development

Successfully merging this pull request may close these issues.

3 participants