import os
import json
import collections
import glob
class Project:
    """Project consists of label groups, and datasets.

    Args:
        annotations_fp (str):
            File path to the exported JSON annotation file.
        images_dir (str):
            File path to the DICOM images directory.

    Attributes:
        data: Parsed content of the annotation file, or ``None`` when the
            project was created without both paths.
        label_groups: List of ``LabelGroup`` objects built from ``data["labelGroups"]``.
        datasets: List of ``Dataset`` objects built from ``data["datasets"]``.
        classes_dict: Mapping produced by ``set_labels_dict()``; ``None`` until then.
    """

    def __init__(self, annotations_fp=None, images_dir=None):
        self.annotations_fp = None
        self.images_dir = None
        self.label_groups = []
        self.datasets = []
        # Always defined, so an "empty" project fails with a clear None instead
        # of an AttributeError when these are read later.
        self.data = None
        self.classes_dict = None

        if annotations_fp is None or images_dir is None:
            # Kept as a printed message (not an exception) so that existing
            # callers constructing an empty Project keep working.
            print("Error: Missing data or images file paths!")
            return

        self.annotations_fp = annotations_fp
        self.images_dir = images_dir

        # JSON text is UTF-8; do not depend on the platform default encoding.
        with open(self.annotations_fp, "r", encoding="utf-8") as f:
            self.data = json.load(f)

        for dataset in self.data["datasets"]:
            self.datasets.append(Dataset(dataset, images_dir))
        for label_group in self.data["labelGroups"]:
            self.label_groups.append(LabelGroup(label_group))

    def get_label_groups(self):
        """Return the list of label groups."""
        return self.label_groups

    def show_label_groups(self):
        """Print the id and name of every label group, followed by its labels."""
        for label_group in self.label_groups:
            print(f"Label Group, Id: {label_group.id}, Name: {label_group.name}")
            label_group.show_labels("\t")

    def get_label_group_by_name(self, label_group_name):
        """Return the first label group with the given name, or ``None``."""
        for label_group in self.label_groups:
            if label_group.name == label_group_name:
                return label_group
        return None

    def get_label_group_by_id(self, label_group_id):
        """Return the first label group with the given id, or ``None``."""
        for label_group in self.label_groups:
            if label_group.id == label_group_id:
                return label_group
        return None

    def get_datasets(self):
        """Return the list of dataset objects."""
        return self.datasets

    def show_datasets(self):
        """Print the id and name of every dataset."""
        print("Datasets:")
        for dataset in self.datasets:
            print(f"Id: {dataset.id}, Name: {dataset.name}")
        print("")

    def get_dataset_by_name(self, dataset_name):
        """Return the first dataset with the given name.

        Raises:
            ValueError: If no dataset has that name.
        """
        for dataset in self.datasets:
            if dataset.name == dataset_name:
                return dataset
        raise ValueError(f"Dataset name {dataset_name} does not exist.")

    def get_dataset_by_id(self, dataset_id):
        """Return the first dataset with the given id.

        Raises:
            ValueError: If no dataset has that id.
        """
        for dataset in self.datasets:
            if dataset.id == dataset_id:
                return dataset
        raise ValueError(f"Dataset id {dataset_id} does not exist.")

    def set_labels_dict(self, labels_dict):
        """Build the classes dict from ``labels_dict`` and share it with every dataset.

        Args:
            labels_dict:
                Dictionary mapping label ids to (user defined) class ids.

        Raises:
            ValueError: If a label id is unknown, or a local label gets class id 0.
        """
        self.classes_dict = self._create_classes_dict(labels_dict)
        for dataset in self.datasets:
            dataset.classes_dict = self.classes_dict

    def get_label_id_annotation_mode(self, label_id):
        """Return label id's annotation mode.

        Raises:
            ValueError: If the label id does not exist in any label group.
        """
        return self._find_label(label_id)["annotationMode"]

    def get_label_id_type(self, label_id):
        """Return label id's type.

        Raises:
            ValueError: If the label id does not exist in any label group.
        """
        return self._find_label(label_id)["type"]

    def get_label_id_scope(self, label_id):
        """Return label id's scope.

        Raises:
            ValueError: If the label id does not exist in any label group.
        """
        return self._find_label(label_id)["scope"]

    def _find_label(self, label_id):
        """Return the first label record whose ``"id"`` equals ``label_id``.

        Raises:
            ValueError: If no label group contains such a label.
        """
        for label_group in self.label_groups:
            for label in label_group.get_data()["labels"]:
                if label["id"] == label_id:
                    return label
        raise ValueError(f"Label id {label_id} does not exist.")

    def _create_classes_dict(self, labels_dict):
        """Create a dict with label id as key, and a nested dict describing the class as
        value, e.g., {'L_v8n': {'class_id': 1, 'class_text': 'Lung Opacity', ...}}, where
        L_v8n is the label id, with a class_id of 1 and class text of 'Lung Opacity'.

        Args:
            labels_dict:
                Dictionary containing label ids, and (user defined) class ids.

        Returns:
            Dictionary keyed by label id; each value holds ``class_id``,
            ``class_text``, ``class_annotation_mode``, ``scope`` and ``type``.

        Raises:
            ValueError: If a local-type label is given class id 0, or if some
                label id of ``labels_dict`` is not found in any label group.
        """
        # Index every known label once instead of rescanning all groups for
        # each requested id. On a duplicated id the later record wins.
        labels_by_id = {
            label["id"]: label
            for label_group in self.label_groups
            for label in label_group.get_data()["labels"]
        }

        classes_dict = {}
        for label_id, class_id in labels_dict.items():
            label = labels_by_id.get(label_id)
            if label is None:
                # Unknown ids are collected and reported together below.
                continue
            if class_id == 0 and label["type"].lower() == "local":
                raise ValueError(f"{label_id} is a local type, its class id cannot be 0.")
            classes_dict[label_id] = {
                "class_id": class_id,
                "class_text": label["name"],
                "class_annotation_mode": label["annotationMode"],
                "scope": label["scope"],
                "type": label["type"],
            }

        if classes_dict.keys() != labels_dict.keys():
            diff = set(labels_dict).symmetric_difference(classes_dict)
            raise ValueError(f"Labels {diff} are not valid for this dataset.")
        return classes_dict
class LabelGroup:
    """A label group contains multiple labels.

    Each label has properties such as id, name, color, type, scope, annotation mode,
    and rad lex tag ids.

    Label type:
        Global typed annotations apply to the whole instance (e.g., a CT image), while
        local typed annotations apply to a part of the image (e.g., ROI bounding box).
    Label scope:
        Scope can be of study, series, or instance.
    Label annotation mode:
        Annotation mode can be of bounding boxes, free form, polygon, etc.
    """

    def __init__(self, label_group_data):
        """
        Args:
            label_group_data (object: json):
                JSON data for the label group; its "name" and "id" entries
                are exposed as attributes.
        """
        self.label_group_data = label_group_data
        self.name = label_group_data["name"]
        self.id = label_group_data["id"]

    def get_data(self):
        """Return the JSON data this label group was created from."""
        return self.label_group_data

    def get_labels(self):
        """Get label ids and names as a list of (id, name) tuples."""
        pairs = []
        for entry in self.label_group_data["labels"]:
            pairs.append((entry["id"], entry["name"]))
        return pairs

    def show_labels(self, print_offset=""):
        """Print id and name of each label, every line prefixed by ``print_offset``."""
        entries = self.label_group_data["labels"]
        print(f"{print_offset}Labels:")
        for entry in entries:
            print(f"{print_offset}Id: {entry['id']}, Name: {entry['name']}")
        print("")
class Dataset:
    """A dataset consists of DICOM images and annotations.

    Args:
        dataset_data:
            Dataset json data.
        images_dir:
            DICOM images directory.

    Attributes:
        all_annotations: The ``"annotations"`` entry of ``dataset_data``.
        all_image_ids: Paths of every ``*.dcm`` file found below ``images_dir``.
        classes_dict: Label id -> class info mapping; ``None`` until it is
            assigned (``Project.set_labels_dict()`` does this).
        image_ids: Sorted image ids referenced by the filtered annotations;
            ``None`` until ``prepare()`` has run.
        imgs_anns_dict: Image id -> list of annotations; ``None`` until
            ``prepare()`` has run.
    """

    def __init__(self, dataset_data, images_dir):
        self.dataset_data = dataset_data
        self.images_dir = images_dir
        self.id = dataset_data["id"]
        self.name = dataset_data["name"]
        self.all_annotations = dataset_data["annotations"]
        self.image_ids = None
        self.classes_dict = None
        self.imgs_anns_dict = None
        # all image ids
        self.all_image_ids = glob.glob(os.path.join(self.images_dir, "**/*.dcm"), recursive=True)

    def prepare(self):
        """Filter annotations by the configured labels and map them to images.

        Raises:
            Exception: If ``classes_dict`` has not been set yet.
        """
        if self.classes_dict is None:
            raise Exception("Use `Project.set_labels_dict()` to set labels.")
        label_ids = self.classes_dict.keys()
        # filter annotations by label ids
        ann_filtered = self.get_annotations(label_ids)
        self.imgs_anns_dict = self._associate_images_and_annotations(ann_filtered)

    def get_annotations(self, label_ids=None, verbose=False):
        """Returns annotations, filtered by label ids.

        Args:
            label_ids (optional):
                Filter returned annotations by matching label ids. When
                ``None``, all annotations are returned.
            verbose (optional):
                Print debug messages.
        """
        if label_ids is None:
            if verbose:
                print("Dataset contains %d annotations." % len(self.all_annotations))
            return self.all_annotations

        ann_filtered = [a for a in self.all_annotations if a["labelId"] in label_ids]
        if verbose:
            print(
                f"Dataset contains {len(ann_filtered)} annotations"
                + f", filtered by label ids {label_ids}."
            )
        return ann_filtered

    def _image_ids_under(self, directory):
        """Return the known image ids located below ``directory``."""
        # The trailing separator keeps e.g. study "1.2" from also matching a
        # sibling directory named "1.23".
        prefix = os.path.join(directory, "")
        return [image_id for image_id in self.all_image_ids if image_id.startswith(prefix)]

    def _generate_uid(self, ann):
        """Generate an unique image identifier based on the DICOM file structure.

        Args:
            ann (dict):
                A single annotation.

        Returns:
            A single image path when the annotation carries study, series and
            SOP instance UIDs; otherwise the list of known image paths below
            the annotation's series (or study) directory.

        Raises:
            ValueError: If the annotation has no ``StudyInstanceUID``, or has
                UIDs that do not form a study -> series -> instance chain.
        """
        # Each key is tested on its own: `"a" and "b" in ann` would only test "b".
        has_study = "StudyInstanceUID" in ann
        has_series = "SeriesInstanceUID" in ann
        has_sop = "SOPInstanceUID" in ann

        if has_study and has_series and has_sop:
            # SOPInstanceUID aka image level
            return os.path.join(
                self.images_dir,
                ann["StudyInstanceUID"],
                ann["SeriesInstanceUID"],
                ann["SOPInstanceUID"] + ".dcm",
            )
        if has_study and has_series:
            return self._image_ids_under(
                os.path.join(self.images_dir, ann["StudyInstanceUID"], ann["SeriesInstanceUID"])
            )
        if has_study:
            return self._image_ids_under(os.path.join(self.images_dir, ann["StudyInstanceUID"]))
        raise ValueError(f"Unable to create UID from {ann}")

    def _uids_for(self, ann):
        """Return the image ids of ``ann`` as a (possibly empty) list."""
        uid = self._generate_uid(ann)
        if not uid:
            return []
        return uid if isinstance(uid, list) else [uid]

    def get_image_ids(self, verbose=False):
        """Returns image ids. Must call prepare() method first in order to generate image ids.

        Args:
            verbose (Optional):
                Print debug message.

        Raises:
            Exception: If ``prepare()`` has not been called yet.
        """
        # Compare with None: an empty list is a valid result of prepare().
        if self.image_ids is None:
            raise Exception("Call Dataset.prepare() first.")
        if verbose:
            print(
                f"Dataset contains {len(self.image_ids)} images"
                + f", filtered by label ids {self.classes_dict.keys()}."
            )
        return self.image_ids

    def _generate_image_ids(self, anns):
        """Get images ids for annotations.

        Args:
            anns (list):
                List of annotations.

        Returns:
            A sorted list of unique image ids.
        """
        return sorted({uid for ann in anns for uid in self._uids_for(ann)})

    def get_annotations_by_image_id(self, image_id):
        """Return the annotations associated with ``image_id``.

        Raises:
            ValueError: If the image id is not part of the prepared dataset
                (including when ``prepare()`` has not been called).
        """
        if self.imgs_anns_dict is None or image_id not in self.imgs_anns_dict:
            raise ValueError(f"Image id {image_id} is not found in dataset {self.name}.")
        return self.imgs_anns_dict[image_id]

    def _associate_images_and_annotations(self, anns):
        """Generate image ids to annotations mapping.

        Each image can have one or more annotations. Also stores the sorted
        image ids on ``self.image_ids``.

        Args:
            anns (list):
                List of annotations.

        Returns:
            A dictionary with image ids as keys (in sorted order) and lists of
            annotations as values.
        """
        # Resolve every annotation once, grouping as we go.
        anns_by_image = {}
        for ann in anns:
            for uid in self._uids_for(ann):
                anns_by_image.setdefault(uid, []).append(ann)

        self.image_ids = sorted(anns_by_image)
        return {image_id: anns_by_image[image_id] for image_id in self.image_ids}

    def class_id_to_class_text(self, class_id):
        """Return the class text of the first class whose id is ``class_id``.

        Raises:
            Exception: If no class has that id.
        """
        for class_info in self.classes_dict.values():
            if class_info["class_id"] == class_id:
                return class_info["class_text"]
        raise Exception(f"class_id {class_id} is invalid.")

    def class_text_to_class_id(self, class_text):
        """Return the class id of the first class whose text is ``class_text``.

        Raises:
            Exception: If no class has that text.
        """
        for class_info in self.classes_dict.values():
            if class_info["class_text"] == class_text:
                return class_info["class_id"]
        raise Exception(f"class_text {class_text} is invalid.")

    def label_id_to_class_id(self, label_id):
        """Return the class id configured for ``label_id``.

        Raises:
            Exception: If the label id is not in ``classes_dict``.
        """
        class_info = self.classes_dict.get(label_id)
        if class_info is None:
            raise Exception(f"label_id {label_id} is invalid.")
        return class_info["class_id"]

    def label_id_to_class_annotation_mode(self, label_id):
        """Return the annotation mode configured for ``label_id``.

        Raises:
            Exception: If the label id is not in ``classes_dict``.
        """
        class_info = self.classes_dict.get(label_id)
        if class_info is None:
            raise Exception(f"label_id {label_id} is invalid.")
        return class_info["class_annotation_mode"]

    def show_classes(self):
        """Print label id, class id and class text of every configured class."""
        for label_id, class_info in self.classes_dict.items():
            print(
                f"Label id: {label_id}, Class id: {class_info['class_id']}, "
                f"Class text: {class_info['class_text']}"
            )