Reading Export File Example

📘

Code Snippets

Please note these snippets are provided "as is"; it is up to you to verify that they are correct for your case.

Example Reading Export File

import json
import requests
import time

def convert(diffgram_all_data_path):
    """Convert a Diffgram export file into a JSON Lines bounding-box file.

    Reads the JSON export at ``diffgram_all_data_path`` and writes one JSON
    object per exported file to ``google_format_<timestamp>.jsonl`` in the
    current working directory. Each object holds a ``boundingBoxAnnotations``
    list with one entry per instance (label name plus absolute
    ``xMin``/``yMin``/``xMax``/``yMax`` taken from the instance).

    Entries without a ``file`` section or without instances are skipped.

    Args:
        diffgram_all_data_path: Path to the export JSON file.

    Raises:
        KeyError: If the export has no ``label_map`` section, or an instance
            refers to a ``label_file_id`` that is not in it.
    """
    with open(diffgram_all_data_path, 'r') as readfile:
        all_data = json.load(readfile)

    label_map = all_data.pop('label_map')
    # These sections are metadata, not per-file entries. Tolerate exports
    # that do not contain them.
    all_data.pop('export_info', None)
    all_data.pop('attribute_groups_reference', None)

    export_data = []

    for item in all_data.values():

        # Anything that is not a mapping cannot be a file entry.
        if not isinstance(item, dict):
            continue

        file = item.get('file')
        if not file:
            continue

        # Not needed for absolute coordinates; kept in scope so the
        # "converting to relative" variant can divide by them.
        image = item.get('image') or {}
        width = image.get('width')
        height = image.get('height')

        instance_list = item.get('instance_list')
        if not instance_list:
            continue

        # NOTE(review): the original snippet never created `single_file`.
        # Only the annotation list is emitted here; if the target importer
        # needs an image reference (for example an `imageGcsUri` field), add
        # it to this dict - confirm against the importer's schema.
        single_file = {'boundingBoxAnnotations': []}

        for instance in instance_list:

            xMin = instance.get('x_min')
            yMin = instance.get('y_min')
            xMax = instance.get('x_max')
            yMax = instance.get('y_max')

            new_item = {
                # label_map keys are strings because they come from JSON.
                "displayName": label_map[str(instance.get('label_file_id'))],
                "xMin": xMin,
                "yMin": yMin,
                "xMax": xMax,
                "yMax": yMax
                }
            single_file['boundingBoxAnnotations'].append(new_item)

        export_data.append(single_file)

    with open(F'google_format_{time.time()}.jsonl', 'w') as outfile:
        for entry in export_data:
            json.dump(entry, outfile)
            outfile.write('\n')

# Run the conversion only when this file is executed as a script, so that
# importing it does not read or write any files.
if __name__ == "__main__":
    convert("export.json")

Example Converting To Relative

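To export relative (0 to 1) coordinates instead of absolute ones, replace the four coordinate assignments (xMin, yMin, xMax, yMax) at the top of the instance loop in the code above with this: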

# Express each corner as a fraction of the image size: x values are divided
# by `width`, y values by `height` (both read from the item's `image` section).
# Minimum corner.
xMin, yMin = instance.get('x_min') / width, instance.get('y_min') / height
# Maximum corner.
xMax, yMax = instance.get('x_max') / width, instance.get('y_max') / height

Example Downloading Assets

import json
import os
import time
from queue import Queue
from threading import Thread

import requests

class EventsWorker(Thread):
    """Thread that downloads ``(path, file_id)`` items taken from a queue.

    The loop in ``run`` never ends on its own, so the thread should be started
    as a daemon. Callers wait for completion with ``queue.join()``.
    """

    def __init__(self, queue):
        """Store the queue this worker consumes ``(path, file_id)`` tuples from."""
        super().__init__()
        self.queue = queue

    def run(self):
        """Process queue items forever, reporting failed downloads."""
        while True:
            path, file_id = self.queue.get()
            try:
                download_one(path, file_id)
            except Exception as exc:
                # A failed download must not kill the worker. If every worker
                # died, the remaining items would never be consumed and
                # queue.join() would block forever.
                print(F'Download failed for file {file_id} ({path}): {exc!r}')
            finally:
                # Always mark the item done so queue.join() can return.
                self.queue.task_done()

def download_one(path, file_id, timeout=60):
    """Download ``path`` and save the response body as ``images/<file_id>.jpg``.

    Args:
        path: URL to fetch.
        file_id: Identifier used as the output file name.
        timeout: Seconds to wait for the server before giving up, so a stalled
            connection cannot block the caller indefinitely.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    r = requests.get(path, timeout=timeout)
    # Fail loudly instead of saving an HTTP error body under a .jpg name.
    r.raise_for_status()
    # The output folder may not exist yet on a first run.
    os.makedirs('images', exist_ok=True)
    with open(F'images/{file_id}.jpg', 'wb') as f:
        f.write(r.content)

def download_to_folder(diffgram_all_data_path, num_workers=25):
    """Queue a download for every annotated file listed in a Diffgram export.

    Loads the JSON export, starts ``num_workers`` daemon ``EventsWorker``
    threads, and enqueues ``(blob_url, file_id)`` for each entry that has a
    ``file`` section, at least one instance, and a ``blob_url``. Returns once
    every queued item has been marked done.

    Args:
        diffgram_all_data_path: Path to the export JSON file.
        num_workers: Number of worker threads to start.
    """
    # Context manager so the export file handle is closed after loading.
    with open(diffgram_all_data_path) as export_file:
        all_data = json.load(export_file)

    # These sections are metadata, not per-file entries. Tolerate exports
    # that do not contain them.
    for metadata_key in ('export_info', 'attribute_groups_reference', 'label_map'):
        all_data.pop(metadata_key, None)

    queue = Queue()
    for _ in range(num_workers):
        worker = EventsWorker(queue)
        # Daemon threads do not keep the process alive after queue.join().
        worker.daemon = True
        worker.start()

    for file_id, item in all_data.items():

        # Anything that is not a mapping cannot be a file entry.
        if not isinstance(item, dict):
            continue

        file_info = item.get('file')
        if not file_info:
            continue
        if not item.get('instance_list'):
            continue

        blob_url = file_info.get('blob_url')
        if not blob_url:
            # Nothing to download for this entry.
            continue

        queue.put((blob_url, file_id))

    # Block until the workers have marked every queued item as done.
    queue.join()

# Start the downloads only when this file is executed as a script, so that
# importing it does not trigger any network or file activity.
if __name__ == "__main__":
    download_to_folder("export.json")