Spaces:
Runtime error
Runtime error
| # Copyright (c) OpenMMLab. All rights reserved. | |
| import argparse | |
| import glob | |
| import math | |
| import os | |
| import os.path as osp | |
| import tempfile | |
| import zipfile | |
| import mmcv | |
| import numpy as np | |
| from mmengine.utils import ProgressBar, mkdir_or_exist | |
| def parse_args(): | |
| parser = argparse.ArgumentParser( | |
| description='Convert potsdam dataset to mmsegmentation format') | |
| parser.add_argument('dataset_path', help='potsdam folder path') | |
| parser.add_argument('--tmp_dir', help='path of the temporary directory') | |
| parser.add_argument('-o', '--out_dir', help='output path') | |
| parser.add_argument( | |
| '--clip_size', | |
| type=int, | |
| help='clipped size of image after preparation', | |
| default=512) | |
| parser.add_argument( | |
| '--stride_size', | |
| type=int, | |
| help='stride of clipping original images', | |
| default=256) | |
| args = parser.parse_args() | |
| return args | |
| def clip_big_image(image_path, clip_save_dir, args, to_label=False): | |
| # Original image of Potsdam dataset is very large, thus pre-processing | |
| # of them is adopted. Given fixed clip size and stride size to generate | |
| # clipped image, the intersection of width and height is determined. | |
| # For example, given one 5120 x 5120 original image, the clip size is | |
| # 512 and stride size is 256, thus it would generate 20x20 = 400 images | |
| # whose size are all 512x512. | |
| image = mmcv.imread(image_path) | |
| h, w, c = image.shape | |
| clip_size = args.clip_size | |
| stride_size = args.stride_size | |
| num_rows = math.ceil((h - clip_size) / stride_size) if math.ceil( | |
| (h - clip_size) / | |
| stride_size) * stride_size + clip_size >= h else math.ceil( | |
| (h - clip_size) / stride_size) + 1 | |
| num_cols = math.ceil((w - clip_size) / stride_size) if math.ceil( | |
| (w - clip_size) / | |
| stride_size) * stride_size + clip_size >= w else math.ceil( | |
| (w - clip_size) / stride_size) + 1 | |
| x, y = np.meshgrid(np.arange(num_cols + 1), np.arange(num_rows + 1)) | |
| xmin = x * clip_size | |
| ymin = y * clip_size | |
| xmin = xmin.ravel() | |
| ymin = ymin.ravel() | |
| xmin_offset = np.where(xmin + clip_size > w, w - xmin - clip_size, | |
| np.zeros_like(xmin)) | |
| ymin_offset = np.where(ymin + clip_size > h, h - ymin - clip_size, | |
| np.zeros_like(ymin)) | |
| boxes = np.stack([ | |
| xmin + xmin_offset, ymin + ymin_offset, | |
| np.minimum(xmin + clip_size, w), | |
| np.minimum(ymin + clip_size, h) | |
| ], | |
| axis=1) | |
| if to_label: | |
| color_map = np.array([[0, 0, 0], [255, 255, 255], [255, 0, 0], | |
| [255, 255, 0], [0, 255, 0], [0, 255, 255], | |
| [0, 0, 255]]) | |
| flatten_v = np.matmul( | |
| image.reshape(-1, c), | |
| np.array([2, 3, 4]).reshape(3, 1)) | |
| out = np.zeros_like(flatten_v) | |
| for idx, class_color in enumerate(color_map): | |
| value_idx = np.matmul(class_color, | |
| np.array([2, 3, 4]).reshape(3, 1)) | |
| out[flatten_v == value_idx] = idx | |
| image = out.reshape(h, w) | |
| for box in boxes: | |
| start_x, start_y, end_x, end_y = box | |
| clipped_image = image[start_y:end_y, | |
| start_x:end_x] if to_label else image[ | |
| start_y:end_y, start_x:end_x, :] | |
| idx_i, idx_j = osp.basename(image_path).split('_')[2:4] | |
| mmcv.imwrite( | |
| clipped_image.astype(np.uint8), | |
| osp.join( | |
| clip_save_dir, | |
| f'{idx_i}_{idx_j}_{start_x}_{start_y}_{end_x}_{end_y}.png')) | |
| def main(): | |
| args = parse_args() | |
| splits = { | |
| 'train': [ | |
| '2_10', '2_11', '2_12', '3_10', '3_11', '3_12', '4_10', '4_11', | |
| '4_12', '5_10', '5_11', '5_12', '6_10', '6_11', '6_12', '6_7', | |
| '6_8', '6_9', '7_10', '7_11', '7_12', '7_7', '7_8', '7_9' | |
| ], | |
| 'val': [ | |
| '5_15', '6_15', '6_13', '3_13', '4_14', '6_14', '5_14', '2_13', | |
| '4_15', '2_14', '5_13', '4_13', '3_14', '7_13' | |
| ] | |
| } | |
| dataset_path = args.dataset_path | |
| if args.out_dir is None: | |
| out_dir = osp.join('data', 'potsdam') | |
| else: | |
| out_dir = args.out_dir | |
| print('Making directories...') | |
| mkdir_or_exist(osp.join(out_dir, 'img_dir', 'train')) | |
| mkdir_or_exist(osp.join(out_dir, 'img_dir', 'val')) | |
| mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'train')) | |
| mkdir_or_exist(osp.join(out_dir, 'ann_dir', 'val')) | |
| zipp_list = glob.glob(os.path.join(dataset_path, '*.zip')) | |
| print('Find the data', zipp_list) | |
| for zipp in zipp_list: | |
| with tempfile.TemporaryDirectory(dir=args.tmp_dir) as tmp_dir: | |
| zip_file = zipfile.ZipFile(zipp) | |
| zip_file.extractall(tmp_dir) | |
| src_path_list = glob.glob(os.path.join(tmp_dir, '*.tif')) | |
| if not len(src_path_list): | |
| sub_tmp_dir = os.path.join(tmp_dir, os.listdir(tmp_dir)[0]) | |
| src_path_list = glob.glob(os.path.join(sub_tmp_dir, '*.tif')) | |
| prog_bar = ProgressBar(len(src_path_list)) | |
| for i, src_path in enumerate(src_path_list): | |
| idx_i, idx_j = osp.basename(src_path).split('_')[2:4] | |
| data_type = 'train' if f'{idx_i}_{idx_j}' in splits[ | |
| 'train'] else 'val' | |
| if 'label' in src_path: | |
| dst_dir = osp.join(out_dir, 'ann_dir', data_type) | |
| clip_big_image(src_path, dst_dir, args, to_label=True) | |
| else: | |
| dst_dir = osp.join(out_dir, 'img_dir', data_type) | |
| clip_big_image(src_path, dst_dir, args, to_label=False) | |
| prog_bar.update() | |
| print('Removing the temporary files...') | |
| print('Done!') | |
| if __name__ == '__main__': | |
| main() | |