''' 把gf2021的数据集中的飞机都选出来,即一张图片中若含有飞机模型的,则选出来,同时需要修改xml文件,将 objects属性中的非飞机的object去掉,由于删除的非annotation直系节点,需要转换一下才能删除。 ''' import os import numpy as np import scipy.misc as misc from xml.dom.minidom import Document import numpy as np import copy, cv2 # import imageio import json import glob import xml.etree.cElementTree as ET from xml.etree.ElementTree import ElementTree,Element import shutil import re from osgeo import gdal, gdalconst
def choose_plane(image_path, xml_path, image_save_path, xml_save_path):
    """Copy every image/annotation pair that contains at least one airplane.

    For each ``*.xml`` annotation in *xml_path*, all ``<object>`` entries
    whose ``possibleresult/name`` is not an airplane class are removed from
    the ``<objects>`` node(s) directly under the XML root.  If at least one
    airplane object remains, the filtered XML is written to *xml_save_path*
    and the matching image is copied to *image_save_path*.

    NOTE(review): the statements that derived the image file name and
    parsed the XML were not visible in the reviewed chunk.  This version
    matches the image by file stem (same base name as the XML, any
    extension other than ``.xml``) -- confirm against the dataset layout.

    Args:
        image_path: Directory holding the source images.
        xml_path: Directory holding the source XML annotations.
        image_save_path: Directory receiving the selected images
            (created if missing).
        xml_save_path: Directory receiving the filtered XML files
            (created if missing).
    """
    plane_label = frozenset([
        "Boeing737", "Boeing747", "Boeing777", "C919", "A220",
        "A321", "A330", "A350", "ARJ21", "other-airplane",
    ])

    os.makedirs(image_save_path, exist_ok=True)
    os.makedirs(xml_save_path, exist_ok=True)

    # Sorted so that runs are reproducible regardless of directory order.
    for xml in sorted(glob.glob(xml_path + '/*.xml')):
        # basename handles both separators on Windows, so no manual
        # backslash replacement is needed.
        xml_name = os.path.basename(xml)
        stem = os.path.splitext(xml_name)[0]

        # Check for the image *before* writing anything, so a missing image
        # never leaves an orphan XML in the output directory.
        img_path = _find_image(image_path, stem)
        if img_path is None:
            print('{} is not exist!'.format(os.path.join(image_path, stem + '.*')))
            continue
        img_name = os.path.basename(img_path)

        tree = ET.parse(xml)
        root = tree.getroot()

        # The nodes are edited in place, so they do not need to be
        # re-assigned into the root afterwards.
        remaining = 0
        for objects_node in root.findall("objects"):
            _keep_only_labels(objects_node, plane_label)
            remaining += len(objects_node)

        # Explicit count instead of Element truth-testing (deprecated), and
        # no dependence on a loop variable that may never have been bound.
        if remaining == 0:
            continue

        tree.write(os.path.join(xml_save_path, xml_name),
                   encoding="utf-8", xml_declaration=True)
        shutil.copyfile(img_path, os.path.join(image_save_path, img_name))


def _keep_only_labels(objects_node, labels):
    """Remove each child of *objects_node* whose class name is not in *labels*."""
    # Iterate over a snapshot: removing children while iterating the live
    # element would skip siblings.
    for obj in list(objects_node):
        # findtext returns None when the name node is absent; such objects
        # are dropped instead of raising AttributeError.
        if obj.findtext("possibleresult/name") not in labels:
            objects_node.remove(obj)


def _find_image(image_dir, stem):
    """Return the first non-XML file ``<stem>.<ext>`` in *image_dir*, or None."""
    pattern = os.path.join(glob.escape(image_dir), glob.escape(stem) + ".*")
    for candidate in sorted(glob.glob(pattern)):
        # Skip annotations/sidecar XML in case images and labels share a folder.
        if candidate.lower().endswith(".xml"):
            continue
        if os.path.isfile(candidate):
            return candidate
    return None