source [<Element 'filename' at 0x000001CB382693B0>, <Element 'origin' at 0x000001CB392C7BD0>] research [<Element 'version' at 0x000001CB392C78B0>, <Element 'provider' at 0x000001CB392C71D0>, <Element 'author' at 0x000001CB392C7770>, <Element 'pluginname' at 0x000001CB392C7C20>, <Element 'pluginclass' at 0x000001CB392C7360>, <Element 'time' at 0x000001CB392C7DB0>] size [<Element 'width' at 0x000001CB392C7680>, <Element 'height' at 0x000001CB392C77C0>, <Element 'depth' at 0x000001CB392C7EF0>] objects [<Element 'object' at 0x000001CB392C7590>, <Element 'object' at 0x000001CB392C7720>]
import os import scipy.misc as misc from xml.dom.minidom import Document import numpy as np import copy, cv2 # import imageio import json import glob import xml.etree.cElementTree as ET import re from osgeo import gdal, gdalconst defread_xml_gtbox_and_label(xml_path): """ :param xml_path: the path of voc xml :return: a list contains gtboxes and labels, shape is [num_of_gtboxes, 9], and has [x1, y1, x2, y2, x3, y3, x4, y4, label] in a per row """
tree = ET.parse(xml_path) root = tree.getroot() box_list = [] difficult_list = [] isObjNone=True tmp_score_list=[] for child_of_root in root: # if child_of_root.tag == 'filename': # assert child_of_root.text == xml_path.split('/')[-1].split('.')[0] \ # + FLAGS.img_format, 'xml_name and img_name cannot match' if child_of_root.tag == 'source': for child_item in child_of_root: if child_item.tag == 'filename': img_name = child_item.text
if child_of_root.tag == 'objects': label = None for child_item in child_of_root: if child_item.tag == 'object': for child_pic in child_item: if child_pic.tag == 'possibleresult': for child_name in child_pic: if child_name.tag == 'name': label = child_name.text if child_name.tag == 'probability': tmp_score=float(child_name.text) tmp_score_list.append(tmp_score) # difficult_tmp = int(not(child_name.text)) tmp_list=re.findall('[^.\d]',child_name.text) iflen(tmp_list)>0: print(xml_path) print(tmp_list) if child_name.text.strip()=='': print(xml_path) print('empty') difficult_tmp = 0#int(not(child_name.text)) difficult_list.append(difficult_tmp) if child_pic.tag == 'points': tmp_box = [] for node in child_pic: tmp_list=re.findall('[^-,.\d]',node.text) iflen(tmp_list)>0: print(xml_path) print(tmp_list) if node.text.strip()=='': print(xml_path) print('empty') for node in child_pic[:4]: tmp_box=tmp_box+node.text.split(',') isObjNone=False tmp_box.append(label) box_list.append(tmp_box) if isObjNone: print(xml_path) print('obj none!')
#1 if child_of_root.tag == 'objects': label = None for child_item in child_of_root: if child_item.tag == 'object': for child_pic in child_item: if child_pic.tag == 'possibleresult': for child_name in child_pic: if child_name.tag == 'name': label = child_name.text if child_name.tag == 'probability': tmp_score=float(child_name.text) tmp_score_list.append(tmp_score) # difficult_tmp = int(not(child_name.text)) tmp_list=re.findall('[^.\d]',child_name.text) iflen(tmp_list)>0: print(xml_path) print(tmp_list) if child_name.text.strip()=='': print(xml_path) print('empty') difficult_tmp = 0#int(not(child_name.text)) difficult_list.append(difficult_tmp) if child_pic.tag == 'points': tmp_box = [] for node in child_pic: tmp_list=re.findall('[^-,.\d]',node.text) iflen(tmp_list)>0: print(xml_path) print(tmp_list) if node.text.strip()=='': print(xml_path) print('empty') for node in child_pic[:4]: tmp_box=tmp_box+node.text.split(',') isObjNone=False tmp_box.append(label) box_list.append(tmp_box)
#2 defconvert_pascal_to_tfrecord(image_path, xml_path): #葛改save_path=r'/emwuser/gzl/gf2021/data/plane_2020/train_split_test/train/json_c' save_path=r'/emwuser/gzl/gf2021/data/FAIR1M/train/part1/json_c'#json的保存地址#r其实是为了强制认为后面的都是字符串,不考虑转义字符,无r则会考虑转义字符,这里加不加其实无所谓,但是C:\data这种的就得加上r,或者直接C:\\data for count, xml inenumerate(glob.glob(xml_path + '/*.xml')):#进行索引,*代表任意匹配,得到可迭代对象 # to avoid path error in different development platform xml = xml.replace('\\', '/')#用/替换\\,即后面的替换前面的 (_, xml_name) = os.path.split(xml)#分割。返回路径和文件名。若本身就是一个文件夹路径,则返回路径和空
Traceback (most recent call last): File "/emwuser/gzl/gf2021/code/R3Det_Tensorflow_gf2021/tools_gf_2021/xml2json_zry.py", line 161, in <module> convert_pascal_to_tfrecord(raw_images_dir,raw_label_dir) File "/emwuser/gzl/gf2021/code/R3Det_Tensorflow_gf2021/tools_gf_2021/xml2json_zry.py", line 130, in convert_pascal_to_tfrecord shape['score']=tmp_score_list[i] IndexError: list index out of range