python xml
原文链接: python xml
ET读取xml
try:
import xml.etree.cElementTree as ET C实现速度快一点的xml.etree.cElementTree
except ImportError:
import xml.etree.ElementTree as ET
import xml.etree.ElementTree as ET----------导入xml模块
root = ET.parse('GHO.xml')------------------分析指定xml文件
tree = root.getroot()-----------------------获取第一标签
data = tree.find('Data')--------------------查找第一标签中'Data'标签
for obs in data:----------------------------历遍'Data'中的所有标签
for item in obs:------------------------历遍'Data'中的'obs'标签下的所有标签
key = item.attrib()-----------------提取key值参数
print(list(key))--------------------输出key值
try:
import xml.etree.cElementTree as ET
except ImportError:
import xml.etree.ElementTree as ET
import sys
try:
tree = ET.parse("country.xml") #打开xml文档
#root = ET.fromstring(country_string) #从字符串传递xml
root = tree.getroot() #获得root节点
except Exception, e:
print "Error:cannot parse file:country.xml."
sys.exit(1)
print root.tag, "---", root.attrib
for child in root:
print child.tag, "---", child.attrib
print "*"*10
print root[0][1].text #通过下标访问
print root[0].tag, root[0].text
print "*"*10
for country in root.findall('country'): #找到root节点下的所有country节点
rank = country.find('rank').text #子节点下节点rank的值
name = country.get('name') #子节点下属性name的值
print name, rank
#修改xml文件
for country in root.findall('country'):
rank = int(country.find('rank').text)
if rank > 50:
root.remove(country)
tree.write('output.xml')
voc2yolo 格式转换
import xml.etree.ElementTree as ET
import os
from os import getcwd
sets=[('2017', 'train'),('2017', 'val')]
format = 'png'
classes = ['banana', 'monkey', 'panda']
def convert(size, box):
dw = 1./size[0]
dh = 1./size[1]
x = (box[0] + box[1])/2.0
y = (box[2] + box[3])/2.0
w = box[1] - box[0]
h = box[3] - box[2]
x = x*dw
w = w*dw
y = y*dh
h = h*dh
return (x,y,w,h)
def convert_annotation(year, image_id):
in_file = open('devkit/%s/Annotations/%s.xml'%(year, image_id))
out_file = open('devkit/%s/labels/%s.txt'%(year, image_id), 'w')
tree=ET.parse(in_file)
root = tree.getroot()
size = root.find('size')
w = int(size.find('width').text)
h = int(size.find('height').text)
for obj in root.iter('object'):
difficult = obj.find('difficult').text
cls = obj.find('name').text
if cls not in classes or int(difficult) == 1:
continue
cls_id = classes.index(cls)
xmlbox = obj.find('bndbox')
b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
bb = convert((w,h), b)
out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
wd = getcwd()
for year, image_set in sets:
if not os.path.exists('devkit/%s/labels/'%(year)):
os.makedirs('devkit/%s/labels/'%(year))
image_ids = open('devkit/%s/ImageSets/%s.txt'%(year, image_set)).read().strip().split()
list_file = open('%s_%s.txt'%(year, image_set), 'w')
for image_id in image_ids:
list_file.write('%s/devkit/%s/Images/%s.%s\n'%(wd, year, image_id, format))
convert_annotation(year, image_id)
list_file.close()