blob: deaf2e251cd1bd4a61b0269b4b69025f24f3e445 [file] [log] [blame]
import os
import sys
import codecs
import glob
base_path = os.path.split(__file__)[0]
if os.path.exists(os.path.join(base_path, 'testdata')):
#release
test_dir = os.path.join(base_path, 'testdata')
else:
#development
test_dir = os.path.abspath(
os.path.join(base_path,
os.path.pardir, os.path.pardir,
os.path.pardir, 'testdata'))
assert os.path.exists(test_dir), "Test data not found"
#import the development html5lib
sys.path.insert(0, os.path.abspath(os.path.join(base_path,
os.path.pardir,
os.path.pardir)))
import html5lib
from html5lib import html5parser, treebuilders
del base_path
#Build a dict of avaliable trees
treeTypes = {"simpletree":treebuilders.getTreeBuilder("simpletree"),
"DOM":treebuilders.getTreeBuilder("dom")}
#Try whatever etree implementations are avaliable from a list that are
#"supposed" to work
try:
import xml.etree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
try:
import elementtree.ElementTree as ElementTree
treeTypes['ElementTree'] = treebuilders.getTreeBuilder("etree", ElementTree, fullTree=True)
except ImportError:
pass
try:
import xml.etree.cElementTree as cElementTree
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
try:
import cElementTree
treeTypes['cElementTree'] = treebuilders.getTreeBuilder("etree", cElementTree, fullTree=True)
except ImportError:
pass
try:
import lxml.etree as lxml
treeTypes['lxml'] = treebuilders.getTreeBuilder("etree", lxml, fullTree=True)
except ImportError:
pass
try:
import BeautifulSoup
treeTypes["beautifulsoup"] = treebuilders.getTreeBuilder("beautifulsoup", fullTree=True)
except ImportError:
pass
def html5lib_test_files(subdirectory, files='*.dat'):
return glob.glob(os.path.join(test_dir,subdirectory,files))
class DefaultDict(dict):
def __init__(self, default, *args, **kwargs):
self.default = default
dict.__init__(self, *args, **kwargs)
def __getitem__(self, key):
return dict.get(self, key, self.default)
class TestData(object):
def __init__(self, filename, newTestHeading="data"):
self.f = codecs.open(filename, encoding="utf8")
self.newTestHeading = newTestHeading
def __iter__(self):
data = DefaultDict(None)
key=None
for line in self.f:
heading = self.isSectionHeading(line)
if heading:
if data and heading == self.newTestHeading:
#Remove trailing newline
data[key] = data[key][:-1]
yield self.normaliseOutput(data)
data = DefaultDict(None)
key = heading
data[key]=""
elif key is not None:
data[key] += line
if data:
yield self.normaliseOutput(data)
def isSectionHeading(self, line):
"""If the current heading is a test section heading return the heading,
otherwise return False"""
if line.startswith("#"):
return line[1:].strip()
else:
return False
def normaliseOutput(self, data):
#Remove trailing newlines
for key,value in data.iteritems():
if value.endswith("\n"):
data[key] = value[:-1]
return data
def convert(stripChars):
def convertData(data):
"""convert the output of str(document) to the format used in the testcases"""
data = data.split("\n")
rv = []
for line in data:
if line.startswith("|"):
rv.append(line[stripChars:])
else:
rv.append(line)
return "\n".join(rv)
return convertData
convertExpected = convert(2)