| import os |
| import unittest |
| from support import html5lib_test_files, TestData, test_dir |
| |
| from html5lib import HTMLParser, inputstream |
| |
| import re, unittest |
| |
| class Html5EncodingTestCase(unittest.TestCase): |
| def test_codec_name(self): |
| self.assertEquals(inputstream.codecName("utf-8"), "utf-8") |
| self.assertEquals(inputstream.codecName("utf8"), "utf-8") |
| self.assertEquals(inputstream.codecName(" utf8 "), "utf-8") |
| self.assertEquals(inputstream.codecName("ISO_8859--1"), "windows-1252") |
| |
def buildTestSuite():
    """Attach the data-driven encoding tests to Html5EncodingTestCase.

    For every file returned by ``html5lib_test_files("encoding")`` one test
    method per contained test is generated and set on the test case class
    under the name ``test_<file>_<n>`` (``.dat`` suffix and hyphens removed
    from the file name, ``n`` starting at 1). When the ``chardet`` package
    can be imported, an additional ``test_chardet`` method is registered.

    Returns the test suite loaded from this module by name.
    """
    for filename in html5lib_test_files("encoding"):
        base_name = os.path.basename(filename)
        test_name = base_name.replace('.dat', '').replace('-', '')
        tests = TestData(filename, "data")
        for idx, test in enumerate(tests):
            # The current test's values are bound as default arguments so
            # each generated method keeps its own data instead of sharing
            # the loop variable of the final iteration.
            def encodingTest(self, data=test['data'],
                             encoding=test['encoding']):
                p = HTMLParser()
                # Parse without chardet so only the stream's own detection
                # decides the encoding being checked below.
                p.parse(data, useChardet=False)

                errorMessage = ("Input:\n%s\nExpected:\n%s\nRecieved\n%s\n" %
                                (data, repr(encoding.lower()),
                                 repr(p.tokenizer.stream.charEncoding)))
                self.assertEqual(encoding.lower(),
                                 p.tokenizer.stream.charEncoding[0],
                                 errorMessage)
            setattr(Html5EncodingTestCase,
                    'test_%s_%d' % (test_name, idx + 1),
                    encodingTest)

    try:
        # Imported only to find out whether chardet is installed.
        import chardet  # noqa: F401
    except ImportError:
        print("chardet not found, skipping chardet tests")
    else:
        def test_chardet(self):
            path = os.path.join(test_dir, "encoding", "chardet",
                                "test_big5.txt")
            # Read raw bytes and close the file deterministically; the
            # stream has to see the undecoded content to detect Big5.
            with open(path, "rb") as fixture:
                data = fixture.read()
            encoding = inputstream.HTMLInputStream(data).charEncoding
            # A unittest assertion is used because a bare assert is
            # stripped when Python runs with -O.
            self.assertEqual(encoding[0].lower(), "big5")
        setattr(Html5EncodingTestCase, 'test_chardet', test_chardet)

    return unittest.defaultTestLoader.loadTestsFromName(__name__)
| |
def main():
    """Generate the encoding test methods, then run unittest's CLI runner."""
    # The generated methods are attached to the test case class here, so
    # this has to happen before unittest.main() runs the module's tests.
    buildTestSuite()
    unittest.main()
| |
# Run the tests only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()