<html><head><meta name="color-scheme" content="light dark"></head><body><pre style="word-wrap: break-word; white-space: pre-wrap;">#pylint: disable=too-many-public-methods
from functools import reduce
import os, re, shutil, tempfile
import unittest
from gleetex import htmlhandling


# Short alias for the name of the file the image formatter writes excluded
# formulas to; used below to check for / read that file.
excl_filename = htmlhandling.HtmlImageFormatter.EXCLUSION_FILE_NAME

# Minimal HTML document template for parser tests: placeholder {0} receives the
# charset name of the meta tag, placeholder {1} the content of the body.
HTML_SKELETON = '''&lt;html&gt;&lt;head&gt;&lt;meta http-equiv="Content-Type" content="text/html; charset={0}" /&gt;
&lt;/head&gt;&lt;body&gt;{1}&lt;/body&gt;'''

def read(file_name, mode='r', encoding='utf-8'):
    """Return the whole content of `file_name`.

    The file is opened with the given `mode` and `encoding`; it is closed
    again before the content is handed back to the caller.
    """
    with open(file_name, mode, encoding=encoding) as source:
        content = source.read()
    return content


class HtmlparserTest(unittest.TestCase):
    """Tests for htmlhandling.EqnParser.

    The tests feed HTML snippets into the parser and inspect the list returned
    by get_data(): ordinary markup is expected as strings, formulas as
    tuple/list chunks whose item 1 is the displaymath flag and whose last item
    is the formula source.
    """

    def setUp(self):
        # a fresh parser for each test, shared through self.p
        self.p = htmlhandling.EqnParser()

    def test_start_tags_are_parsed_literally(self):
        self.p.feed("&lt;p   i='o'&gt;")
        self.assertEqual(self.p.get_data()[0], "&lt;p   i='o'&gt;",
                "The HTML parser should copy start tags literally.")

    def test_that_end_tags_are_copied_literally(self):
        self.p.feed("&lt;/ p&gt;&lt;/P&gt;")
        self.assertEqual(''.join(self.p.get_data()), "&lt;/ p&gt;&lt;/P&gt;")

    def test_entities_are_unchanged(self):
        self.p.feed("&amp;#xa;")
        self.assertEqual(self.p.get_data()[0], '&amp;#xa;')

    def test_charsets_are_copied(self):
        self.p.feed('&amp;gt;&amp;rarr;')
        self.assertEqual(''.join(self.p.get_data()[0]), '&amp;gt;&amp;rarr;')

    def test_without_eqn_all_blocks_are_strings(self):
        self.p.feed("&lt;html&gt;\n&lt;head/&gt;&lt;body&gt;&lt;p&gt;42&lt;/p&gt;&lt;h1&gt;blah&lt;/h1&gt;&lt;/body&gt;&lt;/html&gt;")
        # all() checks every chunk, including the first one; a reduce()
        # without initial value would use the first chunk as accumulator and
        # never test its type
        self.assertTrue(
                all(isinstance(chunk, str) for chunk in self.p.get_data()),
                "all chunks have to be strings")

    def test_equation_is_detected(self):
        self.p.feed('&lt;eq&gt;foo \\pi&lt;/eq&gt;')
        self.assertIsInstance(self.p.get_data()[0], (tuple, list))
        self.assertEqual(self.p.get_data()[0][2], 'foo \\pi')

    def test_tag_followed_by_eqn_is_correctly_recognized(self):
        self.p.feed('&lt;p foo="bar"&gt;&lt;eq&gt;bar&lt;/eq&gt;')
        data = self.p.get_data()
        self.assertEqual(data[0], '&lt;p foo="bar"&gt;')
        # inspect the second chunk, not the surrounding list (which is always
        # a list and would make the check meaningless)
        self.assertIsInstance(data[1], (tuple, list),
                "second item of data must be equation data list")

    def test_document_with_tag_then_eqn_then_tag_works(self):
        self.p.feed('&lt;div style="invalid"&gt;bar&lt;/div&gt;&lt;eq&gt;baz&lt;/eq&gt;&lt;sometag&gt;')
        data = self.p.get_data()
        # test should not depend on a specific position of equation, search for
        # it
        eqn = next((chunk for chunk in data
                if isinstance(chunk, (tuple, list))), None)
        self.assertIsInstance(data[0], str)
        self.assertIsNotNone(eqn,
                "No equation found, must be tuple/list object.")
        self.assertIsInstance(data[-1], str)

    def test_equation_is_copied_literally(self):
        self.p.feed('&lt;eq ignore="me"&gt;my\nlittle\n\\tau&lt;/eq&gt;')
        self.assertEqual(self.p.get_data()[0][2], 'my\nlittle\n\\tau')

    def test_unclosed_eqns_are_detected(self):
        self.assertRaises(htmlhandling.ParseException, self.p.feed,
                '&lt;p&gt;&lt;eq&gt;\\endless\\formula')

    def test_nested_formulas_trigger_exception(self):
        self.assertRaises(htmlhandling.ParseException, self.p.feed,
                "&lt;eq&gt;\\pi&lt;eq&gt;&lt;/eq&gt;&lt;/eq&gt;")
        self.assertRaises(htmlhandling.ParseException, self.p.feed,
                "&lt;eq&gt;\\pi&lt;eq&gt;&lt;/p&gt;&lt;/eq&gt;")

    def test_formulas_without_displaymath_attribute_are_detected(self):
        # the backslash is escaped: a bare "\f" would be a form feed character
        self.p.feed('&lt;p&gt;&lt;eq&gt;\\frac12&lt;/eq&gt;&lt;br /&gt;&lt;eq env="inline"&gt;bar&lt;/eq&gt;&lt;/p&gt;')
        formulas = [c for c in self.p.get_data() if isinstance(c, (tuple, list))]
        self.assertEqual(len(formulas), 2) # there should be _2_ formulas
        self.assertEqual(formulas[0][1], False) # no displaymath
        self.assertEqual(formulas[1][1], False) # no displaymath

    def test_that_unclosed_formulas_detected(self):
        self.assertRaises(htmlhandling.ParseException, self.p.feed,
                "&lt;eq&gt;\\pi&lt;eq&gt;&lt;/p&gt;")
        self.assertRaises(htmlhandling.ParseException, self.p.feed,
                "&lt;eq&gt;\\pi")

    def test_formula_contains_only_formula(self):
        p = htmlhandling.EqnParser()
        p.feed("&lt;p&gt;&lt;eq&gt;1&lt;i&lt;9&lt;/eq&gt;&lt;/p&gt;")
        formula = next(e for e in p.get_data() if isinstance(e, (list, tuple)))
        self.assertEqual(formula[-1], "1&lt;i&lt;9")

        p = htmlhandling.EqnParser()
        p.feed('&lt;p&gt;&lt;eq env="displaymath"&gt;test&lt;/eq&gt;&lt;/p&gt;')
        formula = next(e for e in p.get_data() if isinstance(e, (list, tuple)))
        self.assertEqual(formula[-1], "test")

        p = htmlhandling.EqnParser()
        p.feed("&lt;p&gt;&lt;eq&gt;1&lt;i&lt;9&lt;/eq&gt;&lt;/p&gt;")
        formula = next(e for e in p.get_data() if isinstance(e, (list, tuple)))
        self.assertEqual(formula[-1], "1&lt;i&lt;9")

    def test_formula_with_html_sequences_are_unescaped(self):
        self.p.feed('&lt;eq&gt;a&amp;gt;b&lt;/eq&gt;')
        formula = self.p.get_data()[0]
        self.assertEqual(formula[-1], "a&gt;b")

    def test_displaymath_is_recognized(self):
        self.p.feed('&lt;eq env="displaymath"&gt;\\sum\\limits_{n=1}^{e^i} a^nl^n&lt;/eq&gt;')
        self.assertEqual(self.p.get_data()[0][1], True) # displaymath flag set

    def test_encoding_is_parsed_from_HTML4(self):
        # the document is deliberately passed as bytes; the umlauts are all
        # representable in iso-8859-1
        iso8859_1 = HTML_SKELETON.format('iso-8859-15', 'öäüß').encode('iso-8859-1')
        self.p.feed(iso8859_1)
        self.assertEqual(self.p._EqnParser__encoding, 'iso-8859-15')

    def test_encoding_is_parsed_from_HTML5(self):
        document = r"""&lt;!DOCTYPE html&gt;
            &lt;html xmlns="http://www.w3.org/1999/xhtml" lang="" xml:lang=""&gt;
            &lt;head&gt;&lt;meta charset="utf-8" /&gt;
              &lt;meta name="generator" content="pandoc" /&gt;
              &lt;/head&gt;&lt;body&gt;&lt;p&gt;hi&lt;/p&gt;&lt;/body&gt;&lt;/html&gt;"""
        self.p.feed(document.encode('utf-8'))
        self.assertEqual(self.p._EqnParser__encoding.lower(), 'utf-8')

    def test_strings_can_be_passed_tO_parser_as_well(self):
        # no exception - everything is working as expected
        self.p.feed(HTML_SKELETON.format('utf-8', 'æø'))

class GetPositionTest(unittest.TestCase):
    """Checks both items of the result of htmlhandling.get_position(text, index):
    item 0 is compared against the expected line number, item 1 against the
    expected position on that line."""

    def test_that_line_number_is_correct(self):
        locate = htmlhandling.get_position
        # (text, index into text, expected line number)
        expectations = (
            ('jojo', 0, 0),
            ('jojo', 3, 0),
            ('a\njojo', 3, 1),
            ('a\n\njojo', 3, 2),
        )
        for text, index, line in expectations:
            self.assertEqual(locate(text, index)[0], line)

    def test_that_position_on_line_is_correct(self):
        locate = htmlhandling.get_position
        # (text, index into text, expected position on the line)
        expectations = (
            ('jojo', 0, 0),
            ('jojo', 3, 3),
            ('a\njojo', 3, 2),
            ('a\n\njojo', 3, 1),
        )
        for text, index, column in expectations:
            self.assertEqual(locate(text, index)[1], column)
        



class HtmlImageTest(unittest.TestCase):
    """Tests for htmlhandling.HtmlImageFormatter and htmlhandling.gen_id.

    Every test runs inside a fresh temporary directory, so files written by
    the formatter (e.g. the exclusion file) never leak between tests.
    """

    def setUp(self):
        # image positioning data handed to the formatter in most tests
        self.pos = {'depth':99, 'height' : 88, 'width' : 77}
        self.original_directory = os.getcwd()
        self.tmpdir = tempfile.mkdtemp()
        os.chdir(self.tmpdir)

    def tearDown(self):
        # leave the temporary directory before removing it
        os.chdir(self.original_directory)
        shutil.rmtree(self.tmpdir, ignore_errors=True)

    def test_that_no_file_is_written_if_no_content(self):
        with htmlhandling.HtmlImageFormatter('foo.html'):
            pass
        self.assertFalse(os.path.exists('foo.html'))

    def test_file_if_written_when_content_exists(self):
        with htmlhandling.HtmlImageFormatter() as img:
            img.format_excluded(self.pos, '\\tau\\tau', 'foo.png')
        self.assertTrue(os.path.exists(excl_filename))

    def test_written_file_starts_and_ends_more_or_less_properly(self):
        with htmlhandling.HtmlImageFormatter('.') as img:
            img.format_excluded(self.pos, '\\tau\\tau', 'foo.png')
        data = read(htmlhandling.HtmlImageFormatter.EXCLUSION_FILE_NAME, 'r', encoding='utf-8')
        self.assertTrue('&lt;html' in data and '&lt;/html&gt;' in data)
        self.assertTrue('&lt;body' in data and '&lt;/body&gt;' in data)
        # make sure encoding is specified
        self.assertTrue('&lt;meta' in data and 'charset=' in data)

    def test_id_contains_no_special_characters(self):
        data = htmlhandling.gen_id('\\tau!\'{}][~^')
        for character in {'!', "'", '\\', '{', '}'}:
            self.assertNotIn(character, data)

    def test_formula_can_consist_only_of_numbers_and_id_is_generated(self):
        data = htmlhandling.gen_id('9*8*7=504')
        self.assertTrue(data.startswith('form'))
        self.assertTrue(data.endswith('504'))

    def test_that_empty_ids_raise_exception(self):
        self.assertRaises(ValueError, htmlhandling.gen_id, '')

    def test_that_same_characters_are_not_repeated(self):
        formula_id = htmlhandling.gen_id("jo{{{{{{{{ha")
        self.assertEqual(formula_id, "jo_ha")

    def test_that_ids_are_max_150_characters_wide(self):
        formula_id = htmlhandling.gen_id('\\alpha\\cdot\\gamma + ' * 999)
        self.assertEqual(len(formula_id), 150)

    def test_that_ids_start_with_letter(self):
        formula_id = htmlhandling.gen_id('{}\\[]ÖÖÖö9343...·tau')
        self.assertTrue(formula_id[0].isalpha())

    def test_that_link_to_external_image_points_to_file_and_formula(self):
        with htmlhandling.HtmlImageFormatter() as img:
            formatted_img = img.format_excluded(self.pos, '\\tau\\tau', 'foo.png')
            expected_id = htmlhandling.gen_id('\\tau\\tau')
        external_file = read(excl_filename, 'r', encoding='utf-8')
        # find linked formula path
        href = re.search(r'href="(.*?)"', formatted_img)
        self.assertIsNotNone(href)
        # extract path and id from it
        self.assertIn('#', href.group(1))
        path, anchor = href.group(1).split('#')
        self.assertEqual(path, excl_filename)
        self.assertEqual(anchor, expected_id)

        # check external file
        self.assertIn('&lt;p id', external_file)
        self.assertIn('="'+expected_id, external_file)

    def test_that_link_to_external_image_points_to_file_basepath_and_formula(self):
        os.mkdir('basepath')
        with htmlhandling.HtmlImageFormatter('basepath') as img:
            formatted_img = img.format_excluded(self.pos, '\\tau\\tau', 'foo.png')
            expected_id = htmlhandling.gen_id('\\tau\\tau')
        # find linked formula path
        href = re.search(r'href="(.*?)"', formatted_img)
        self.assertIsNotNone(href)
        # extract path and id from it
        self.assertIn('#', href.group(1))
        path, anchor = href.group(1).split('#')
        self.assertEqual(path, 'basepath/' + excl_filename)
        self.assertEqual(anchor, expected_id)

    def test_height_and_width_is_in_formatted_html_img_tag(self):
        with htmlhandling.HtmlImageFormatter('foo.html') as img:
            data = img.get_html_img(self.pos, '\\tau\\tau', 'foo.png')
        self.assertTrue('height=' in data and str(self.pos['height']) in data)
        self.assertTrue('width=' in data and str(self.pos['width']) in data)

    def test_no_formula_gets_lost_when_reparsing_external_formula_file(self):
        # two formatter sessions in a row: the second one must keep what the
        # first one wrote to the exclusion file
        with htmlhandling.HtmlImageFormatter() as img:
            img.format_excluded(self.pos, '\\tau' * 999, 'foo.png')
        with htmlhandling.HtmlImageFormatter() as img:
            img.format_excluded(self.pos, '\\pi' * 666, 'foo_2.png')
        data = read(excl_filename)
        self.assertIn('\\tau', data)
        self.assertIn('\\pi', data)

    def test_too_long_formulas_are_not_outsourced_if_not_configured(self):
        with htmlhandling.HtmlImageFormatter('foo.html') as img:
            img.format(self.pos, '\\tau' * 999, 'foo.png')
        self.assertFalse(os.path.exists('foo.html'))

    def test_that_too_long_formulas_get_outsourced_if_configured(self):
        with htmlhandling.HtmlImageFormatter() as img:
            img.set_max_formula_length(90)
            img.set_exclude_long_formulas(True)
            img.format(self.pos, '\\tau' * 999, 'foo.png')
        self.assertTrue(os.path.exists(excl_filename))
        data = read(htmlhandling.HtmlImageFormatter.EXCLUSION_FILE_NAME)
        self.assertIn('\\tau\\tau', data)

    def test_url_is_included(self):
        prefix = "http://crustulus.de/blog"
        with htmlhandling.HtmlImageFormatter('foo.html') as img:
            img.set_url(prefix)
            data = img.format(self.pos, r'\epsilon&lt;0', 'foo.png')
            self.assertIn(prefix, data)

    def test_url_doesnt_contain_double_slashes(self):
        prefix = "http://crustulus.de/blog/"
        with htmlhandling.HtmlImageFormatter('foo.html') as img:
            img.set_url(prefix)
            data = img.format(self.pos, r'\gamma\text{strahlung}', 'foo.png')
            # the scheme separator is the only place where "//" is legitimate
            self.assertNotIn('//', data.replace('http://','ignore'))

    # depth is used as negative offset, so negative depth should result in
    # positive offset
    def test_that_negative_depth_results_in_positive_offset(self):
        self.pos['depth'] = '-999'
        with htmlhandling.HtmlImageFormatter('foo.html') as img:
            data = img.format(self.pos, r'\gamma\text{strahlung}', 'foo.png')
            # [1:] strips the leading minus sign from the depth
            self.assertIn('align: ' + str(self.pos['depth'])[1:], data)

    def test_that_displaymath_is_set_or_unset(self):
        with htmlhandling.HtmlImageFormatter('foo.html') as img:
            data = img.format(self.pos, r'\gamma\text{strahlung}', 'foo.png',
                    True)
            self.assertIn('="displaymath', data)
            data = img.format(self.pos, r'\gamma\text{strahlung}', 'foo.png',
                    False)
            self.assertIn('="inlinemath', data)

    def test_that_alternative_css_class_is_set_correctly(self):
        with htmlhandling.HtmlImageFormatter('foo.html') as img:
            img.set_display_math_css_class('no1')
            img.set_inline_math_css_class('no2')
            data = img.format(self.pos, r'\gamma\text{strahlung}', 'foo.png',
                    True)
            self.assertIn('="no1"', data)
            data = img.format(self.pos, r'\gamma\text{strahlung}', 'foo.png',
                    False)
            self.assertIn('="no2', data)

    def test_that_unicode_is_replaced_if_requested(self):
        with htmlhandling.HtmlImageFormatter('foo.html') as img:
            img.set_replace_nonascii(True)
            data = img.format(self.pos, '←', 'foo.png')
            self.assertTrue('\\leftarrow' in data,
                    'expected: "\\leftarrow" to be in "%s"' % data)

    def test_that_unicode_is_kept_if_not_requested_to_replace(self):
        with htmlhandling.HtmlImageFormatter('foo.html') as img:
            img.set_replace_nonascii(False)
            data = img.format(self.pos, '←', 'foo.png')
            self.assertIn('←', data)

    def test_formatting_commands_are_stripped(self):
        # raw strings: sequences such as "\," or "\{" are LaTeX, not Python
        # escape sequences
        with htmlhandling.HtmlImageFormatter('foo.html') as img:
            data = img.format(self.pos, r'a\,b\,c\,d', 'foo.png')
            self.assertIn('a b c d', data)
            data = img.format(self.pos, r'a\,b\;c\ d', 'foo.png')
            self.assertIn('a b c d', data)

            data = img.format(self.pos, r'\Big\{foo\Big\}', 'foo.png')
            self.assertTrue(r'\{foo' in data and r'\}' in data)
            data = img.format(self.pos, r'\left\{foo\right\}', 'foo.png')
            self.assertTrue(r'\{' in data and 'foo' in data and r'\}' in data)
         

def htmleqn(formula, hr=True):
    """Wrap `formula` in the markup of a formula that was moved to the external
    formula file; a leading horizontal rule is emitted unless `hr` is false."""
    separator = '&lt;hr/&gt;' if hr else ''
    formula_id = htmlhandling.gen_id(formula)
    template = '%s\n&lt;p id="%s"&gt;&lt;pre&gt;%s&lt;/pre&gt;&lt;/span&gt;&lt;/p&gt;\n'
    return template % (separator, formula_id, formula)


class OutsourcingParserTest(unittest.TestCase):
    """Tests for htmlhandling.OutsourcedFormulaParser, which is fed documents
    assembled from a fixed HTML head and formulas formatted by htmleqn()."""

    def setUp(self):
        # document start up to and including a heading inside the body; the
        # closing tags are appended by get_html()
        self.html = ('&lt;!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01//EN"' +
        '\n  "http://www.w3.org/TR/html4/strict.dtd"&gt;\n&lt;html&gt;\n&lt;head&gt;\n' +
        '&lt;meta http-equiv="content-type" content="text/html; charset=utf-8"/&gt;' +
        '&lt;title&gt;Outsourced Formulas&lt;/title&gt;&lt;/head&gt;\n&lt;body&gt;\n&lt;h1&gt;heading&lt;/h1&gt;')

    def get_html(self, string):
        """Create html string with head / tail and put the specified string into
        it."""
        return self.html + string + '\n&lt;/body&gt;\n&lt;/html&gt;'

    def test_formulas_are_recognized(self):
        data = self.get_html(htmleqn('\\tau'))
        parser = htmlhandling.OutsourcedFormulaParser()
        parser.feed(data)
        self.assertEqual(len(parser.get_formulas()), 1)

    def test_formula_doesnt_contain_surrounding_rubbish(self):
        data = self.get_html(htmleqn('\\gamma'))
        parser = htmlhandling.OutsourcedFormulaParser()
        parser.feed(data)
        self.assertEqual(len(parser.get_formulas()), 1)
        # only one formula was fed, so take the single entry
        key = next(iter(parser.get_formulas()))
        par = parser.get_formulas()[key]
        self.assertNotIn('&lt;h1', par)
        self.assertNotIn('body&gt;', par)
        self.assertNotIn('hr', par)

    def test_that_header_is_parsed_correctly(self):
        p = htmlhandling.OutsourcedFormulaParser()
        p.feed(self.get_html(htmleqn('test123', False)))
        head = p.get_head()
        for fragment in ('DOCTYPE', '&lt;html', '&lt;title', '&lt;/title',
                '&lt;/head', '&lt;meta', 'charset='):
            self.assertIn(fragment, head)

    def test_multiple_formulas_are_recognized_correctly(self):
        p = htmlhandling.OutsourcedFormulaParser()
        p.feed(self.get_html(htmleqn('\\tau', False) + '\n' +
            htmleqn('\\gamma') + '\n' + htmleqn('\\epsilon&lt;0')))
        forms = p.get_formulas()
        self.assertEqual(len(forms), 3)
        # each of the three formulas fed above has to show up
        self.assertIn('\\tau', forms.values())
        self.assertIn('\\gamma', forms.values())
        self.assertIn('\\epsilon&lt;0', forms.values())

</pre></body></html>