# Make HTML entities for non-ASCII characters in Unicode string
# Thanks to Steffen Ries on comp.lang.python
# http://groups.google.com/groups?selm=m3k7yr9wci.fsf@gondolin.beleriand

# Lookup table: single unicode character -> "&name;" HTML entity reference.
# Starts out empty; _make_u2html() fills it in place, and htmlEntityEncode()
# triggers that fill the first time it finds the table empty.
_u2html = {}

def _make_u2html():
    """
    Fill the module-level ``_u2html`` table in place.

    Each named HTML entity known to the standard library becomes one
    entry: the key is the one-character unicode string for the entity's
    code point, the value is the reference text "&name;".

    Works on both Python 2 (``htmlentitydefs``, ``unichr``) and Python 3
    (``html.entities``, ``chr``).  Returns nothing.
    """
    # The entity tables moved from ``htmlentitydefs`` to ``html.entities``
    # in Python 3.  ``name2codepoint`` gives the integer code point
    # directly, so there is no need to parse the textual forms (a single
    # latin-1 character or "&#NNN;") found in ``entitydefs``, and no way
    # to end up with a ``None`` key for an unrecognised form.
    try:
        from htmlentitydefs import name2codepoint
    except ImportError:
        from html.entities import name2codepoint

    # Python 2 needs unichr() to build a unicode character; Python 3 only
    # has chr(), which already returns one.
    try:
        to_char = unichr
    except NameError:
        to_char = chr

    for entity, codepoint in name2codepoint.items():
        _u2html[to_char(codepoint)] = "&%s;" % entity

def htmlEntityEncode(s):
    """
    Encode unicode string s as ASCII, using named HTML entities.

    Every character of s that has an entry in the ``_u2html`` table is
    replaced by its "&name;" reference; all other characters are kept.
    The result is then encoded to ASCII with the 'replace' error handler,
    so any non-ASCII character that had no entity comes out as "?".

    Note that the table lookup is applied to every character, not only to
    non-ASCII ones, so an ASCII character that appears in the table is
    replaced as well (the standard entity set is expected to include
    ``&``, ``<``, ``>`` and ``"`` -- verify if double-escaping matters to
    the caller).

    Returns the encoded (byte) string produced by ``.encode()``.
    """
    # Build the shared lookup table on first use only.
    if not _u2html:
        _make_u2html()

    substituted = ''.join(_u2html.get(ch, ch) for ch in s)
    return substituted.encode('ascii', 'replace')
