UTF-8 bug

If converting Unicode strings into a URL with urllib raises a UnicodeEncodeError, use the following snippet:

def encode_obj(in_obj):
    """
    Recursively encode text values as UTF-8 byte strings for urllib.

    Works around urllib raising UnicodeEncodeError when it is handed
    unicode strings that contain non-ASCII characters.

    Lists, tuples and dicts are rebuilt with their contents encoded.
    Dict keys are encoded as well as values, because a unicode key
    triggers the same error as a unicode value. Any other object
    (numbers, None, byte strings, ...) is returned unchanged.

    :param in_obj: input object
    :return: encoded object
    """
    try:
        text_type = unicode  # Python 2 text type
    except NameError:
        text_type = str  # Python 3 has no ``unicode`` builtin

    if isinstance(in_obj, text_type):
        return in_obj.encode('utf-8')
    if isinstance(in_obj, list):
        return [encode_obj(el) for el in in_obj]
    if isinstance(in_obj, tuple):
        return tuple(encode_obj(el) for el in in_obj)
    if isinstance(in_obj, dict):
        # .items() exists on both Python 2 and 3 (.iteritems() is 2-only).
        # Encoded keys stay hashable: text becomes bytes, tuples stay tuples.
        return {encode_obj(k): encode_obj(v) for k, v in in_obj.items()}

    return in_obj

Credit goes to ogurets from a Stack Overflow discussion.

 
python/urllib.txt · Last modified: 2015/08/27 16:04 by vondra