Wednesday, June 26, 2019

Python Script to Replace Non-ASCII Characters with Unicode Escape Sequences

#coding: utf-8
import codecs

# Lookup table consulted by replace(): each key is a single non-ASCII
# character (the inverted marks, accented and special Latin letters, and
# Cyrillic letters) and each value is the text written to the output in place
# of that character.
# The values are raw strings, so each one holds two literal backslashes
# followed by "u" and the key's code point as four uppercase hex digits.
# NOTE(review): a \uXXXX escape normally uses a single backslash -- confirm
# that the doubled backslash is really wanted in the output file.
# NOTE(review): lowercase ì, ò, ù, û and ÿ have entries but their uppercase
# counterparts do not -- confirm whether that omission is intentional.
unicodeMap ={
u'¡': r'\\u00A1', u'¿': r'\\u00BF', u'Á': r'\\u00C1', u'À': r'\\u00C0', u'Â': r'\\u00C2',
u'Ä': r'\\u00C4', u'Ă': r'\\u0102', u'Å': r'\\u00C5', u'Ą': r'\\u0104', u'Æ': r'\\u00C6',
u'Ć': r'\\u0106', u'Č': r'\\u010C', u'Ç': r'\\u00C7', u'Đ': r'\\u0110', u'É': r'\\u00C9',
u'È': r'\\u00C8', u'Ê': r'\\u00CA', u'Ë': r'\\u00CB', u'Ę': r'\\u0118', u'Ğ': r'\\u011E',
u'Í': r'\\u00CD', u'İ': r'\\u0130', u'Î': r'\\u00CE', u'Ï': r'\\u00CF', u'Ł': r'\\u0141',
u'Ń': r'\\u0143', u'Ñ': r'\\u00D1', u'Ó': r'\\u00D3', u'Ô': r'\\u00D4', u'Ö': r'\\u00D6',
u'Ő': r'\\u0150', u'Ø': r'\\u00D8', u'Œ': r'\\u0152', u'Ś': r'\\u015A', u'Š': r'\\u0160',
u'Ş': r'\\u015E', u'Ș': r'\\u0218', u'ß': r'\\u00DF', u'Ț': r'\\u021A', u'Ú': r'\\u00DA',
u'Ü': r'\\u00DC', u'Ű': r'\\u0170', u'Ź': r'\\u0179', u'Ż': r'\\u017B', u'Ž': r'\\u017D',
u'А': r'\\u0410', u'Б': r'\\u0411', u'В': r'\\u0412', u'Г': r'\\u0413', u'Ґ': r'\\u0490',
u'Д': r'\\u0414', u'Е': r'\\u0415', u'Ё': r'\\u0401', u'Є': r'\\u0404', u'Ж': r'\\u0416',
u'З': r'\\u0417', u'И': r'\\u0418', u'Й': r'\\u0419', u'І': r'\\u0406', u'Ї': r'\\u0407',
u'К': r'\\u041A', u'Л': r'\\u041B', u'М': r'\\u041C', u'Н': r'\\u041D', u'О': r'\\u041E',
u'П': r'\\u041F', u'Р': r'\\u0420', u'С': r'\\u0421', u'Т': r'\\u0422', u'У': r'\\u0423',
u'Ф': r'\\u0424', u'Х': r'\\u0425', u'Ц': r'\\u0426', u'Ч': r'\\u0427', u'Ш': r'\\u0428',
u'Щ': r'\\u0429', u'Ъ': r'\\u042A', u'Ы': r'\\u042B', u'Ь': r'\\u042C', u'Э': r'\\u042D',
u'Ю': r'\\u042E', u'Я': r'\\u042F', u'á': r'\\u00E1', u'à': r'\\u00E0', u'â': r'\\u00E2',
u'ä': r'\\u00E4', u'ă': r'\\u0103', u'å': r'\\u00E5', u'ą': r'\\u0105', u'æ': r'\\u00E6',
u'ć': r'\\u0107', u'č': r'\\u010D', u'ç': r'\\u00E7', u'đ': r'\\u0111', u'é': r'\\u00E9',
u'è': r'\\u00E8', u'ê': r'\\u00EA', u'ë': r'\\u00EB', u'ę': r'\\u0119', u'ğ': r'\\u011F',
u'ı': r'\\u0131', u'í': r'\\u00ED', u'ì': r'\\u00EC', u'î': r'\\u00EE', u'ï': r'\\u00EF',
u'ł': r'\\u0142', u'ń': r'\\u0144', u'ñ': r'\\u00F1', u'ó': r'\\u00F3', u'ò': r'\\u00F2',
u'ô': r'\\u00F4', u'ö': r'\\u00F6', u'ő': r'\\u0151', u'ø': r'\\u00F8', u'œ': r'\\u0153',
u'ś': r'\\u015B', u'š': r'\\u0161', u'ş': r'\\u015F', u'ș': r'\\u0219', u'ț': r'\\u021B',
u'ú': r'\\u00FA', u'ù': r'\\u00F9', u'û': r'\\u00FB', u'ü': r'\\u00FC', u'ű': r'\\u0171',
u'ÿ': r'\\u00FF', u'ź': r'\\u017A', u'ż': r'\\u017C', u'ž': r'\\u017E', u'а': r'\\u0430',
u'б': r'\\u0431', u'в': r'\\u0432', u'г': r'\\u0433', u'ґ': r'\\u0491', u'д': r'\\u0434',
u'е': r'\\u0435', u'ё': r'\\u0451', u'є': r'\\u0454', u'ж': r'\\u0436', u'з': r'\\u0437',
u'и': r'\\u0438', u'й': r'\\u0439', u'і': r'\\u0456', u'ї': r'\\u0457', u'к': r'\\u043A',
u'л': r'\\u043B', u'м': r'\\u043C', u'н': r'\\u043D', u'о': r'\\u043E', u'п': r'\\u043F',
u'р': r'\\u0440', u'с': r'\\u0441', u'т': r'\\u0442', u'у': r'\\u0443', u'ф': r'\\u0444',
u'х': r'\\u0445', u'ц': r'\\u0446', u'ч': r'\\u0447', u'ш': r'\\u0448', u'щ': r'\\u0449',
u'ъ': r'\\u044A', u'ы': r'\\u044B', u'ь': r'\\u044C', u'э': r'\\u044D', u'ю': r'\\u044E',
u'я': r'\\u044F'
}

# Paths of the file replace() reads and of the file it writes.  Both hold
# placeholder text that has to be replaced with real paths before running.
# Raw triple-quoted literals keep any backslashes in a pasted path literal.
inputFile = r"""Put Path Here"""
outputFile = r"""Put Path here"""

def replace(input_path=None, output_path=None, mapping=None):
    """Copy a UTF-8 text file, substituting every mapped character.

    Each character of the input that is a key of the mapping is written to
    the output as the corresponding mapping value; every other character is
    copied unchanged.  The output file is created (or truncated) and written
    as UTF-8.

    Args:
        input_path: File to read.  Defaults to the module-level ``inputFile``.
        output_path: File to write.  Defaults to the module-level
            ``outputFile``.
        mapping: Dict from single characters to their replacement text.
            Defaults to the module-level ``unicodeMap``.

    Raises:
        IOError / OSError: If either file cannot be opened.
        UnicodeDecodeError: If the input is not valid UTF-8.
    """
    # Fall back to the module-level configuration so a bare ``replace()``
    # call keeps working exactly as before.
    if input_path is None:
        input_path = inputFile
    if output_path is None:
        output_path = outputFile
    if mapping is None:
        mapping = unicodeMap

    # ``with`` guarantees both handles are closed even when opening the
    # second file, decoding, or writing raises.
    with codecs.open(input_path, 'r', 'utf-8') as fileIn, \
            codecs.open(output_path, 'w', 'utf-8') as fileOut:
        for line in fileIn:
            # One lookup per character: unmapped characters map to themselves.
            fileOut.write(u''.join(mapping.get(char, char) for char in line))

# Run the conversion only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    replace()

# or just replace_all: turn the escape sequences back into the characters
# Characters whose escape sequences replace_all() recognises.
_ESCAPED_CHARS = (
    '¡¿ÁÀÂÄĂÅĄÆĆČÇĐÉÈÊËĘĞÍİÎÏŁŃÑÓÔÖŐØŒŚŠŞȘßȚÚÜŰŹŻŽ'
    'АБВГҐДЕЁЄЖЗИЙІЇКЛМНОПРСТУФХЦЧШЩЪЫЬЭЮЯ'
    'áàâäăåąæćčçđéèêëęğıíìîïłńñóòôöőøœśšşșțúùûüűÿźżž'
    'абвгґдеёєжзийіїклмнопрстуфхцчшщъыьэюя'
)


def replace_all(string):
    r"""Return ``string`` with known \uXXXX escapes turned into characters.

    For every character in ``_ESCAPED_CHARS`` the six-character text made of
    one backslash, ``u`` and the character's code point as four hex digits
    is replaced by the character itself.  The hex digits are matched either
    all uppercase or all lowercase; mixed-case escapes and escapes for
    other code points are left untouched.

    Args:
        string: Text to convert.  Any other object is first passed through
            ``str()``.

    Returns:
        The converted text as a ``str``.
    """
    string = str(string)
    for char in _ESCAPED_CHARS:
        code_point = ord(char)
        # Same two spellings the hand-written table listed for each character.
        for escape in ('\\u%04X' % code_point, '\\u%04x' % code_point):
            string = string.replace(escape, char)
    return string