Wednesday, June 26, 2019

Python Script to Replace Unicode

#coding: utf-8
import codecs

# Lookup table consulted by replace(): each key is a single non-ASCII
# character (inverted punctuation, Latin letters with diacritics, Cyrillic
# letters) and each value is the ASCII text written to the output instead.
# NOTE(review): the values are raw strings, so r'\\u00A1' is the seven
# characters  \ \ u 0 0 A 1  -- the output receives TWO backslashes before
# each "uXXXX". Confirm that the doubled backslash is intended by whatever
# consumes the output file.
unicodeMap ={
u'¡': r'\\u00A1', u'¿': r'\\u00BF', u'Á': r'\\u00C1', u'À': r'\\u00C0', u'Â': r'\\u00C2',
u'Ä': r'\\u00C4', u'Ă': r'\\u0102', u'Å': r'\\u00C5', u'Ą': r'\\u0104', u'Æ': r'\\u00C6',
u'Ć': r'\\u0106', u'Č': r'\\u010C', u'Ç': r'\\u00C7', u'Đ': r'\\u0110', u'É': r'\\u00C9',
u'È': r'\\u00C8', u'Ê': r'\\u00CA', u'Ë': r'\\u00CB', u'Ę': r'\\u0118', u'Ğ': r'\\u011E',
u'Í': r'\\u00CD', u'İ': r'\\u0130', u'Î': r'\\u00CE', u'Ï': r'\\u00CF', u'Ł': r'\\u0141',
u'Ń': r'\\u0143', u'Ñ': r'\\u00D1', u'Ó': r'\\u00D3', u'Ô': r'\\u00D4', u'Ö': r'\\u00D6',
u'Ő': r'\\u0150', u'Ø': r'\\u00D8', u'Œ': r'\\u0152', u'Ś': r'\\u015A', u'Š': r'\\u0160',
u'Ş': r'\\u015E', u'Ș': r'\\u0218', u'ß': r'\\u00DF', u'Ț': r'\\u021A', u'Ú': r'\\u00DA',
u'Ü': r'\\u00DC', u'Ű': r'\\u0170', u'Ź': r'\\u0179', u'Ż': r'\\u017B', u'Ž': r'\\u017D',
u'А': r'\\u0410', u'Б': r'\\u0411', u'В': r'\\u0412', u'Г': r'\\u0413', u'Ґ': r'\\u0490',
u'Д': r'\\u0414', u'Е': r'\\u0415', u'Ё': r'\\u0401', u'Є': r'\\u0404', u'Ж': r'\\u0416',
u'З': r'\\u0417', u'И': r'\\u0418', u'Й': r'\\u0419', u'І': r'\\u0406', u'Ї': r'\\u0407',
u'К': r'\\u041A', u'Л': r'\\u041B', u'М': r'\\u041C', u'Н': r'\\u041D', u'О': r'\\u041E',
u'П': r'\\u041F', u'Р': r'\\u0420', u'С': r'\\u0421', u'Т': r'\\u0422', u'У': r'\\u0423',
u'Ф': r'\\u0424', u'Х': r'\\u0425', u'Ц': r'\\u0426', u'Ч': r'\\u0427', u'Ш': r'\\u0428',
u'Щ': r'\\u0429', u'Ъ': r'\\u042A', u'Ы': r'\\u042B', u'Ь': r'\\u042C', u'Э': r'\\u042D',
u'Ю': r'\\u042E', u'Я': r'\\u042F', u'á': r'\\u00E1', u'à': r'\\u00E0', u'â': r'\\u00E2',
u'ä': r'\\u00E4', u'ă': r'\\u0103', u'å': r'\\u00E5', u'ą': r'\\u0105', u'æ': r'\\u00E6',
u'ć': r'\\u0107', u'č': r'\\u010D', u'ç': r'\\u00E7', u'đ': r'\\u0111', u'é': r'\\u00E9',
u'è': r'\\u00E8', u'ê': r'\\u00EA', u'ë': r'\\u00EB', u'ę': r'\\u0119', u'ğ': r'\\u011F',
u'ı': r'\\u0131', u'í': r'\\u00ED', u'ì': r'\\u00EC', u'î': r'\\u00EE', u'ï': r'\\u00EF',
u'ł': r'\\u0142', u'ń': r'\\u0144', u'ñ': r'\\u00F1', u'ó': r'\\u00F3', u'ò': r'\\u00F2',
u'ô': r'\\u00F4', u'ö': r'\\u00F6', u'ő': r'\\u0151', u'ø': r'\\u00F8', u'œ': r'\\u0153',
u'ś': r'\\u015B', u'š': r'\\u0161', u'ş': r'\\u015F', u'ș': r'\\u0219', u'ț': r'\\u021B',
u'ú': r'\\u00FA', u'ù': r'\\u00F9', u'û': r'\\u00FB', u'ü': r'\\u00FC', u'ű': r'\\u0171',
u'ÿ': r'\\u00FF', u'ź': r'\\u017A', u'ż': r'\\u017C', u'ž': r'\\u017E', u'а': r'\\u0430',
u'б': r'\\u0431', u'в': r'\\u0432', u'г': r'\\u0433', u'ґ': r'\\u0491', u'д': r'\\u0434',
u'е': r'\\u0435', u'ё': r'\\u0451', u'є': r'\\u0454', u'ж': r'\\u0436', u'з': r'\\u0437',
u'и': r'\\u0438', u'й': r'\\u0439', u'і': r'\\u0456', u'ї': r'\\u0457', u'к': r'\\u043A',
u'л': r'\\u043B', u'м': r'\\u043C', u'н': r'\\u043D', u'о': r'\\u043E', u'п': r'\\u043F',
u'р': r'\\u0440', u'с': r'\\u0441', u'т': r'\\u0442', u'у': r'\\u0443', u'ф': r'\\u0444',
u'х': r'\\u0445', u'ц': r'\\u0446', u'ч': r'\\u0447', u'ш': r'\\u0448', u'щ': r'\\u0449',
u'ъ': r'\\u044A', u'ы': r'\\u044B', u'ь': r'\\u044C', u'э': r'\\u044D', u'ю': r'\\u044E',
u'я': r'\\u044F'
}

# Placeholder paths -- edit both before running the script.
# Raw string literals keep any backslashes in a pasted path literal.
inputFile = r'Put Path Here'    # file that replace() reads as UTF-8
outputFile = r'Put Path here'   # file that replace() (over)writes as UTF-8

def replace():
    """Copy ``inputFile`` to ``outputFile``, substituting mapped characters.

    Both files are opened as UTF-8 text through ``codecs.open``. Every
    character that is a key of ``unicodeMap`` is written as the mapped
    replacement text; every other character is copied unchanged.

    The input is opened before the output, so a missing input file does not
    create or truncate the output file.

    Returns:
        None. The result is the content written to ``outputFile``.
    """
    # The ``with`` blocks close both handles even when reading, decoding or
    # writing raises part-way through (the original leaked them in that case).
    with codecs.open(inputFile, 'r', 'utf-8') as fileIn:
        with codecs.open(outputFile, 'w', 'utf-8') as fileOut:
            for line in fileIn:
                # One dict lookup per character; unmapped characters fall
                # back to themselves. One write per line instead of per char.
                fileOut.write(
                    u''.join(unicodeMap.get(char, char) for char in line)
                )

# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    replace()

# Reverse direction ("replace_all"): turn single-backslash \uXXXX escape
# text back into the real character.
#
# Each entry is (escape text, character). Only the upper-case-hex spelling is
# listed; replace_all() also handles the all-lower-case-hex spelling of the
# same escape, so the table does not need a second, lower-case copy.
_ESCAPE_TO_CHAR = (
    ('\\u00A1', '¡'), ('\\u00BF', '¿'), ('\\u00C1', 'Á'), ('\\u00C0', 'À'), ('\\u00C2', 'Â'),
    ('\\u00C4', 'Ä'), ('\\u0102', 'Ă'), ('\\u00C5', 'Å'), ('\\u0104', 'Ą'), ('\\u00C6', 'Æ'),
    ('\\u0106', 'Ć'), ('\\u010C', 'Č'), ('\\u00C7', 'Ç'), ('\\u0110', 'Đ'), ('\\u00C9', 'É'),
    ('\\u00C8', 'È'), ('\\u00CA', 'Ê'), ('\\u00CB', 'Ë'), ('\\u0118', 'Ę'), ('\\u011E', 'Ğ'),
    ('\\u00CD', 'Í'), ('\\u0130', 'İ'), ('\\u00CE', 'Î'), ('\\u00CF', 'Ï'), ('\\u0141', 'Ł'),
    ('\\u0143', 'Ń'), ('\\u00D1', 'Ñ'), ('\\u00D3', 'Ó'), ('\\u00D4', 'Ô'), ('\\u00D6', 'Ö'),
    ('\\u0150', 'Ő'), ('\\u00D8', 'Ø'), ('\\u0152', 'Œ'), ('\\u015A', 'Ś'), ('\\u0160', 'Š'),
    ('\\u015E', 'Ş'), ('\\u0218', 'Ș'), ('\\u00DF', 'ß'), ('\\u021A', 'Ț'), ('\\u00DA', 'Ú'),
    ('\\u00DC', 'Ü'), ('\\u0170', 'Ű'), ('\\u0179', 'Ź'), ('\\u017B', 'Ż'), ('\\u017D', 'Ž'),
    ('\\u0410', 'А'), ('\\u0411', 'Б'), ('\\u0412', 'В'), ('\\u0413', 'Г'), ('\\u0490', 'Ґ'),
    ('\\u0414', 'Д'), ('\\u0415', 'Е'), ('\\u0401', 'Ё'), ('\\u0404', 'Є'), ('\\u0416', 'Ж'),
    ('\\u0417', 'З'), ('\\u0418', 'И'), ('\\u0419', 'Й'), ('\\u0406', 'І'), ('\\u0407', 'Ї'),
    ('\\u041A', 'К'), ('\\u041B', 'Л'), ('\\u041C', 'М'), ('\\u041D', 'Н'), ('\\u041E', 'О'),
    ('\\u041F', 'П'), ('\\u0420', 'Р'), ('\\u0421', 'С'), ('\\u0422', 'Т'), ('\\u0423', 'У'),
    ('\\u0424', 'Ф'), ('\\u0425', 'Х'), ('\\u0426', 'Ц'), ('\\u0427', 'Ч'), ('\\u0428', 'Ш'),
    ('\\u0429', 'Щ'), ('\\u042A', 'Ъ'), ('\\u042B', 'Ы'), ('\\u042C', 'Ь'), ('\\u042D', 'Э'),
    ('\\u042E', 'Ю'), ('\\u042F', 'Я'), ('\\u00E1', 'á'), ('\\u00E0', 'à'), ('\\u00E2', 'â'),
    ('\\u00E4', 'ä'), ('\\u0103', 'ă'), ('\\u00E5', 'å'), ('\\u0105', 'ą'), ('\\u00E6', 'æ'),
    ('\\u0107', 'ć'), ('\\u010D', 'č'), ('\\u00E7', 'ç'), ('\\u0111', 'đ'), ('\\u00E9', 'é'),
    ('\\u00E8', 'è'), ('\\u00EA', 'ê'), ('\\u00EB', 'ë'), ('\\u0119', 'ę'), ('\\u011F', 'ğ'),
    ('\\u0131', 'ı'), ('\\u00ED', 'í'), ('\\u00EC', 'ì'), ('\\u00EE', 'î'), ('\\u00EF', 'ï'),
    ('\\u0142', 'ł'), ('\\u0144', 'ń'), ('\\u00F1', 'ñ'), ('\\u00F3', 'ó'), ('\\u00F2', 'ò'),
    ('\\u00F4', 'ô'), ('\\u00F6', 'ö'), ('\\u0151', 'ő'), ('\\u00F8', 'ø'), ('\\u0153', 'œ'),
    ('\\u015B', 'ś'), ('\\u0161', 'š'), ('\\u015F', 'ş'), ('\\u0219', 'ș'), ('\\u021B', 'ț'),
    ('\\u00FA', 'ú'), ('\\u00F9', 'ù'), ('\\u00FB', 'û'), ('\\u00FC', 'ü'), ('\\u0171', 'ű'),
    ('\\u00FF', 'ÿ'), ('\\u017A', 'ź'), ('\\u017C', 'ż'), ('\\u017E', 'ž'), ('\\u0430', 'а'),
    ('\\u0431', 'б'), ('\\u0432', 'в'), ('\\u0433', 'г'), ('\\u0491', 'ґ'), ('\\u0434', 'д'),
    ('\\u0435', 'е'), ('\\u0451', 'ё'), ('\\u0454', 'є'), ('\\u0436', 'ж'), ('\\u0437', 'з'),
    ('\\u0438', 'и'), ('\\u0439', 'й'), ('\\u0456', 'і'), ('\\u0457', 'ї'), ('\\u043A', 'к'),
    ('\\u043B', 'л'), ('\\u043C', 'м'), ('\\u043D', 'н'), ('\\u043E', 'о'), ('\\u043F', 'п'),
    ('\\u0440', 'р'), ('\\u0441', 'с'), ('\\u0442', 'т'), ('\\u0443', 'у'), ('\\u0444', 'ф'),
    ('\\u0445', 'х'), ('\\u0446', 'ц'), ('\\u0447', 'ч'), ('\\u0448', 'ш'), ('\\u0449', 'щ'),
    ('\\u044A', 'ъ'), ('\\u044B', 'ы'), ('\\u044C', 'ь'), ('\\u044D', 'э'), ('\\u044E', 'ю'),
    ('\\u044F', 'я'),
)


def replace_all(string):
    r"""Return ``string`` with the listed ``\uXXXX`` escape texts decoded.

    Args:
        string: text to process; it is passed through ``str()`` first, as
            the original snippet did.

    Returns:
        A new string in which every escape listed in ``_ESCAPE_TO_CHAR``,
        spelled with upper-case or with all-lower-case hex digits, is
        replaced by its character. Escapes that are not in the table and
        mixed-case spellings are left untouched.
    """
    result = str(string)
    for escape, char in _ESCAPE_TO_CHAR:
        result = result.replace(escape, char)
        lowered = escape.lower()
        # Escapes made only of digits have no distinct lower-case spelling.
        if lowered != escape:
            result = result.replace(lowered, char)
    return result

Sunday, June 23, 2019

(m/w/d) in Stellenanzeigen: Was bedeutet das?

Wer aktuell nach einem Job Ausschau hält, trifft immer häufiger auf Kürzel wie (m/w/d) in Stellenanzeigen. Abkürzungen in Anzeigen sind nichts Neues, kann doch jedes weitere Wort den Preis in die Höhe treiben. Viele aus dem Printzeitalter stammende Kürzel haben sich etabliert. Den meisten Jobsuchenden ist somit klar, dass Angaben wie (m/w) hinter einem Jobtitel stellvertretend für das Geschlecht stehen und zeigen, dass eine Stelle sowohl mit einem Mann als auch einer Frau zu besetzen ist. Neu ist allerdings die dritte Position. Wir klären auf…

(m/w/d) in Stellenanzeigen: Variable Ausdrucksmöglichkeiten

Das Interessante ist: Eine beliebige Suche in Jobportalen wie Karrieresprung zeigt weitere ungewöhnliche Abkürzungen. Aber was bedeuten sie? (m/w/d) in Stellenanzeigen steht für männlich/weiblich/divers, letzteres mitunter in der englischen Variante diverse.
Die Angabe divers oder diverse ist eine Lösung im Sinne des Allgemeinen Gleichbehandlungsgesetzes (AGG, umgangssprachlich „Antidiskriminierungsgesetz“), um ausdrücklich Menschen vor Diskriminierung zu schützen, die sich weder männlich noch weiblich fühlen.
Hintergrund ist ein Beschluss des Bundesverfassungsgerichts vom Oktober 2017 (Az 1 BvR 2019/16), dass künftig neben den Geschlechtsbezeichnungen männlich und weiblich eine dritte Variante möglich sein muss.
Das richtet sich an all jene, die sich innerhalb des sogenannten binären Geschlechtssystems keinem Geschlecht zuordnen lassen können oder wollen. Für das Geburtenregister gilt damit, dass bis Ende 2018 der Beschluss umgesetzt werden muss.

Geschlechterdiskussion: Wie viele Geschlechter gibt es?

Für viele Menschen ist die Diskussion um alles, was in irgendeiner Form „Gender“ oder „Diversity“ im Namen trägt, völlig unverständlich. Gerne wird entrüstet von „Genderwahnsinn“ geredet. Bisher gab es doch auch nur zwei Geschlechter, was soll denn jetzt anders sein?! Das stimmt so fundamental allerdings nicht.
Tatsache ist, dass Bezeichnungen wie divers oder diverse neu und für viele ungewohnt sein mögen. Ebenso die Tatsache, dass manche Menschen sich weder als Mann, noch als Frau fühlen. Das Phänomen hingegen ist nicht neu. Früher gab es wenig schmeichelhafte Bezeichnungen wie Zwitter oder Hermaphrodit.
Die eine weist auf das dritte Geschlecht im Tierreich hin, so etwa bei Schnecken. Die andere Bezeichnung stammt aus der griechischen Mythologie. Sie bezieht sich ursprünglich auf den Sohn von Aphrodite und Hermes, der durch die feste Umarmung einer verliebten Nymphe fortan zweierlei Geschlecht in sich trug.
Heutzutage ist von Intersexualität die Rede. Schätzungen zufolge betrachten sich 80.000 bis 120.000 Menschen in Deutschland als intersexuell. Mit diesem Begriff bezeichnet die Medizin laut Wikipedia…
Menschen, die genetisch (aufgrund der Geschlechtschromosomen) oder auch anatomisch (aufgrund der Geschlechtsorgane) und hormonell (aufgrund des Mengenverhältnisses der Geschlechtshormone) nicht eindeutig dem weiblichen oder dem männlichen Geschlecht zugeordnet werden können.
Diese Definition ist von Transsexualität abzugrenzen. Aus Sicht der Biologie lassen sich transsexuelle Menschen eindeutig einem Geschlecht zuordnen, allerdings fühlen sich die Betroffenen anders als rein biologisch zu vermuten wäre.

Was bedeutet das für Bewerber?

Unternehmen sind natürlich bemüht, alles zu unterlassen, das nur im Entferntesten nach Diskriminierung aussehen könnte. Eine Untersuchung von 570.000 Stellenanzeigen zeigte, dass immerhin sieben Prozent davon bereits solche oder ähnliche Abkürzungen tragen.
Die Folge: Sehr variable Abkürzungen und Formulierungen, die unter sprachwissenschaftlichen Gesichtspunkten für Irritationen sorgen. Ein unerfreuliches Nebenprodukt dieses Wirrwarrs:
Kürzel wie (m/w/d) in Stellenanzeigen machen die Stellenanzeige weniger eindeutig. Denn es bleibt nicht bei (m/w/d), sondern ebenso tauchen auch Stellenausschreibungen mit (m/w/i) oder mit (m/w/i/t) auf. Dem Ideenreichtum sind kaum Grenzen gesetzt, oberste Priorität hat, alle Eventualitäten einzuschließen, um mögliche Klagen im Vorhinein zu vermeiden.
Wir erläutern kurz:
  • (m/w/d) steht für männlich/weiblich/divers
  • (m/w/i) steht für männlich/weiblich/intersexuell
  • (m/w/i/t) steht für männlich/weiblich/intersexuell/transsexuell
  • (m/w/a) steht für männlich/weiblich/anders
  • (m/w/x) steht für männlich/weiblich/egal welches Geschlecht beziehungsweise nicht definiert
  • (m/w/gn) steht für männlich/weiblich/geschlechtsneutral
  • (m/w/*) steht für männlich/weiblich/Asterisk kann ein beliebiges Geschlecht oder eine Fußnote symbolisieren
Aber auch diese Liste ist nicht vollständig. Denn parallel zu den bisher üblichen Abkürzungen (m/w) kursieren außerdem noch (m/f) mit f für englisch female, weiblich. So können Leser zukünftig statt (m/w/d) in Stellenanzeigen demnächst vermutlich auch mit Abkürzungen wie (m/f/d) oder einer Kombination aus den obigen rechnen.
Denkbar wären dann also auch (m/w/i/t – international: m/f/i/t) für männlich/weiblich/intersexuell/transsexuell. Und weil sich einige Menschen daran stören könnten, dass das Adjektiv männlich zuerst genannt wird, ist ebenso eine völlig andere Reihenfolge möglich. Die am wenigsten diskriminierende wäre dann vermutlich die alphabetische Reihenfolge.

Was bedeutet das für Unternehmen?

Eine Abkürzung wie (m/w/d) in Stellenanzeigen ist die eine Sache. Hierbei handelt es sich im Endeffekt um Konventionen, das heißt: Haben sich bestimmte Abkürzungen erst einmal eingebürgert, sich die Leser daran gewöhnt, dass die althergebrachte Geschlechtsangabe um ein, zwei Stellen erweitert wurde, wird irgendwann auch die Reihenfolge egal sein.
Der Leser weiß, dass es an dieser Stelle nicht um mysteriöse Qualifikationen geht, die er noch schnell erwerben muss, sondern lediglich eine Angabe, die signalisieren soll, dass dem Unternehmen das Geschlecht ziemlich egal ist, solange der Bewerber auf die ausgeschriebene Stelle passt.
Anders sieht es allerdings mit anderen Bereichen aus. (m/w/d) in Stellenanzeigen ist der Anfang von etwas, das sich im weiteren Bewerbungsprozess und auch in der Unternehmenskultur auswirken wird:
  • Bewerbungsprozess

    Konsequenterweise müssten die Abkürzungen und die Überlegungen, die hinter dem (m/w/d) in Stellenanzeigen stehen, auch in den ganzen Bewerbungsprozess mit eingegliedert werden. Das heißt dann im Falle von Online-Bewerbungen, dass es entsprechende Auswahlmöglichkeiten für den Bewerber geben muss, ein anderes Geschlecht als das übliche männlich/weiblich anzukreuzen.
  • Anrede

    Das führt direkt zum nächsten Punkt, nämlich der Anrede. Wie sollen Bewerber künftig angesprochen werden? Wie sollen Mitarbeiter und Kunden generell angesprochen werden? Auch hier ist eine Anrede mit „Sehr geehrter Herr/sehr geehrte Frau…“ üblich, aber wer kann schon mit Sicherheit wissen, wie das Gegenüber angesprochen werden möchte, ohne sich zuvor vergewissert zu haben?

    Eine Alternative sind neutrale Formulierungen, die alle Geschlechter einschließen und bereits im universitären Umfeld häufiger anzutreffen sind, beispielsweise Studierende statt Studenten. Analog dazu könnte eine Anrede „Liebe Mitarbeitende…“ oder „Liebe Belegschaft…“ lauten.

    Wird eine Person angesprochen, können Sie „Guten Tag, Max Mustermann“ sagen. Klingt sehr steif und ungewohnt, wäre aber geschlechtsneutral. Im Zweifelsfalle können Sie die betreffende Person zudem direkt fragen, wie sie angeredet werden möchte.

    Ebenfalls denkbar ist im Schriftverkehr der Einsatz des Asterisk: „Sehr geehrte*r Frau*Herr“. Wem das alles zu umständlich ist, der kann sich – wie auf dieser Webseite ebenfalls Usus – für eine Form entscheiden und darauf hinweisen, dass aus Gründen der Lesbarkeit das generische Maskulinum verwendet wird.
  • Berufsbezeichnungen

    Einige Berufsbezeichnungen tragen bereits -mann im Namen, so der Kaufmann. Seit längerem existiert passend dazu die Kauffrau – nur wie sollte eine Bezeichnung für das dritte Geschlecht aussehen? Kaufinter?

    Eine Bezeichnung für die Gesamtheit aller Kaufmänner und Kauffrauen, nämlich Kaufleute, existiert bereits. Im Singular bleibt allerdings die Herausforderung. Die Schwierigkeiten setzen sich fort, etwa beim Adjektiv „kaufmännisch“, beispielsweise für eine Stelle, in der eine kaufmännische Tätigkeit angeboten wird.

    „Kauffraulich“ oder „kaufinterlich“ als Ergänzungen zum bisherigen „kaufmännisch“ klingen sehr befremdlich. Richtig ausufernd wird es, wenn versucht wird, das Ganze zu umgehen, etwa: „Angestellt-x (m/w/i/t/d/a/gn/*) mit Berufsabschluss im betriebswirtschaftlichen Bereich gesucht“.
  • Dresscode

    Vor allem bei Jobs, in denen es zu repräsentieren gilt, sind die Geschlechterrollen häufig zu spüren: Frauen werden zwar Hosenanzüge zugestanden, aber beim Schuhwerk werden doch eher Pumps statt Schnürschuhen erwartet. Im angloamerikanischen Raum sind selbst Hosenanzüge bei Frauen oft nicht erwünscht.

    Besonders in Großbritannien herrscht ein sehr konservativer Dresscode, der Frauen hochhackige Pumps und Make-up aufzwängt. Zu überlegen wäre, ob eine geschlechtsneutralere Kleidung für alle die möglich ist, die sich nicht dem männlichen oder weiblichen Geschlecht zugehörig fühlen.
  • Toiletten

    Richtig ins Geld gehen kann eine weitere Bestimmung. Die Arbeitsstättenverordnung schreibt ab einer Mitarbeiterzahl von mehr als neun nach Geschlecht getrennte Toiletten vor. Unisex-Toiletten sind nur erlaubt, wenn die Zahl der Beschäftigten höchstens neun beträgt. Zukünftig müsste bei mehr als neun Mitarbeitern unter Umständen ein dritter Toilettenraum eingerichtet werden.

Nachweis kann gerichtlich gefordert werden

Manche Überlegungen gehen sogar so weit, dass sich diese Vielfalt, wenn (m/w/d) in Stellenanzeigen auftaucht, auch in Unternehmen widerspiegeln müsste. So wie seit einigen Jahren eine Frauenquote gefordert wird, könnte eine Quote für Intersexuelle und Transsexuelle gefordert werden.
Dass Menschen, die sich keinem Geschlecht zugehörig fühlen, nicht diskriminiert werden dürfen beziehungsweise ihre Rechte stärker gefördert werden sollen, ist nachvollziehbar. Unklar hingegen ist, wie das überprüft werden soll.
Denn das AGG eröffnet dem Arbeitnehmer die Möglichkeit zu klagen, falls er sich aufgrund seines Geschlechts diskriminiert fühlt. Dies könnte bei einer Ablehnung auf Stellenangebote, die nur (m/w) tragen, der Fall sein.
Vermutet eine intersexuelle Person Diskriminierung aufgrund ihrer Zugehörigkeit zum dritten Geschlecht und geht sie arbeitsgerichtlich dagegen vor, wird sie ihr Geschlecht gemäß §§ 373 ff. Zivilprozessordnung (ZPO) und §§ 445 ff. ZPO beweisen müssen.
Quelle: https://karrierebibel.de

Tuesday, April 16, 2019

Fraze de precauție P

P101
Dacă este necesară consultarea medicului, țineți la îndemână recipientul sau eticheta produsului.
P102
A nu se lăsa la îndemâna copiilor.
P103
Citiți eticheta înainte de utilizare.
P201
Procurați instrucțiuni speciale înainte de utilizare.
P202
A nu se manipula decât după ce au fost citite și înțelese toate măsurile de securitate.
P210
A se păstra departe de surse de căldură, suprafețe fierbinți, scântei, flăcări și alte surse de aprindere. Fumatul interzis.
P211
Nu pulverizați deasupra unei flăcări deschise sau unei alte surse de aprindere.
P220
A se păstra/depozita departe de îmbrăcăminte/ …/materiale combustibile.
P221
Luați toate măsurile de precauție pentru a evita amestecul cu combustibili…
P222
A nu se lăsa în contact cu aerul.
P223
A nu se lăsa în contact cu apa.
P230
A se păstra umezit cu…
P231
A se manipula sub un gaz inert.
P232
A se proteja de umiditate.
P233
Păstrați recipientul închis etanș.
P234
Păstrați numai în recipientul original.
P235
A se păstra la rece.
P240
Legătură la pământ/conexiune echipotențială cu recipientul și cu echipamentul de recepție.
P241
Utilizați echipamente electrice/de ventilare/de iluminat/…/ antideflagrante.
P242
Nu utilizați unelte care produc scântei.
P243
Luați măsuri de precauție împotriva descărcărilor electrostatice.
P244
Feriți valvele și racordurile de ulei și grăsime.
P250
A nu supune la abraziuni/șocuri/…/frecare.
P251
Nu perforați sau ardeți, chiar și după utilizare.
P260
Nu inspirați praful/fumul/gazul/ceața/vaporii/ spray-ul.
P261
Evitați să inspirați praful/fumul/gazul/ceața/ vaporii/spray-ul.
P262
Evitați orice contact cu ochii, pielea sau îmbrăcămintea.
P263
Evitați contactul în timpul sarcinii/alăptării.
P264
Spălați-vă … bine după utilizare.
P270
A nu mânca, bea sau fuma în timpul utilizării produsului.
P271
A se utiliza numai în aer liber sau în spații bine ventilate.
P272
Nu scoateți îmbrăcămintea de lucru contaminată în afara locului de muncă.
P273
Evitați dispersarea în mediu.
P280
Purtați mănuși de protecție/îmbrăcăminte de protecție/echipament de protecție a ochilor/ echipament de protecție a feței.
P282
Purtați mănuși izolante împotriva frigului/echipament de protecție a feței/ochilor.
P283
Purtați îmbrăcăminte rezistentă la foc/flacără/ ignifugă.
P284
[În cazul în care ventilarea este necorespunzătoare] purtați echipament de protecție respiratorie.
P231 + P232
A se manipula sub un gaz inert. A se proteja de umiditate.
P235 + P410
A se păstra la rece. A se proteja de lumina solară.
P301
ÎN CAZ DE ÎNGHIȚIRE:
P302
ÎN CAZ DE CONTACT CU PIELEA:
P303
ÎN CAZ DE CONTACT CU PIELEA (sau părul):
P304
ÎN CAZ DE INHALARE:
P305
ÎN CAZ DE CONTACT CU OCHII:
P306
ÎN CAZ DE CONTACT CU ÎMBRĂCĂMINTEA:
P308
ÎN CAZ DE expunere sau de posibilă expunere:
P310
Sunați imediat la un CENTRU DE INFORMARE TOXICOLOGICĂ/un medic/…
P311
Sunați la un CENTRU DE INFORMARE TOXICOLOGICĂ/un medic…
P312
Sunați la un CENTRU DE INFORMARE TOXICOLOGICĂ/un medic/…/dacă nu vă simțiți bine.
P313
Consultați medicul.
P314
Consultați medicul, dacă nu vă simțiți bine.
P315
Consultați imediat medicul.
P320
Un tratament specific este urgent (a se vedea … de pe această etichetă).
P321
Tratament specific (a se vedea … de pe această etichetă).
P330
Clătiți gura.
P331
NU provocați voma.
P332
În caz de iritare a pielii:
P333
În caz de iritare a pielii sau de erupție cutanată:
P334
Introduceți în apă rece/acoperiți cu o compresă umedă.
P335
Îndepărtați particulele depuse pe piele.
P336
Dezghețați părțile degerate cu apă călduță. Nu frecați zona afectată.
P337
Dacă iritarea ochilor persistă:
P338
Scoateți lentilele de contact, dacă este cazul și dacă acest lucru se poate face cu ușurință. Continuați să clătiți.
P340
Transportați persoana la aer liber și mențineți-o într-o poziție confortabilă pentru respirație.
P342
În caz de simptome respiratorii:
P351
Clătiți cu atenție cu apă, timp de mai multe minute.
P352
Spălați cu multă apă/…
P353
Clătiți pielea cu apă/faceți duș.
P360
Clătiți imediat îmbrăcămintea contaminată și pielea cu multă apă, înainte de scoaterea îmbrăcămintei.
P361
Scoateți imediat toată îmbrăcămintea contaminată.
P362
Scoateți îmbrăcămintea contaminată.
P363
Spălați îmbrăcămintea contaminată, înainte de reutilizare.
P364
Și spălați înainte de reutilizare.
P370
În caz de incendiu:
P371
În caz de incendiu de proporții și de cantități mari de produs:
P372
Risc de explozie în caz de incendiu.
P373
NU încercați să stingeți incendiul atunci când focul a ajuns la explozivi.
P374
Stingeți incendiul de la o distanță rezonabilă, luând măsuri normale de precauție.
P375
Stingeți incendiul de la distanță din cauza pericolului de explozie.
P376
Opriți scurgerea, dacă acest lucru se poate face în siguranță.
P377
Incendiu cauzat de o scurgere de gaz: nu încercați să stingeți, decât dacă scurgerea poate fi oprită în siguranță.
P378
A se utiliza… pentru a stinge.
P380
Evacuați zona.
P381
Eliminați toate sursele de aprindere, dacă acest lucru se poate face în siguranță.
P390
Absorbiți scurgerile de produs, pentru a nu afecta materialele din apropiere.
P391
Colectați scurgerile de produs.
P301 + P310
ÎN CAZ DE ÎNGHIȚIRE: sunați imediat la un CENTRU DE INFORMARE TOXICOLOGICĂ/un medic/…
P301 + P312
ÎN CAZ DE ÎNGHIȚIRE: sunați la un CENTRU DE INFORMARE TOXICOLOGICĂ/un medic/…/dacă nu vă simțiți bine.
P301 + P330 + P331
ÎN CAZ DE ÎNGHIȚIRE: clătiți gura. NU provocați voma.
P302 + P334
ÎN CAZ DE CONTACT CU PIELEA: introduceți în apă rece/acoperiți cu o compresă umedă.
P302 + P352
ÎN CAZ DE CONTACT CU PIELEA: spălați cu multă apă/…
P303 + P361 + P353
ÎN CAZ DE CONTACT CU PIELEA (sau părul): scoateți imediat toată îmbrăcămintea contaminată. Clătiți pielea cu apă/faceți duș.
P304 + P340
ÎN CAZ DE INHALARE: transportați persoana la aer liber și mențineți-o într-o poziție confortabilă pentru respirație.
P305 + P351 + P338
ÎN CAZ DE CONTACT CU OCHII: clătiți cu atenție cu apă timp de mai multe minute. Scoateți lentilele de contact, dacă este cazul și dacă acest lucru se poate face cu ușurință. Continuați să clătiți.
P306 + P360
ÎN CAZ DE CONTACT CU ÎMBRĂCĂMINTEA: clătiți imediat îmbrăcămintea contaminată și pielea cu multă apă, înainte de scoaterea îmbrăcămintei.
P308 + P311
ÎN CAZ DE expunere sau de posibilă expunere: sunați la un CENTRU DE INFORMARE TOXICOLOGICĂ/un medic/…
P308 + P313
ÎN CAZ DE expunere sau de posibilă expunere: consultați medicul.
P332 + P313
În caz de iritare a pielii: consultați medicul.
P333 + P313
În caz de iritare a pielii sau de erupție cutanată: consultați medicul.
P335 + P334
Îndepărtați particulele depuse pe piele. Introduceți în apă rece/acoperiți cu o compresă umedă.
P337 + P313
Dacă iritarea ochilor persistă: consultați medicul.
P342 + P311
În caz de simptome respiratorii: sunați la un CENTRU DE INFORMARE TOXICOLOGICĂ/un medic/…
P361 + P364
Scoateți imediat toată îmbrăcămintea contaminată și spălați-o înainte de reutilizare.
P362 + P364
Scoateți îmbrăcămintea contaminată și spălați-o înainte de reutilizare.
P370 + P376
În caz de incendiu: opriți scurgerea, dacă acest lucru se poate face în siguranță.
P370 + P378
În caz de incendiu: a se utiliza… pentru a stinge.
P370 + P380
În caz de incendiu: evacuați zona.
P370 + P380 + P375
În caz de incendiu: evacuați zona. Stingeți incendiul de la distanță din cauza pericolului de explozie.
P371 + P380 + P375
În caz de incendiu de proporții și de cantități mari de produs: evacuați zona. Stingeți incendiul de la distanță din cauza pericolului de explozie.
P401
A se depozita…
P402
A se depozita într-un loc uscat.
P403
A se depozita într-un spațiu bine ventilat.
P404
A se depozita într-un recipient închis.
P405
A se depozita sub cheie.
P406
Depozitați într-un recipient rezistent la coroziune/recipient din… cu dublură interioară rezistentă la coroziune.
P407
Păstrați un spațiu gol între stive/paleți.
P410
A se proteja de lumina solară.
P411
A se depozita la temperaturi care să nu depășească … °C/… °F.
P412
Nu expuneți la temperaturi care depășesc 50 °C/122 °F.
P413
Depozitați cantitățile în vrac mai mari de … kg/… lbs la temperaturi care să nu depășească … °C/… °F.
P420
Depozitați departe de alte materiale.
P422
Depozitați conținutul sub …
P402 + P404
A se depozita într-un loc uscat, într-un recipient închis.
P403 + P233
A se depozita într-un spațiu bine ventilat. Păstrați recipientul închis etanș.
P403 + P235
A se depozita într-un spațiu bine ventilat. A se păstra la rece.
P410 + P403
A se proteja de lumina solară. A se depozita într-un spațiu bine ventilat.
P410 + P412
A se proteja de lumina solară. Nu expuneți la temperaturi care depășesc 50 °C/122 °F.
P411 + P235
A se depozita la temperaturi care să nu depășească … °C/… °F. A se păstra la rece.
P501
Aruncați conținutul/recipientul la …
P502
Adresați-vă producătorului pentru informații privind recuperarea/reciclarea
Source: https://www.msds-europe.com

Fraze de pericol H

Frazele de pericol și de precauție sunt codificate folosind un cod alfanumeric unic, care constă dintr-o literă și trei numere, după cum urmează:
  • litera „H” (pentru „fraza de pericol”) sau „P” (pentru „fraza de precauție”). Rețineți că frazele de pericol care sunt transmise de DSD și DPD, dar care nu sunt incluse în GHS, sunt codificate ca „EUH”;
  • o cifră care desemnează tipul de pericol, de ex. „2” pentru pericolele fizice; și
  • două numere care corespund numerotării succesive a pericolelor, cum ar fi explozivitatea (codurile de la 200 la 210), inflamabilitatea (codurile de la 220 la 230) etc.
Etichetele dvs. trebuie să conțină și frazele de pericol relevante care descriu natura și gravitatea pericolelor substanței sau amestecului (articolul 21 din CLP).
Frazele de pericol relevante pentru fiecare clasificare specifică a pericolelor sunt stabilite în tabelele din părțile 2-5 din anexa I a Regulamentului CLP. În cazul în care o clasificare a substanțelor este armonizată și inclusă în partea 3 a anexei VI a Regulamentului CLP, pe etichetă trebuie să se utilizeze fraza de pericol corespunzătoare pentru această clasificare, împreună cu orice altă frază de pericol pentru o clasificare nearmonizată.
Anexa III din CLP enumeră formularea corectă a frazei de pericol, așa cum trebuie să apară pe etichetă. Frazele de pericol dintr-o limbă trebuie grupate pe etichetă împreună cu frazele de precauție în aceeași limbă.

Fraze de pericol H

H200
Exploziv instabil.
H201
Exploziv; pericol de explozie în masă.
H202
Exploziv; pericol grav de proiectare.
H203
Exploziv; pericol de incendiu, detonare sau proiectare.
H204
Pericol de incendiu sau de proiectare.
H205
Pericol de explozie în masă în caz de incendiu.
H220
Gaz extrem de inflamabil.
H221
Gaz inflamabil.
H222
Aerosol extrem de inflamabil.
H223
Aerosol inflamabil.
H224
Lichid și vapori extrem de inflamabili.
H225
Lichid și vapori foarte inflamabili.
H226
Lichid și vapori inflamabili.
H228
Solid inflamabil.
H229
Recipient sub presiune: Poate exploda dacă este încălzit.
H230
Pericol de explozie, chiar și în absența aerului.
H231
Pericol de explozie, chiar și în absența aerului la presiune și/sau temperatură ridicată.
H240
Pericol de explozie în caz de încălzire.
H241
Pericol de incendiu sau de explozie în caz de încălzire.
H242
Pericol de incendiu în caz de încălzire.
H250
Se aprinde spontan, în contact cu aerul.
H251
Se autoîncălzește, pericol de aprindere.
H252
Se autoîncălzește în cantități mari; pericol de aprindere.
H260
În contact cu apa degajă gaze inflamabile care se pot aprinde spontan.
H261
În contact cu apa degajă gaze inflamabile.
H270
Poate provoca sau agrava un incendiu; oxidant.
H271
Poate provoca un incendiu sau o explozie; oxidant puternic.
H272
Poate agrava un incendiu; oxidant.
H280
Conține un gaz sub presiune; pericol de explozie în caz de încălzire.
H281
Conține un gaz răcit; poate cauza arsuri sau leziuni criogenice.
H290
Poate fi corosiv pentru metale.
H300
Mortal în caz de înghițire.
H301
Toxic în caz de înghițire.
H302
Nociv în caz de înghițire.
H304
Poate fi mortal în caz de înghițire și de pătrundere în căile respiratorii.
H310
Mortal în contact cu pielea.
H311
Toxic în contact cu pielea.
H312
Nociv în contact cu pielea.
H314
Provoacă arsuri grave ale pielii și lezarea ochilor.
H315
Provoacă iritarea pielii.
H317
Poate provoca o reacție alergică a pielii.
H318
Provoacă leziuni oculare grave.
H319
Provoacă o iritare gravă a ochilor.
H330
Mortal în caz de inhalare.
H331
Toxic în caz de inhalare.
H332
Nociv în caz de inhalare.
H334
Poate provoca simptome de alergie sau astm sau dificultăți de respirație în caz de inhalare.
H335
Poate provoca iritarea căilor respiratorii.
H336
Poate provoca somnolență sau amețeală.
H340
Poate provoca anomalii genetice <indicați calea de expunere, dacă există probe concludente că nicio altă cale de expunere nu provoacă acest pericol>.
H341
Susceptibil de a provoca anomalii genetice < indicați calea de expunere, dacă există probe concludente că nicio altă cale de expunere nu provoacă acest pericol>.
H350
Poate provoca cancer <indicați calea de expunere, dacă există probe concludente că nicio altă cale de expunere nu provoacă acest pericol>.
H350i
Poate provoca cancer prin inhalare.
H351
Susceptibil de a provoca cancer <indicați calea de expunere, dacă există probe concludente că nicio altă cale de expunere nu provoacă acest pericol>.
H360
Poate dăuna fertilității sau fătului <indicați efectul specific, dacă este cunoscut><indicați calea de expunere, dacă există probe concludente că nicio altă cale de expunere nu provoacă acest pericol>.
H360F – Poate dăuna fertilității.
H360D – Poate dăuna fătului.
H360FD – Poate dăuna fertilității. Poate dăuna fătului.
H360Fd – Poate dăuna fertilității. Susceptibil de a dăuna fătului.
H360Df – Poate dăuna fătului. Susceptibil de a dăuna fertilității.
H361
Susceptibil de a dăuna fertilității sau fătului <indicați efectul specific, dacă este cunoscut><indicați calea de expunere, dacă există probe concludente că nicio altă cale de expunere nu provoacă acest pericol>.
H361f – Susceptibil de a dăuna fertilității.
H361d – Susceptibil de a dăuna fătului.
H361fd – Susceptibil de a dăuna fertilității. Susceptibil de a dăuna fătului.
H362
Poate dăuna copiilor alăptați la sân.
H370
Provoacă leziuni ale organelor <sau indicați toate organele afectate, dacă sunt cunoscute> <indicați calea de expunere, dacă există probe concludente că nicio altă cale de expunere nu provoacă acest pericol>.
H371
Poate provoca leziuni ale organelor <sau indicați toate organele afectate, dacă sunt cunoscute> <indicați calea de expunere, dacă există probe concludente că nicio altă cale de expunere nu provoacă acest pericol>.
H372
Provoacă leziuni ale organelor <sau indicați toate organele afectate, dacă sunt cunoscute> în caz de expunere prelungită sau repetată <indicați calea de expunere, dacă există probe concludente că nicio altă cale de expunere nu provoacă acest pericol>.
H373
Poate provoca leziuni ale organelor <sau indicați toate organele afectate, dacă sunt cunoscute> în caz de expunere prelungită sau repetată <indicați calea de expunere, dacă există probe concludente că nicio altă cale de expunere nu provoacă acest pericol>.
H300 + H310
Mortal în caz de înghițire sau în contact cu pielea
H300 + H330
Mortal în caz de înghițire sau inhalare
H310 + H330
Mortal în contact cu pielea sau prin inhalare
H300 + H310 + H330
Mortal în caz de înghițire, în contact cu pielea sau prin inhalare
H301 + H311
Toxic în caz de înghițire sau în contact cu pielea
H301 + H331
Toxic în caz de înghițire sau prin inhalare
H311 + H331
Toxic în contact cu pielea sau prin inhalare
H301 + H311 + H331
Toxic în caz de înghițire, în contact cu pielea sau prin inhalare
H302 + H312
Nociv în caz de înghițire sau în contact cu pielea
H302 + H332
Nociv în caz de înghițire sau inhalare
H312 + H332
Nociv în contact cu pielea sau prin inhalare
H302 + H312 + H332
Nociv în caz de înghițire, în contact cu pielea sau prin inhalare
H400
Foarte toxic pentru mediul acvatic.
H410
Foarte toxic pentru mediul acvatic cu efecte pe termen lung.
H411
Toxic pentru mediul acvatic cu efecte pe termen lung.
H412
Nociv pentru mediul acvatic cu efecte pe termen lung.
H413
Poate provoca efecte nocive pe termen lung asupra mediului acvatic.
H420
Dăunează sănătății publice și mediului înconjurător prin distrugerea ozonului în atmosfera superioară



EUH 001
Exploziv în stare uscată.
EUH 014
Reacționează violent în contact cu apa.
EUH 018
În timpul utilizării poate forma un amestec vapori-aer, inflamabil/exploziv.
EUH 019
Poate forma peroxizi explozivi.
EUH 044
Risc de explozie, dacă este încălzit în spațiu închis.
EUH 029
În contact cu apa, degajă un gaz toxic.
EUH 031
În contact cu acizi, degajă un gaz toxic.
EUH 032
În contact cu acizi, degajă un gaz foarte toxic.
EUH 066
Expunerea repetată poate provoca uscarea sau crăparea pielii.
EUH 070
Toxic în caz de contact cu ochii.
EUH 071
Corosiv pentru căile respiratorii.
EUH 201/ 201A
Conține plumb. A nu se utiliza pe obiecte care pot fi mestecate sau supte de copii. Atenție! Conține plumb.
EUH 202
Cianoacrilat. Pericol. Se lipește de piele și ochi în câteva secunde. A nu se lăsa la îndemâna copiilor.
EUH 203
Conține crom (VI). Poate provoca o reacție alergică.
EUH 204
Conține izocianați. Poate provoca o reacție alergică.
EUH 205
Conține componenți epoxidici. Poate provoca o reacție alergică.
EUH 206
Atenție! A nu se folosi împreună cu alte produse. Poate elibera gaze periculoase (clor).
EUH 207
Atenție! Conține cadmiu. În timpul utilizării se degajă un fum periculos. A se vedea informațiile furnizate de producător. A se respecta instrucțiunile privind siguranța.
EUH 208
Conține <denumirea substanței sensibilizante>. Poate provoca o reacție alergică.
EUH 209/ 209A
Poate deveni foarte inflamabil în timpul utilizării. Poate deveni inflamabil în timpul utilizării.
EUH 210
Fișa cu date de securitate disponibilă la cerere.
EUH 401
Pentru a evita riscurile pentru sănătatea umană și mediu, a se respecta instrucțiunile de utilizare.
Source: https://www.msds-europe.com

Wednesday, November 28, 2018

Get Line Number in file with Python

# Report the 1-based line number of every line that contains the search text.
# `filename` must be set by the caller to the path of the file to scan.
lookup = 'text to find'

with open(filename) as handle:
    for line_no, text in enumerate(handle, start=1):
        if lookup not in text:
            continue
        print('found at line:', line_no)
 
Or:
 
# Print the 1-based line number of every line that contains the phrase.
search_phrase = "the dog barked"
# `with` guarantees the file is closed; iterating the handle streams the
# file line by line instead of loading it all with readlines().
with open('some_file.txt', 'r') as f:
    for line_num, line in enumerate(f, 1):
        if search_phrase in line:
            print(line_num)
 
Or:
def line_num_for_phrase_in_file(phrase='the dog barked', filename='file.txt'):
    """Return the zero-based index of the first line containing *phrase*.

    The file named by *filename* is scanned from the top and the search
    stops at the first line in which *phrase* occurs as a substring.

    Returns:
        The zero-based index of the first matching line (the first line of
        the file is 0), or -1 when no line contains the phrase.

    Raises:
        OSError: if the file cannot be opened.
    """
    with open(filename, 'r') as f:
        for i, line in enumerate(f):
            if phrase in line:
                return i
    return -1

Or:
# Print the 1-based line number of every line that contains the search text.
lookup = "The_String_You're_Searching"
# The handle is a file object, not a name, and `with` closes it when done.
with open("file.txt") as text_file:
    for num, line in enumerate(text_file, 1):
        if lookup in line:
            print(num)
 
Or:
# Read every line up front; `with` closes the file even if reading fails.
# `path` must be set by the caller to the path of the file to scan.
with open(path, 'r') as f_rd:
    file_lines = f_rd.readlines()

matches = [line for line in file_lines if "chars of Interest" in line]
# Zero-based index of the first matching line, or -1 when nothing matches
# (previously an IndexError was raised on an empty match list).
index = file_lines.index(matches[0]) if matches else -1

Undocumented Features and Limitations of the Windows FINDSTR Command

Source: https://stackoverflow.com
The Windows FINDSTR command is horribly documented. There is very basic command line help available through FINDSTR /?, or HELP FINDSTR, but it is woefully inadequate. There is a wee bit more documentation online at https://docs.microsoft.com/en-us/windows-server/administration/windows-commands/findstr.
There are many FINDSTR features and limitations that are not even hinted at in the documentation. Nor could they be anticipated without prior knowledge and/or careful experimentation.
So the question is - What are the undocumented FINDSTR features and limitations?
The purpose of this question is to provide a one stop repository of the many undocumented features so that:
A) Developers can take full advantage of the features that are there.
B) Developers don't waste their time wondering why something doesn't work when it seems like it should.
Please make sure you know the existing documentation before responding. If the information is covered by the HELP, then it does not belong here.
Neither is this a place to show interesting uses of FINDSTR. If a logical person could anticipate the behavior of a particular usage of FINDSTR based on the documentation, then it does not belong here.
Along the same lines, if a logical person could anticipate the behavior of a particular usage based on information contained in any existing answers, then again, it does not belong here.
 
Preface
Much of the information in this answer has been gathered based on experiments run on a Vista machine. Unless explicitly stated otherwise, I have not confirmed whether the information applies to other Windows versions.
FINDSTR output
The documentation never bothers to explain the output of FINDSTR. It alludes to the fact that matching lines are printed, but nothing more.
The format of matching line output is as follows:
filename:lineNumber:lineOffset:text
where
fileName: = The name of the file containing the matching line. The file name is not printed if the request was explicitly for a single file, or if searching piped input or redirected input. When printed, the fileName will always include any path information provided. Additional path information will be added if the /S option is used. The printed path is always relative to the provided path, or relative to the current directory if none provided.
Note - The filename prefix can be avoided when searching multiple files by using the non-standard (and poorly documented) wildcards < and >. The exact rules for how these wildcards work can be found here. Finally, you can look at this example of how the non-standard wildcards work with FINDSTR.
lineNumber: = The line number of the matching line represented as a decimal value with 1 representing the 1st line of the input. Only printed if /N option is specified.
lineOffset: = The decimal byte offset of the start of the matching line, with 0 representing the 1st character of the 1st line. Only printed if /O option is specified. This is not the offset of the match within the line. It is the number of bytes from the beginning of the file to the beginning of the line.
text = The binary representation of the matching line, including any <CR> and/or <LF>. Nothing is left out of the binary output, such that this example that matches all lines will produce an exact binary copy of the original file.
FINDSTR "^" FILE >FILE_COPY
Most control characters and many extended ASCII characters display as dots on XP
FINDSTR on XP displays most non-printable control characters from matching lines as dots (periods) on the screen. The following control characters are exceptions; they display as themselves: 0x09 Tab, 0x0A LineFeed, 0x0B Vertical Tab, 0x0C Form Feed, 0x0D Carriage Return.
XP FINDSTR also converts a number of extended ASCII characters to dots as well. The extended ASCII characters that display as dots on XP are the same as those that are transformed when supplied on the command line. See the "Character limits for command line parameters - Extended ASCII transformation" section, later in this post
Control characters and extended ASCII are not converted to dots on XP if the output is piped, redirected to a file, or within a FOR IN() clause.
Vista and Windows 7 always display all characters as themselves, never as dots.
Return Codes (ERRORLEVEL)
  • 0 (success)
    • Match was found in at least one line of at least one file.
  • 1 (failure)
    • No match was found in any line of any file.
    • Invalid color specified by /A:xx option
  • 2 (error)
    • Incompatible options /L and /R both specified
    • Missing argument after /A:, /F:, /C:, /D:, or /G:
    • File specified by /F:file or /G:file not found
  • 255 (error)
Source of data to search (Updated based on tests with Windows 7)
Findstr can search data from only one of the following sources:
  • filenames specified as arguments and/or using the /F:file option.
  • stdin via redirection findstr "searchString" <file
  • data stream from a pipe type file | findstr "searchString"
Arguments/options take precedence over redirection, which takes precedence over piped data.
File name arguments and /F:file may be combined. Multiple file name arguments may be used. If multiple /F:file options are specified, then only the last one is used. Wild cards are allowed in filename arguments, but not within the file pointed to by /F:file.
Source of search strings (Updated based on tests with Windows 7)
The /G:file and /C:string options may be combined. Multiple /C:string options may be specified. If multiple /G:file options are specified, then only the last one is used. If either /G:file or /C:string is used, then all non-option arguments are assumed to be files to search. If neither /G:file nor /C:string is used, then the first non-option argument is treated as a space delimited list of search terms.
File names must not be quoted within the file when using the /F:FILE option.
File names may contain spaces and other special characters. Most commands require that such file names are quoted. But the FINDSTR /F:files.txt option requires that filenames within files.txt must NOT be quoted. The file will not be found if the name is quoted.
BUG - Short 8.3 filenames can break the /D and /S options
As with all Windows commands, FINDSTR will attempt to match both the long name and the short 8.3 name when looking for files to search. Assume the current folder contains the following non-empty files:
b1.txt
b.txt2
c.txt
The following command will successfully find all 3 files:
findstr /m "^" *.txt
b.txt2 matches because the corresponding short name B9F64~1.TXT matches. This is consistent with the behavior of all other Windows commands.
But a bug with the /D and /S options causes the following commands to only find b1.txt
findstr /m /d:. "^" *.txt
findstr /m /s "^" *.txt
The bug prevents b.txt2 from being found, as well as all file names that sort after b.txt2 within the same directory. Additional files that sort before, like a.txt, are found. Additional files that sort later, like d.txt, are missed once the bug has been triggered.
Each directory searched is treated independently. For example, the /S option would successfully begin searching in a child folder after failing to find files in the parent, but once the bug causes a short file name to be missed in the child, then all subsequent files in that child folder would also be missed.
The commands work bug free if the same file names are created on a machine that has NTFS 8.3 name generation disabled. Of course b.txt2 would not be found, but c.txt would be found properly.
Not all short names trigger the bug. All instances of bugged behavior I have seen involve an extension that is longer than 3 characters with a short 8.3 name that begins the same as a normal name that does not require an 8.3 name.
The bug has been confirmed on XP, Vista, and Windows 7.
Non-Printable characters and the /P option
The /P option causes FINDSTR to skip any file that contains any of the following decimal byte codes:
0-7, 14-25, 27-31.
Put another way, the /P option will only skip files that contain non-printable control characters. Control characters are codes less than or equal to 31 (0x1F). FINDSTR treats the following control characters as printable:
 8  0x08  backspace
 9  0x09  horizontal tab
10  0x0A  line feed
11  0x0B  vertical tab
12  0x0C  form feed
13  0x0D  carriage return
26  0x1A  substitute (end of text)
All other control characters are treated as non-printable, the presence of which causes the /P option to skip the file.
Piped and Redirected input may have <CR><LF> appended
If the input is piped in and the last character of the stream is not <LF>, then FINDSTR will automatically append <CR><LF> to the input. This has been confirmed on XP, Vista and Windows 7. (I used to think that the Windows pipe was responsible for modifying the input, but I have since discovered that FINDSTR is actually doing the modification.)
The same is true for redirected input on Vista. If the last character of a file used as redirected input is not <LF>, then FINDSTR will automatically append <CR><LF> to the input. However, XP and Windows 7 do not alter redirected input.
FINDSTR hangs on XP and Windows 7 if redirected input does not end with <LF>
This is a nasty "feature" on XP and Windows 7. If the last character of a file used as redirected input does not end with <LF>, then FINDSTR will hang indefinitely once it reaches the end of the redirected file.
Last line of Piped data may be ignored if it consists of a single character
If the input is piped in and the last line consists of a single character that is not followed by <LF>, then FINDSTR completely ignores the last line.
Example - The first command with a single character and no <LF> fails to match, but the second command with 2 characters works fine, as does the third command that has one character with terminating newline.
> set /p "=x" <nul | findstr "^"

> set /p "=xx" <nul | findstr "^"
xx

> echo x| findstr "^"
x
Reported by DosTips user Sponge Belly at new findstr bug. Confirmed on XP, Windows 7 and Windows 8. Haven't heard about Vista yet. (I no longer have Vista to test).
Option syntax
Options can be prefixed with either / or -. Options may be concatenated after a single / or -. However, the concatenated option list may contain at most one multicharacter option such as OFF or F:, and the multi-character option must be the last option in the list.
The following are all equivalent ways of expressing a case insensitive regex search for any line that contains both "hello" and "goodbye" in any order
  • /i /r /c:"hello.*goodbye" /c:"goodbye.*hello"
  • -i -r -c:"hello.*goodbye" /c:"goodbye.*hello"
  • /irc:"hello.*goodbye" /c:"goodbye.*hello"
Search String length limits
On Vista the maximum allowed length for a single search string is 511 bytes. If any search string exceeds 511 then the result is a FINDSTR: Search string too long. error with ERRORLEVEL 2.
When doing a regular expression search, the maximum search string length is 254. A regular expression with length between 255 and 511 will result in a FINDSTR: Out of memory error with ERRORLEVEL 2. A regular expression length >511 results in the FINDSTR: Search string too long. error.
On Windows XP the search string length limit is apparently shorter (see the question "Findstr error: 'Search string too long': How to extract and match substring in 'for' loop?"). The XP limit is 127 bytes for both literal and regex searches.
Line Length limits
Files specified as a command line argument or via the /F:FILE option have no known line length limit. Searches were successfully run against a 128MB file that did not contain a single <LF>.
Piped data and Redirected input is limited to 8191 bytes per line. This limit is a "feature" of FINDSTR. It is not inherent to pipes or redirection. FINDSTR using redirected stdin or piped input will never match any line that is >=8k bytes. Lines >= 8k generate an error message to stderr, but ERRORLEVEL is still 0 if the search string is found in at least one line of at least one file.
Default type of search: Literal vs Regular Expression
/C:"string" - The default is /L literal. Explicitly combining the /L option with /C:"string" certainly works but is redundant.
"string argument" - The default depends on the content of the very first search string. (Remember that <space> is used to delimit search strings.) If the first search string is a valid regular expression that contains at least one un-escaped meta-character, then all search strings are treated as regular expressions. Otherwise all search strings are treated as literals. For example, "51.4 200" will be treated as two regular expressions because the first string contains an un-escaped dot, whereas "200 51.4" will be treated as two literals because the first string does not contain any meta-characters.
/G:file - The default depends on the content of the first non-empty line in the file. If the first search string is a valid regular expression that contains at least one un-escaped meta-character, then all search strings are treated as regular expressions. Otherwise all search strings are treated as literals.
Recommendation - Always explicitly specify /L literal option or /R regular expression option when using "string argument" or /G:file.
BUG - Specifying multiple literal search strings can give unreliable results
The following simple FINDSTR example fails to find a match, even though it should.
echo ffffaaa|findstr /l "ffffaaa faffaffddd"
This bug has been confirmed on Windows Server 2003, Windows XP, Vista, and Windows 7.
Based on experiments, FINDSTR may fail if all of the following conditions are met:
  • The search is using multiple literal search strings
  • The search strings are of different lengths
  • A short search string has some amount of overlap with a longer search string
  • The search is case sensitive (no /I option)
In every failure I have seen, it is always one of the shorter search strings that fails.
For more info see Why doesn't this FINDSTR example with multiple literal search strings find a match?
Quotes and backslashes within command line arguments - Note:
The information within this highlighted section is not 100% accurate. After I wrote this section, user MC ND pointed me to a reference that documents how the Microsoft C/C++ library parses parameters. It is horrifically complicated, but it appears to accurately predict the backslash and quote rules for FINDSTR command line arguments. I recommend you use the highlighted information below as a guide, but if you want more accurate info, refer to the link.

Escaping Quote within command line search strings
Quotes within command line search strings must be escaped with backslash like \". This is true for both literal and regex search strings. This information has been confirmed on XP, Vista, and Windows 7.
Note: The quote may also need to be escaped for the CMD.EXE parser, but this has nothing to do with FINDSTR. For example, to search for a single quote you could use:
FINDSTR \^" file && echo found || echo not found
Escaping Backslash within command line literal search strings
Backslash in a literal search string can normally be represented as \ or as \\. They are typically equivalent. (There may be unusual cases in Vista where the backslash must always be escaped, but I no longer have a Vista machine to test).
But there are some special cases:
When searching for consecutive backslashes, all but the last must be escaped. The last backslash may optionally be escaped.
  • \\ can be coded as \\\ or \\\\
  • \\\ can be coded as \\\\\ or \\\\\\
Searching for one or more backslashes before a quote is bizarre. Logic would suggest that the quote must be escaped, and each of the leading backslashes would need to be escaped, but this does not work! Instead, each of the leading backslashes must be double escaped, and the quote is escaped normally:
  • \" must be coded as \\\\\"
  • \\" must be coded as \\\\\\\\\"
As previously noted, one or more escaped quotes may also require escaping with ^ for the CMD parser
The info in this section has been confirmed on XP and Windows 7.
Escaping Backslash within command line regex search strings
  • Vista only: Backslash in a regex must be either double escaped like \\\\, or else single escaped within a character class set like [\\]
  • XP and Windows 7: Backslash in a regex can always be represented as [\\]. It can normally be represented as \\. But this never works if the backslash precedes an escaped quote.
    One or more backslashes before an escaped quote must either be double escaped, or else coded as [\\]
    • \" may be coded as \\\\\" or [\\]\"
    • \\" may be coded as \\\\\\\\\" or [\\][\\]\" or \\[\\]\"
Escaping Quote and Backslash within /G:FILE literal search strings
Standalone quotes and backslashes within a literal search string file specified by /G:file need not be escaped, but they can be.
" and \" are equivalent.
\ and \\ are equivalent.
If the intent is to find \\, then at least the leading backslash must be escaped. Both \\\ and \\\\ work.
If the intent is to find \", then at least the leading backslash must be escaped. Both \\" and \\\" work.
Escaping Quote and Backslash within /G:FILE regex search strings
This is the one case where the escape sequences work as expected based on the documentation. Quote is not a regex metacharacter, so it need not be escaped (but can be). Backslash is a regex metacharacter, so it must be escaped.
Character limits for command line parameters - Extended ASCII transformation
The null character (0x00) cannot appear in any string on the command line. Any other single byte character can appear in the string (0x01 - 0xFF). However, FINDSTR converts many extended ASCII characters it finds within command line parameters into other characters. This has a major impact in two ways:
1) Many extended ASCII characters will not match themselves if used as a search string on the command line. This limitation is the same for literal and regex searches. If a search string must contain extended ASCII, then the /G:FILE option should be used instead.
2) FINDSTR may fail to find a file if the name contains extended ASCII characters and the file name is specified on the command line. If a file to be searched contains extended ASCII in the name, then the /F:FILE option should be used instead.
Here is a complete list of extended ASCII character transformations that FINDSTR performs on command line strings. Each character is represented as the decimal byte code value. The first code represents the character as supplied on the command line, and the second code represents the character it is transformed into. Note - this list was compiled on a U.S machine. I do not know what impact other languages may have on this list.
158 treated as 080     199 treated as 221     226 treated as 071
169 treated as 170     200 treated as 043     227 treated as 112
176 treated as 221     201 treated as 043     228 treated as 083
177 treated as 221     202 treated as 045     229 treated as 115
178 treated as 221     203 treated as 045     231 treated as 116
179 treated as 221     204 treated as 221     232 treated as 070
180 treated as 221     205 treated as 045     233 treated as 084
181 treated as 221     206 treated as 043     234 treated as 079
182 treated as 221     207 treated as 045     235 treated as 100
183 treated as 043     208 treated as 045     236 treated as 056
184 treated as 043     209 treated as 045     237 treated as 102
185 treated as 221     210 treated as 045     238 treated as 101
186 treated as 221     211 treated as 043     239 treated as 110
187 treated as 043     212 treated as 043     240 treated as 061
188 treated as 043     213 treated as 043     242 treated as 061
189 treated as 043     214 treated as 043     243 treated as 061
190 treated as 043     215 treated as 043     244 treated as 040
191 treated as 043     216 treated as 043     245 treated as 041
192 treated as 043     217 treated as 043     247 treated as 126
193 treated as 045     218 treated as 043     249 treated as 250
194 treated as 045     219 treated as 221     251 treated as 118
195 treated as 043     220 treated as 095     252 treated as 110
196 treated as 045     222 treated as 221     254 treated as 221
197 treated as 043     223 treated as 095
198 treated as 221     224 treated as 097
Any character >0 not in the list above is treated as itself, including <CR> and <LF>. The easiest way to include odd characters like <CR> and <LF> is to get them into an environment variable and use delayed expansion within the command line argument.
Character limits for strings found in files specified by /G:FILE and /F:FILE options
The nul (0x00) character can appear in the file, but it functions like the C string terminator. Any characters after a nul character are treated as a different string as if they were on another line.
The <CR> and <LF> characters are treated as line terminators that terminate a string, and are not included in the string.
All other single byte characters are included perfectly within a string.
Searching Unicode files
FINDSTR cannot properly search most Unicode (UTF-16, UTF-16LE, UTF-16BE, UTF-32) because it cannot search for nul bytes and Unicode typically contains many nul bytes.
However, the TYPE command converts UTF-16LE with BOM to a single byte character set, so a command like the following will work with UTF-16LE with BOM.
type unicode.txt|findstr "search"
Note that Unicode code points that are not supported by your active code page will be converted to ? characters.
It is possible to search UTF-8 as long as your search string contains only ASCII. However, the console output of any multi-byte UTF-8 characters will not be correct. But if you redirect the output to a file, then the result will be correctly encoded UTF-8. Note that if the UTF-8 file contains a BOM, then the BOM will be considered as part of the first line, which could throw off a search that matches the beginning of a line.
It is possible to search multi-byte UTF-8 characters if you put your search string in a UTF-8 encoded search file (without BOM), and use the /G option.
End Of Line
FINDSTR breaks lines immediately after every <LF>. The presence or absence of <CR> has no impact on line breaks.
Searching across line breaks
As expected, the . regex metacharacter will not match <CR> or <LF>. But it is possible to search across a line break using a command line search string. Both the <CR> and <LF> characters must be matched explicitly. If a multi-line match is found, only the 1st line of the match is printed. FINDSTR then doubles back to the 2nd line in the source and begins the search all over again - sort of a "look ahead" type feature.
Assume TEST.TXT has these contents (could be Unix or Windows style)
A
A
A
B
A
A
Then this script
@echo off
setlocal
::Define LF variable containing a linefeed (0x0A)
::NOTE: the trailing caret and the two blank lines after it belong together - insert nothing between them
set LF=^


::Above 2 blank lines are critical - do not remove

::Define CR variable containing a carriage return (0x0D)
::The FOR /F loop stores the first token printed by COPY /Z for this script ("%~dpf0") in CR
for /f %%a in ('copy /Z "%~dpf0" nul') do set "CR=%%a"

::Delayed expansion is required for the !CR! and !LF! references in the search string below
setlocal enableDelayedExpansion
::regex "!CR!*!LF!" will match both Unix and Windows style End-Of-Line
findstr /n /r /c:"A!CR!*!LF!A" TEST.TXT
gives these results
1:A
2:A
5:A
Searching across line breaks using the /G:FILE option is imprecise because the only way to match <CR> or <LF> is via a regex character class range expression that sandwiches the EOL characters.
  • [<TAB>-<0x0B>] matches <LF>, but it also matches <TAB> and <0x0B>
  • [<0x0C>-!] matches <CR>, but it also matches <0x0C> and !
    Note - the above are symbolic representations of the regex byte stream since I can't graphically represent the characters.

    Limited Regular Expressions (regex) Support
    FINDSTR support for regular expressions is extremely limited. If it is not in the HELP documentation, it is not supported.
    Beyond that, the regex expressions that are supported are implemented in a completely non-standard manner, such that results can be different than would be expected coming from something like grep or perl.
    Regex Line Position anchors ^ and $
    ^ matches beginning of input stream as well as any position immediately following a <LF>. Since FINDSTR also breaks lines after <LF>, a simple regex of "^" will always match all lines within a file, even a binary file.
    $ matches any position immediately preceding a <CR>. This means that a regex search string containing $ will never match any lines within a Unix style text file, nor will it match the last line of a Windows text file if it is missing the EOL marker of <CR><LF>.
    Note - As previously discussed, piped and redirected input to FINDSTR may have <CR><LF> appended that is not in the source. Obviously this can impact a regex search that uses $.
    Any search string with characters before ^ or after $ will always fail to find a match.
    Positional Options /B /E /X
    The positional options work the same as ^ and $, except they also work for literal search strings.
    /B functions the same as ^ at the start of a regex search string.
    /E functions the same as $ at the end of a regex search string.
    /X functions the same as having both ^ at the beginning and $ at the end of a regex search string.
    Regex word boundary
    \< must be the very first term in the regex. The regex will not match anything if any other characters precede it. \< corresponds to either the very beginning of the input, the beginning of a line (the position immediately following a <LF>), or the position immediately following any "non-word" character. The next character need not be a "word" character.
    \> must be the very last term in the regex. The regex will not match anything if any other characters follow it. \> corresponds to either the end of input, the position immediately prior to a <CR>, or the position immediately preceding any "non-word" character. The preceding character need not be a "word" character.
    Here is a complete list of "non-word" characters, represented as the decimal byte code. Note - this list was compiled on a U.S. machine. I do not know what impact other languages may have on this list.
    001   028   063   179   204   230
    002   029   064   180   205   231
    003   030   091   181   206   232
    004   031   092   182   207   233
    005   032   093   183   208   234
    006   033   094   184   209   235
    007   034   096   185   210   236
    008   035   123   186   211   237
    009   036   124   187   212   238
    011   037   125   188   213   239
    012   038   126   189   214   240
    014   039   127   190   215   241
    015   040   155   191   216   242
    016   041   156   192   217   243
    017   042   157   193   218   244
    018   043   158   194   219   245
    019   044   168   195   220   246
    020   045   169   196   221   247
    021   046   170   197   222   248
    022   047   173   198   223   249
    023   058   174   199   224   250
    024   059   175   200   226   251
    025   060   176   201   227   254
    026   061   177   202   228   255
    027   062   178   203   229
    
    Regex character class ranges [x-y]
    Character class ranges do not work as expected. See this question: Why does findstr not handle case properly (in some circumstances)?, along with this answer: https://stackoverflow.com/a/8767815/1012053.
    The problem is FINDSTR does not collate the characters by their byte code value (commonly thought of as the ASCII code, but ASCII is only defined from 0x00 - 0x7F). Most regex implementations would treat [A-Z] as all upper case English capital letters. But FINDSTR uses a collation sequence that roughly corresponds to how SORT works. So [A-Z] includes the complete English alphabet, both upper and lower case (except for "a"), as well as non-English alpha characters with diacriticals.
  • Regex character class term limit and BUG
    Not only is FINDSTR limited to a maximum of 15 character class terms within a regex, it fails to properly handle an attempt to exceed the limit. Using 16 or more character class terms results in an interactive Windows pop up stating "Find String (QGREP) Utility has encountered a problem and needs to close. We are sorry for the inconvenience." The message text varies slightly depending on the Windows version. Here is one example of a FINDSTR that will fail:
    echo 01234567890123456|findstr [0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9][0-9]
    
    This bug was reported by DosTips user Judago here. It has been confirmed on XP, Vista, and Windows 7.
    Regex searches fail (and may hang indefinitely) if they include byte code 0xFF (decimal 255)
    Any regex search that includes byte code 0xFF (decimal 255) will fail. It fails if byte code 0xFF is included directly, or if it is implicitly included within a character class range. Remember that FINDSTR character class ranges do not collate characters based on the byte code value. Character <0xFF> appears relatively early in the collation sequence between the <space> and <tab> characters. So any character class range that includes both <space> and <tab> will fail.
    The exact behavior changes slightly depending on the Windows version. Windows 7 hangs indefinitely if 0xFF is included. XP doesn't hang, but it always fails to find a match, and occasionally prints the following error message - "The process tried to write to a nonexistent pipe."
    I no longer have access to a Vista machine, so I haven't been able to test on Vista.
    Regex bug: . and [^anySet] can match End-Of-File
    The regex . meta-character should only match any character other than <CR> or <LF>. There is a bug that allows it to match the End-Of-File if the last line in the file is not terminated by <CR> or <LF>. However, the . will not match an empty file.
    For example, a file named "test.txt" containing a single line of x, without terminating <CR> or <LF>, will match the following:
    findstr /r x......... test.txt
    
    This bug has been confirmed on XP and Win7.
    The same seems to be true for negative character sets. Something like [^abc] will match End-Of-File. Positive character sets like [abc] seem to work fine. I have only tested this on Win7.

Another grep.py

Grep.py - Source: https://github.com/rohitkrai03

import sys
import re
import os
import utils

def main():
    """Search every file under a directory for the given regex patterns.

    The start directory is read from ``sys.argv[1]`` and the patterns from
    ``sys.argv[2:]``; both are handed to the helpers in ``utils``.  When no
    directory is given, a usage line is written to stderr and the process
    exits with status 2.
    """
    if len(sys.argv) < 2:
        # Without this guard a bare invocation dies with an IndexError.
        sys.stderr.write("usage: grep.py DIRECTORY [PATTERN ...]\n")
        sys.exit(2)
    files = utils.troll_directories(os.path.normpath(sys.argv[1]))
    patterns = utils.convert_patterns(sys.argv[2:])
    utils.apply_patterns(files, patterns)


if __name__ == '__main__':
    main()

Utils.py:

import re
import os

def convert_patterns(patterns):
    """Compile each pattern string into a regular expression object.

    Args:
        patterns: iterable of regular expression strings.

    Returns:
        A list of compiled patterns, in the same order as the input.

    Raises:
        re.error: if a pattern is not a valid regular expression.
    """
    return [re.compile(pattern) for pattern in patterns]

def troll_directories(start):
    """Collect the path of every file found beneath ``start``.

    Args:
        start: directory at which the recursive walk begins.

    Returns:
        A list of paths, each formed by joining the containing directory
        with the file name, in the order ``os.walk`` yields them.
    """
    results = []
    # Traverse the directory tree; the sub-directory list is not needed
    # because os.walk descends into sub-directories on its own.
    for root, _dirs, files in os.walk(start):
        # Put the full path of each file into the results.
        results.extend(os.path.join(root, fname) for fname in files)
    return results

def apply_patterns(files, patterns):
    """Print every line in ``files`` that matches one of ``patterns``.

    Each hit is written to stdout as ``<file>:<line number>: <line>`` with
    1-based line numbers.  A line matched by several patterns is printed
    once per matching pattern.

    Args:
        files: iterable of paths of the files to scan.
        patterns: sequence of compiled regular expressions; it is iterated
            once per line, so it must be re-iterable.
    """
    for fname in files:
        # The with-block closes every file; errors="replace" keeps an
        # undecodable (e.g. binary) file from aborting the whole scan.
        with open(fname, errors="replace") as handle:
            for num, line in enumerate(handle, start=1):
                for pattern in patterns:
                    # Search the current line, not the whole list of lines.
                    if pattern.search(line):
                        # Drop the line terminator: print() adds its own.
                        print("{}:{}: {}".format(fname, num, line.rstrip("\n")))

Python Find

It is a small Python utility that searches all the file names under a given directory for a regular expression and prints each matching file with its full path.

How to Use

Just download the package or clone the repo from github.
Run find.py with the source directory and the regular expression to search for as command-line arguments.

Example : - py find.py '.' '.*\.py$'


Find.py
#!/usr/bin/env python3
import sys
import re
import os
# Both the start directory and the pattern are required; fail with a usage
# line instead of an IndexError when either is missing.
if len(sys.argv) < 3:
    sys.stderr.write("usage: find.py DIRECTORY PATTERN\n")
    sys.exit(2)
# Get the start directory.
start = os.path.normpath(sys.argv[1])
# Get the pattern from the command line arguments.
pattern = sys.argv[2]
# Convert it to a regular expression.
expr = re.compile(pattern)
# Traverse the directory for all the files.
for root, dirs, files in os.walk(start):
    for fname in files:
        # If a file name matches the pattern then print its full path.
        if expr.search(fname):
            print(os.path.join(root, fname))



Another interesting project:

https://pypi.org/project/grin

Python 101: Redirecting stdout

Source: https://www.blog.pythonlibrary.org
Redirecting stdout is something most developers will need to do at some point or other. It can be useful to redirect stdout to a file or to a file-like object. I have also redirected stdout to a text control in some of my desktop GUI projects. In this article we will look at the following:
  • Redirecting stdout to a file (simple)
  • The Shell redirection method
  • Redirecting stdout using a custom context manager
  • Python 3’s contextlib.redirect_stdout()
  • Redirect stdout to a wxPython text control


Redirecting stdout

The easiest way to redirect stdout in Python is to just assign it an open file object. Let’s take a look at a simple example:
import sys
 
def redirect_to_file(text, path='/path/to/redirect.txt'):
    """Write a header line and ``text`` to a file through a redirected stdout.

    ``sys.stdout`` points at the file only while those two lines are
    printed; afterwards a confirmation line goes to the original stdout.

    Args:
        text: value printed into the file after the header line.
        path: file to (over)write.  Defaults to the tutorial's placeholder
            path, which must be changed to something valid on your system.
    """
    original = sys.stdout
    # The with-block closes the file (flushing it to disk) and the
    # try/finally restores stdout even if one of the prints raises.
    with open(path, 'w') as target:
        sys.stdout = target
        try:
            print('This is your redirected text:')
            print(text)
        finally:
            sys.stdout = original

    print('This string goes to stdout, NOT the file!')


if __name__ == '__main__':
    redirect_to_file('Python rocks!')
Here we just import Python’s sys module and create a function that we can pass strings that we want to have redirected to a file. We save off a reference to sys.stdout so we can restore it at the end of the function. This can be useful if you intend to use stdout for other things. Before you run this code, be sure to update the path to something that will work on your system. When you run it, you should see the following in your file:

This is your redirected text:
Python rocks!

That last print statement will go to stdout, not the file.

Shell Redirection

Shell redirection is also pretty common, especially in Linux, although Windows also works the same way in most cases. Let’s create a silly example of a noisy function that we will call noisy.py:
# noisy.py
def noisy(text):
    """Print ``text`` framed by two rows of 40 asterisks, with chatter around it."""
    divider = '*' * 40
    report = (
        'The noisy function prints a lot',
        'Here is the string you passed in:',
        divider,
        text,
        divider,
        'Thank you for calling me!',
    )
    # One print call per entry, in order, exactly like six separate prints.
    for entry in report:
        print(entry)


if __name__ == '__main__':
    noisy('This is a test of Python!')
You will notice that we didn’t import the sys module this time around. The reason is that we don’t need it since we will be using shell redirection. To do shell redirection, open a terminal (or command prompt) and navigate to the folder where you saved the code above. Then execute the following command:

python noisy.py > redirected.txt

The greater than character (i.e. >) tells your operating system to redirect stdout to the filename you specified. At this point you should have a file named “redirected.txt” in the same folder as your Python script. If you open it up, the file should have the following contents:

The noisy function prints a lot
Here is the string you passed in:
****************************************
This is a test of Python!
****************************************
Thank you for calling me!

Now wasn’t that pretty cool?

Redirect stdout with a context manager

Another fun way to redirect stdout is by using a context manager. Let’s create a custom context manager that accepts a file object to redirect stdout to:
import sys
from contextlib import contextmanager
 
 
@contextmanager
def custom_redirection(fileobj):
    """Point ``sys.stdout`` at ``fileobj`` for the duration of a with-block.

    Yields ``fileobj`` and puts the previous ``sys.stdout`` back on exit,
    whether or not the body raised.
    """
    # Remember the current stream and swap in the replacement in one step.
    previous_stream, sys.stdout = sys.stdout, fileobj
    try:
        yield fileobj
    finally:
        sys.stdout = previous_stream


if __name__ == '__main__':
    with open('/path/to/custom_redir.txt', 'w') as redirect_target:
        with custom_redirection(redirect_target):
            print('This text is redirected to file')
            print('So is this string')
        # Outside the inner block stdout is back to normal.
        print('This text is printed to stdout')
When you run this code, it will write out two lines of text to your file and one to stdout. As usual, we reset stdout at the end of the function.

Using contextlib.redirect_stdout

Python 3.4 added the redirect_stdout function to its contextlib module. Let’s try using that to create a context manager to redirect stdout:
import sys
from contextlib import redirect_stdout
 
def redirected(text, path):
    """Write ``text`` to the file at ``path``, framed by rows of 40 asterisks.

    The file is opened for writing and ``redirect_stdout`` sends the
    printed lines into it; stdout is back to normal when the call returns.
    """
    divider = '*' * 40
    with open(path, 'w') as sink, redirect_stdout(sink):
        for row in ('Here is the string you passed in:', divider, text, divider):
            print(row)


if __name__ == '__main__':
    path = '/path/to/red.txt'
    text = 'My test to redirect'
    redirected(text, path)
This code is a little simpler because the built-in function does all the yielding and resetting of stdout automatically for you. Otherwise, it works in pretty much the same way as our custom context manager.

Redirecting stdout in wxPython

wxredirect

import sys
import wx
 
class MyForm(wx.Frame):
    """Frame with a read-only text control that receives everything printed.

    The frame holds a multi-line log control and a button; ``sys.stdout`` is
    replaced by the log control so ``print`` output shows up in the window.
    """

    def __init__(self):
        """Build the widgets, lay them out and redirect stdout to the log."""
        wx.Frame.__init__(self, None,
                          title="wxPython Redirect Tutorial")

        # Add a panel so it looks correct on all platforms
        panel = wx.Panel(self, wx.ID_ANY)
        style = wx.TE_MULTILINE|wx.TE_READONLY|wx.HSCROLL
        log = wx.TextCtrl(panel, wx.ID_ANY, size=(300,100),
                          style=style)
        btn = wx.Button(panel, wx.ID_ANY, 'Push me!')
        self.Bind(wx.EVT_BUTTON, self.onButton, btn)

        # Add widgets to a sizer
        sizer = wx.BoxSizer(wx.VERTICAL)
        sizer.Add(log, 1, wx.ALL|wx.EXPAND, 5)
        sizer.Add(btn, 0, wx.ALL|wx.CENTER, 5)
        panel.SetSizer(sizer)

        # redirect text here
        sys.stdout = log

    def onButton(self, event):
        """Handle a button click by printing a line (shown in the log control)."""
        # print() with parentheses is valid on Python 2 and Python 3 alike;
        # the bare print statement is a syntax error on Python 3.
        print("You pressed the button!")
 
# Run the program
if __name__ == "__main__":
    app = wx.App(False)
    # Show() returns a bool, so keep the frame object itself in `frame`.
    frame = MyForm()
    frame.Show()
    app.MainLoop()

This code just creates a simple frame with a panel that contains a multi-line text control and a button. Whenever you press the button, it will print out some text to stdout, which we have redirected to the text control.

Personally I thought it was cool that Python 3 now has a context manager built-in just for this purpose. Speaking of which, Python 3 also has a function for redirecting stderr. All of these examples can be modified slightly to support redirecting stderr or both stdout and stderr. The very last thing we touched on was redirecting stdout to a text control in wxPython. This can be really useful for debugging or for grabbing the output from a subprocess, although in the latter case you will need to print out the output to have it redirected correctly.

Related Reading