| #! /usr/bin/env python |
| # vim: set fileencoding=utf-8 |
| # (c) Uwe Kleine-König <ukleine@strlen.de> |
| # GPLv2 |
| |
| import locale |
| import sys |
| |
# Read the whole input file named by the first command-line argument.
# Binary mode ('rb') is used because the detection below inspects raw byte
# values; the ``with`` block guarantees the handle is closed after reading.
with open(sys.argv[1], 'rb') as f:
    data = f.read()
| |
def len_utf8_char(data):
    """Return the byte length of the UTF-8 sequence at the start of *data*.

    *data* is a byte sequence: a Python 2 ``str``, a ``bytes`` or
    ``bytearray`` object, or a text string in which every character stands
    for one byte value.

    Returns 1 for an ASCII byte and 2..6 for a multi-byte sequence whose
    lead byte is followed by the required number of continuation bytes
    (0x80..0xbf).  Returns -1 when *data* is empty, when the first byte is
    not a recognised lead byte, when a continuation byte is out of range,
    or when *data* ends before the sequence is complete.

    NOTE: only the byte structure is checked.  The legacy 5- and 6-byte
    forms are still accepted, and overlong encodings are not rejected.
    """
    def byte_at(index):
        # Indexing yields a one-character string for str input and an int
        # for bytes/bytearray input; normalise both to an integer.
        item = data[index]
        return item if isinstance(item, int) else ord(item)

    if not data:
        return -1

    lead = byte_at(0)
    if lead < 0x80:
        # ASCII char
        return 1

    # (mask, value of the lead byte after masking, total sequence length)
    lead_patterns = (
        (0xe0, 0xc0, 2),
        (0xf0, 0xe0, 3),
        (0xf8, 0xf0, 4),
        (0xfc, 0xf8, 5),
        (0xfe, 0xfc, 6),
    )
    for mask, pattern, length in lead_patterns:
        if lead & mask == pattern:
            break
    else:
        # Stray continuation byte (0x80..0xbf) or 0xfe/0xff: never a valid
        # lead byte.  Report it explicitly instead of falling through and
        # returning None.
        return -1

    # A sequence cut off by the end of the data is invalid; without this
    # check the loop below would simply see fewer bytes and accept it.
    if len(data) < length:
        return -1

    for index in range(1, length):
        if not 0x80 <= byte_at(index) <= 0xbf:
            return -1
    return length
| |
# Walk through the file contents one UTF-8 sequence at a time, remembering
# the longest sequence seen.  The first invalid sequence ends the scan: the
# data is then reported as the locale's preferred encoding, or as
# ISO-8859-1 when that preferred encoding is UTF-8 or plain ASCII (which
# the data has just been shown not to be).
i = 0
maxl = 0
while i < len(data):
    # len_utf8_char looks at no more than 6 bytes, so hand it a bounded
    # slice; data[i:] would copy the whole remainder on every iteration.
    l = len_utf8_char(data[i:i + 6])
    # Treat None as well as a negative length as "not valid UTF-8";
    # relying on ``None < 0`` being true only works on Python 2.
    if l is None or l < 0:
        prefenc = locale.getpreferredencoding()
        if prefenc not in ('UTF-8', 'ANSI_X3.4-1968'):
            print(prefenc)
        else:
            print('ISO-8859-1')
        sys.exit(0)

    if maxl < l:
        maxl = l
    i += l

# Every sequence was valid: at least one multi-byte sequence means UTF-8,
# otherwise the data is pure ASCII.
if maxl > 1:
    print('UTF-8')
else:
    print('ANSI_X3.4-1968')