The following class can identify the encoding of a file from its byte-order mark, check whether a file's encoding is consistent throughout, and convert a file to a different encoding.
/// <summary>
/// Helpers that guess a file's text encoding from its byte-order mark (BOM),
/// check a file for stray BOMs, and re-encode a text file.
/// </summary>
class EncodingTool
{
    // Labels returned by GetFileEncoding. The spellings (including the "1e"
    // suffixes) are preserved exactly because callers may compare against them.
    private const string Utf8Label = "UTF-8";
    private const string LittleEndianLabel = "UCS-21e, UCS-41e, and UCS-161e";
    private const string BigEndian16Label = "UTF-16 and UCS-2";
    private const string Ucs4Label = "UCS-4";
    private const string AsciiLabel = "ASCII";

    // Longest BOM recognised here (UCS-4 big-endian: 00 00 FE FF).
    private const int BomLength = 4;

    /// <summary>
    /// Returns a label naming the encoding suggested by the BOM at the start
    /// of the file at <paramref name="filePath"/>.
    /// </summary>
    /// <param name="filePath">Path of the file to inspect.</param>
    /// <returns>
    /// One of "UTF-8", "UCS-21e, UCS-41e, and UCS-161e", "UTF-16 and UCS-2",
    /// "UCS-4", or "ASCII" when no BOM is recognised. Returns <c>null</c> when
    /// the stream is not seekable or when reading fails (the I/O error message
    /// is written to standard error).
    /// </returns>
    /// <remarks>
    /// Exceptions raised while opening the file (missing file, access denied,
    /// invalid path) are not caught and propagate to the caller.
    /// </remarks>
    public static string GetFileEncoding(string filePath)
    {
        using (FileStream file = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            // Non-seekable streams are skipped and reported as null.
            if (!file.CanSeek)
            {
                return null;
            }

            try
            {
                byte[] bom = new byte[BomLength];
                int count = ReadChunk(file, bom);
                return DetectBom(bom, count) ?? AsciiLabel;
            }
            catch (IOException e)
            {
                Console.Error.WriteLine("ERROR: " + e.Message);
                return null;
            }
        }
    }

    /// <summary>
    /// Scans the file in 4-byte chunks and reports whether any chunk after the
    /// first starts with a BOM that disagrees with the encoding detected from
    /// the first chunk.
    /// </summary>
    /// <param name="filePath">Path of the file to inspect.</param>
    /// <returns>
    /// <c>false</c> as soon as a conflicting BOM is found; otherwise
    /// <c>true</c>. Also <c>true</c> for an empty file, for a non-seekable
    /// stream, and when a read error interrupts the scan before any conflict
    /// was seen (the I/O error message is written to standard error).
    /// </returns>
    /// <remarks>
    /// Chunks that carry no BOM are never treated as a conflict: ordinary
    /// content is expected to follow a leading BOM. Nothing is printed about
    /// the encodings found; only the boolean result is produced.
    /// Exceptions raised while opening the file propagate to the caller.
    /// </remarks>
    public static bool ConsistantEncoding(string filePath)
    {
        using (FileStream file = new FileStream(filePath, FileMode.Open, FileAccess.Read, FileShare.Read))
        {
            if (!file.CanSeek)
            {
                return true;
            }

            try
            {
                byte[] chunk = new byte[BomLength];
                string encoding = null;
                int count;

                while ((count = ReadChunk(file, chunk)) != 0)
                {
                    // Only the bytes actually read are examined, so a short
                    // final chunk cannot pair with leftovers from the
                    // previous chunk to form a false BOM.
                    string found = DetectBom(chunk, count);

                    if (encoding == null)
                    {
                        // The first chunk fixes the file's nominal encoding.
                        encoding = found ?? AsciiLabel;
                    }
                    else if (found != null && found != encoding)
                    {
                        return false;
                    }
                }
            }
            catch (IOException e)
            {
                Console.Error.WriteLine("ERROR: " + e.Message);
            }

            return true;
        }
    }

    /// <summary>
    /// Re-encodes the text file at <paramref name="filePath"/> using
    /// <paramref name="enc"/> and writes the result next to it as
    /// <c>{name}_{enc.EncodingName}.txt</c>, where <c>{name}</c> is the input
    /// path without its extension.
    /// </summary>
    /// <param name="filePath">Path of the text file to convert.</param>
    /// <param name="enc">Encoding used for the output file.</param>
    /// <exception cref="ArgumentNullException">
    /// <paramref name="filePath"/> or <paramref name="enc"/> is <c>null</c>.
    /// </exception>
    /// <remarks>
    /// An existing output file is overwritten, so converting the same file
    /// twice yields a single copy of the content. Lines are written with
    /// <see cref="TextWriter.WriteLine(string)"/>, so line endings are
    /// normalised to the writer's newline and the output always ends with one.
    /// </remarks>
    public static void ConvertEncoding(string filePath, Encoding enc)
    {
        if (filePath == null)
        {
            throw new ArgumentNullException("filePath");
        }

        if (enc == null)
        {
            throw new ArgumentNullException("enc");
        }

        // ChangeExtension(path, null) strips the extension whatever its
        // length, and leaves extension-less paths untouched.
        string target = Path.ChangeExtension(filePath, null) + "_" + enc.EncodingName + ".txt";

        using (StreamReader reader = new StreamReader(filePath))
        using (StreamWriter writer = new StreamWriter(target, false, enc))
        {
            string line;
            while ((line = reader.ReadLine()) != null)
            {
                writer.WriteLine(line);
            }
        }
    }

    // Fills buffer from stream until it is full or the stream ends; returns the
    // number of bytes stored. Stream.Read may legally return fewer bytes than
    // requested before end-of-stream, hence the loop.
    private static int ReadChunk(Stream stream, byte[] buffer)
    {
        int total = 0;
        while (total < buffer.Length)
        {
            int read = stream.Read(buffer, total, buffer.Length - total);
            if (read == 0)
            {
                break;
            }

            total += read;
        }

        return total;
    }

    // Returns the label for the BOM found in the first count bytes of bytes,
    // or null when those bytes do not start with a recognised BOM.
    private static string DetectBom(byte[] bytes, int count)
    {
        if (count >= 3 && bytes[0] == 0xef && bytes[1] == 0xbb && bytes[2] == 0xbf)
        {
            return Utf8Label;
        }

        if (count >= 2 && bytes[0] == 0xff && bytes[1] == 0xfe)
        {
            return LittleEndianLabel;
        }

        if (count >= 2 && bytes[0] == 0xfe && bytes[1] == 0xff)
        {
            return BigEndian16Label;
        }

        if (count >= 4 && bytes[0] == 0 && bytes[1] == 0 && bytes[2] == 0xfe && bytes[3] == 0xff)
        {
            return Ucs4Label;
        }

        return null;
    }
}
Download the class here.
0 comments:
Post a Comment