Note: The first line of the file is never length-checked. It is treated as a header and copied to the top of each output file, because the format I was working with in this case contains a header line.
Note 2: The StreamWriter writes with UTF-8 encoding because the file format I was working with is better interpreted as UTF-8 than as ASCII.
I apologize for the lack of formatting in the following code. For some reason, it lost all of its formatting when I copied it over.
/*
* Creator: Calvin Hawkes 7-15-10
*/
/// <summary>
/// Splits a text file by line length. The first line is treated as a header and is
/// never length-checked. Lines shorter than 5,000 characters are written to a file
/// of the same name inside a "5KSplit" sub-directory next to the input file; lines
/// of 5,000 characters or more are written to "&lt;name&gt;_Over5K.txt" in that same
/// sub-directory. The header is written at the top of each output file.
/// </summary>
/// <param name="file">The input file to split.</param>
/// <remarks>
/// Both output files are opened in append mode with UTF-8 encoding, so running the
/// method again on the same input adds to the existing output rather than replacing it.
/// The "_Over5K" file is only created when at least one over-length line is found.
/// </remarks>
public void FiveKCharSplit(FileInfo file)
{
    // Change this value to the character count you wish to split by.
    const int MaxLineLength = 5000;

    string targetDir = Path.Combine(file.Directory.FullName, "5KSplit");
    string header;
    List<string> over5k = new List<string>(); // lines at or over the limit

    // Open the input first so a missing input file fails before anything is created.
    using (StreamReader sr = new StreamReader(file.FullName))
    {
        // CreateDirectory does nothing when the directory already exists.
        Directory.CreateDirectory(targetDir);

        // File holding the lines that are under the limit.
        using (StreamWriter sw = new StreamWriter(Path.Combine(targetDir, file.Name), true, Encoding.UTF8))
        {
            header = sr.ReadLine(); // null when the input is empty; written as a blank line
            sw.WriteLine(header);

            string line;
            while ((line = sr.ReadLine()) != null)
            {
                if (line.Length >= MaxLineLength)
                {
                    over5k.Add(line);
                }
                else
                {
                    sw.WriteLine(line);
                }
            }
        }
    }

    if (over5k.Count == 0)
    {
        return;
    }

    // Write the over-length lines into a separate file.
    Console.WriteLine("{0} Contains Line(s) over 5,000 Char.\n Splitting now...", file.Name);

    // GetFileNameWithoutExtension copes with any extension length (or none),
    // unlike trimming a fixed four characters off the end of the name.
    string overPath = Path.Combine(
        targetDir,
        Path.GetFileNameWithoutExtension(file.Name) + "_Over5K.txt");

    using (StreamWriter sw = new StreamWriter(overPath, true, Encoding.UTF8))
    {
        sw.WriteLine(header);
        foreach (string s in over5k)
        {
            sw.WriteLine(s);
        }
    }
}

0 comments:
Post a Comment