You guys got me wrong. I'm talking about compressing random data. If you divide a file into 4-byte data blocks, 1 out of 10 of each 4-byte number will be prime. You should also use a bitmap to determine whether each number is prime. Using a bitmap will clearly increase the size of the file, but I think it can be compressed more than 32/80. Because it has too many 0 bits.
You might say, "This is a very funny compression algorithm." Because it is more compressed size is using other compression algorithms. But isn't it important to have the ability to repeat the compression process? Please ignore this funny situation.
Step by step. First of all, let me share the Prime Helper codes we need.
How can the findPrimes () and saveToFile () methods be more efficient in these codes? For now, the prime numbers up to 2 ^ 32 are found on my Core i7 in about 23 minutes and are saved in 512MB. You can then load the prime numbers from the file for direct use without recalculating.
using System;
using System.Collections;
using System.Collections.Generic;
using System.IO;
using System.Linq;
namespace PrimeNumbers {
public static class PrimeHelper {
public static IEnumerable<long> FindPrimes (long limit) {
return (new Primes (limit));
}
public static IEnumerable<long> FindPrimes (long start, long end) {
return FindPrimes (end).Where (pn => pn >= start);
}
public static bool IsPrime (this long number) {
if (number < 2)
return false;
else if (number < 4)
return true;
var limit = (Int32) System.Math.Sqrt (number) + 1;
var foundPrimes = new Primes (limit);
return foundPrimes.IsPrime (number);
}
public static bool IsPrime (this int number) {
return IsPrime (Convert.ToInt64 (number));
}
public static bool IsPrime (this short number) {
return IsPrime (Convert.ToInt64 (number));
}
public static bool IsPrime (this byte number) {
return IsPrime (Convert.ToInt64 (number));
}
public static bool IsPrime (this uint number) {
return IsPrime (Convert.ToInt64 (number));
}
public static bool IsPrime (this ushort number) {
return IsPrime (Convert.ToInt64 (number));
}
public static bool IsPrime (this sbyte number) {
return IsPrime (Convert.ToInt64 (number));
}
}
public class Primes : IEnumerable<long>, IDisposable {
private long limit;
private MyBitArray numbers;
public Primes (long limit) {
startTime = DateTime.Now;
calculateTime = startTime - startTime;
this.limit = limit + 1;
try { findPrimes (); } catch (Exception e) { Console.WriteLine (e.Message); /*Overflows or Out of Memory*/ }
calculateTime = DateTime.Now - startTime;
}
public Primes (MyBitArray bitArray) {
this.numbers = bitArray;
this.limit = bitArray.Limit;
}
private void findPrimes () {
/*
The Sieve Algorithm
http://en.wikipedia.org/wiki/Sieve_of_Eratosthenes
*/
numbers = new MyBitArray (limit, true);
for (long i = 2; i < limit; i++)
if (numbers[i])
for (long j = i * 2; j < limit; j += i)
numbers[j] = false;
}
public IEnumerator<long> GetEnumerator () {
for (long i = 2; i < limit; i++)
if (numbers[i])
yield return i;
}
IEnumerator IEnumerable.GetEnumerator () {
return GetEnumerator ();
}
// Extended big long number for quickly calculation
public bool IsDivisible (long number) {
var sqrt = System.Math.Sqrt (number);
foreach (var prime in this) {
if (number % prime == 0) {
DivisibleBy = prime;
return true;
}
if (prime > sqrt)
return false;
}
return false;
}
// For big long number
public bool IsPrime (long number) {
return number < limit ?
this.SingleOrDefault (n => n == number) != 0 :
!IsDivisible (number);
}
public void Dispose () {
numbers.Dispose ();
}
public void SaveToFile (string fileName, SaveOptions saveOptions) {
int elementSize = 8;
if ((this.limit - 1L) <= Convert.ToInt64 (byte.MaxValue))
elementSize = 1;
else if ((this.limit - 1L) <= Convert.ToInt64 (ushort.MaxValue))
elementSize = 2;
else if ((this.limit - 1L) <= Convert.ToInt64 (uint.MaxValue))
elementSize = 4;
switch (saveOptions) {
case SaveOptions.TextFile:
saveToTextFile (fileName);
break;
case SaveOptions.BinaryAllNumbersBitArray:
saveToFile (fileName, this.Numbers.Bytes);
break;
case SaveOptions.BinaryPrimeList:
saveToFile (fileName, this, elementSize);
break;
case SaveOptions.BinaryAllNumbersAndPrimeIndex:
saveToFileOnlyIndex (fileName, this, elementSize);
break;
case SaveOptions.All:
saveToFile (Path.Combine (Path.GetDirectoryName (fileName), Path.GetFileNameWithoutExtension (fileName) + "_BinaryAllNumbersBitArray.bin"), this.Numbers.Bytes);
saveToFile (Path.Combine (Path.GetDirectoryName (fileName), Path.GetFileNameWithoutExtension (fileName) + "_BinaryPrimeList.bin"), this, elementSize);
saveToFileOnlyIndex (Path.Combine (Path.GetDirectoryName (fileName), Path.GetFileNameWithoutExtension (fileName) + "_BinaryAllNumbersPrimeIndex.bin"), this, elementSize);
saveToTextFile (Path.Combine (Path.GetDirectoryName (fileName), Path.GetFileNameWithoutExtension (fileName) + "_TextFile.txt"));
break;
}
}
protected void saveToFile (string fileName, byte[] data) {
Console.WriteLine ("Saving numbers bits if is prime bit 1 else bit 0...");
using (MemoryStream mstream = new MemoryStream (data)) {
ProgressCallback callback = (long position, long total) => { };
copy (mstream, fileName, callback);
}
}
protected void saveToFile (string fileName, Primes primes, int elementSize = 4) {
Console.WriteLine ("Saving primes. Element size: {0}...", elementSize);
using (FileStream file = File.OpenWrite (fileName)) {
long writed = 0;
foreach (var prime in primes) {
byte[] data = new byte[elementSize];
if (elementSize == 8) {
data = BitConverter.GetBytes (Convert.ToUInt64 (prime));
} else if (elementSize == 4) {
data = BitConverter.GetBytes (Convert.ToUInt32 (prime));
} else if (elementSize == 2) {
data = BitConverter.GetBytes (Convert.ToUInt16 (prime));
} else if (elementSize == 1) {
data = BitConverter.GetBytes (Convert.ToByte (prime));
}
file.Write (data, 0, elementSize);
writed++;
if (writed == 536870912L) {
file.Flush (true);
writed = 0;
}
}
file.Close ();
}
}
protected void saveToFileOnlyIndex (string fileName, Primes primes, int elementSize = 4) {
Console.WriteLine ("Saving all numbers prime indexes if not prime index is -1. Element size: {0}...", elementSize);
using (FileStream file = File.OpenWrite (fileName)) {
int index = 0;
long writed = 0;
long length = Convert.ToInt64 (uint.MaxValue) + 1L;
for (long i = 0; i < length; i++) {
byte[] data = new byte[elementSize];
if (elementSize == 8) {
if (primes.Numbers[i]) {
index++;
data = BitConverter.GetBytes (Convert.ToInt64 (index));
} else {
data = BitConverter.GetBytes (Convert.ToInt32 (-1));
}
} else if (elementSize == 4) {
if (primes.Numbers[i]) {
index++;
data = BitConverter.GetBytes (Convert.ToInt32 (index));
} else {
data = BitConverter.GetBytes (Convert.ToInt32 (-1));
}
} else if (elementSize == 2) {
if (primes.Numbers[i]) {
index++;
data = BitConverter.GetBytes (Convert.ToInt16 (index));
} else {
data = BitConverter.GetBytes (Convert.ToInt16 (-1));
}
} else if (elementSize == 1) {
if (primes.Numbers[i]) {
index++;
data = BitConverter.GetBytes (Convert.ToSByte (index));
} else {
data = BitConverter.GetBytes (Convert.ToSByte (-1));
}
}
file.Write (data, 0, elementSize);
writed++;
if (writed == 536870912L) {
file.Flush (true);
writed = 0;
}
}
file.Close ();
}
}
public delegate void ProgressCallback (long position, long total);
protected void copy (Stream inputStream, string outputFile, ProgressCallback progressCallback) {
using (var outputStream = File.OpenWrite (outputFile)) {
int writed = 0;
const int bufferSize = 65536;
while (inputStream.Position < inputStream.Length) {
byte[] data = new byte[bufferSize];
int amountRead = inputStream.Read (data, 0, bufferSize);
outputStream.Write (data, 0, amountRead);
writed++;
if (progressCallback != null)
progressCallback (inputStream.Position, inputStream.Length);
if (writed == 32768) {
outputStream.Flush (true);
writed = 0;
}
}
outputStream.Flush (true);
}
}
protected void saveToTextFile (string fileName) {
Console.WriteLine ("Saving primes to text file...");
using (System.IO.StreamWriter file =
new System.IO.StreamWriter (fileName)) {
long writed = 0;
foreach (var prime in this) {
file.WriteLine (prime.ToString ());
if (writed == 536870912L) {
file.Flush ();
writed = 0;
}
}
file.Close ();
}
}
private static DateTime startTime;
private static TimeSpan calculateTime;
public TimeSpan CalculateTime { get { return calculateTime; } }
public long DivisibleBy { get; set; }
public long Length { get { return limit; } }
public MyBitArray Numbers { get { return numbers; } }
}
public class MyBitArray : IDisposable {
byte[] bytes;
public MyBitArray (long limit, bool defaultValue = false) {
long byteCount = Convert.ToInt64 ((limit >> 3) + 1);
this.bytes = new byte[byteCount];
for (long i = 0; i < byteCount; i++) {
bytes[i] = (defaultValue == true ? (byte) 0xFF : (byte) 0x00);
}
this.limit = limit;
}
public MyBitArray (long limit, byte[] bytes) {
this.limit = limit;
this.bytes = bytes;
}
public bool this [long index] {
get {
return getValue (index);
}
set {
setValue (index, value);
}
}
bool getValue (long index) {
if (index < 8) {
return getBit (bytes[0], (byte) index);
}
long byteIndex = (index & 7) == 0 ? ((index >> 3) - 1) : index >> 3;
byte bitIndex = (byte) (index & 7);
return getBit (bytes[byteIndex], bitIndex);
}
void setValue (long index, bool value) {
if (index < 8) {
bytes[0] = setBit (bytes[0], (byte) index, value);
return;
}
long byteIndex = (index & 7) == 0 ? (index >> 3) - 1 : index >> 3;
byte bitIndex = (byte) (index & 7);
bool old = getBit (bytes[byteIndex], bitIndex);
bytes[byteIndex] = setBit (bytes[byteIndex], bitIndex, value);
}
bool getBit (byte byt, byte index) {
return ((byt & (1 << index)) >> index) == 1;
}
byte setBit (byte byt, byte index, bool value) {
return (byte) ((byt & ~(1 << index)) + (value ? 1 << index : 0));
}
public void Dispose () {
GC.Collect (2, GCCollectionMode.Optimized);
}
private long limit;
public long Limit { get { return limit; } }
public byte[] Bytes { get { return this.bytes; } }
}
public enum SaveOptions {
All,
TextFile,
BinaryAllNumbersBitArray,
BinaryPrimeList,
BinaryAllNumbersAndPrimeIndex
}
}