问题引出:从亿万级数据中存储查找某个数据是否存在?
什么是Bitmap算法?百度给了一个简单易懂的讲解:http://baijiahao.baidu.com/s?id=1575038901090600&wfr=spider&for=pc
我们在判断一个数据是否存在,基本思路是读出来然后遍历一遍,判断Boolean。
判断是否存在
import java.io.BufferedReader; import java.io.FileReader; import java.io.IOException; public class IsNumberExist { private int[] bitmap; private int size; private int SHIFT = 5;// 2的5次方是32 public boolean isNumberExist(int number) { int bit = number >> SHIFT; int index = number & ((1 << SHIFT) - 1); return ((1 << index) & bitmap[bit]) != 0; } public IsNumberExist(int size) { this.size = size; bitmap = new int[(size >> SHIFT) + 1]; } public void insertDate(int number) { int bit = number >> SHIFT; int index = number & ((1 << SHIFT) - 1); bitmap[bit] = bitmap[bit] | (1 << index); } public void insertFromTxt(String filename) { try { BufferedReader br = new BufferedReader(new FileReader(filename)); String str = null; while ((str = br.readLine()) != null) { insertDate(Integer.valueOf(str)); } br.close(); } catch (IOException e) { e.printStackTrace(); } } public static void main(String[] args) { Runtime rt = Runtime.getRuntime(); System.out.println("当前JVM所占内存:" + (rt.totalMemory() - rt.freeMemory()) / 1024 / 1024 + "M"); IsNumberExist tool = new IsNumberExist(1000000000); System.out.println("当前JVM所占内存:" + (rt.totalMemory() - rt.freeMemory()) / 1024 / 1024 + "M"); // Date.makeNumbers(100000000);//生成一亿个数到number.txt tool.insertFromTxt("numbers.txt");// 使用这个一亿个数初始化bitmap的状态 System.out.println(tool.isNumberExist(88888888));// 判断88888888是否在这个文件中 System.out.println(tool.isNumberExist(99999999));// 判断99999999是否在这个文件中 System.out.println(tool.isNumberExist(91725151));// 判断91725151是否在这个文件中 } }
生成数据class
import java.io.BufferedWriter; import java.io.FileWriter; import java.io.IOException; import java.util.Random; public class Date { public static boolean makeNumbers(int size) { boolean flag = true; Random random = new Random(); try { BufferedWriter bw = new BufferedWriter( new FileWriter("numbers.txt")); for (int i = 0; i < size; i++) { bw.write(String.valueOf(Math.abs(random.nextInt(size)))); bw.newLine(); } bw.close(); } catch (IOException e) { flag = false; e.printStackTrace(); } return flag; } public static void main(String[] args) { System.out.println(makeNumbers(100000000)); } }
利用位映射原理对大数据排重 --- Java代码实现:https://blog.csdn.net/a3192048/article/details/80261699