【密码学】一文读懂SipHash
SIPHASH
在redis的源码当中也有给出siphash(https://github.com/redis/redis/blob/unstable/src/siphash.c)的实现,这里直接给出链接,不贴出来具体的代码了,接下来,我们来看一下这个算法具体的结构。
算法结构
从理论上来说SipHash这个算法实际上是有两个参数c和d的,因此完整的SipHash算法的表示方法为SipHash-c-d(k, m) 但是呢,一般情况下c取2然后d取4在上面提到过的redis的实现当中,就是这么干的,完整的SipHash过程如下:
Initialization
这一步,把输入的128bit的密钥,分成两个64bit的子密钥,然后得到如下的初始化的值:
Compression
首先,将输入的消息进行分块,按照小端序将每8个字节转换成为一个u64的数字 ,之后做如下的操作:
然后,经过c轮的SipRound,之后做如下的操作:
Finalization
经过上面压缩过程的处理,我们得到处理之后的四个状态值,v0, v1, v2, v3
,然后通过
之后,经过d轮的SipRound,四个状态值异或得到最后的64bit的哈希输出。
感觉我对于上面这三个词,咋翻译咋别扭,索性就不翻译了,直接用英文来吧,完整的过程如下图:
SipHash过程
整个结构,到这里就介绍完了,整体其实也不算特别复杂,相对于前几篇文章提到过的MurMurHash系列稍微复杂了一点点,但是比起之前的MD5 SHA系列,这个过程相对来说就简单太多了。
代码实现
为了不让我写的go代码一股c味,因此呢,我去借(拷)鉴(贝)了一下Go 官方的密码学库,根据官方的风格来写一下这个代码。然后又去借鉴了一下其他人的代码实现,最终有了我这份代码,这里感谢开源作者们的贡献,这么搞一通之后,总算感觉我这代码有点Go的感觉了。
// blocks.go package siphash import ( "encoding/binary" "math/bits" ) func once(d *digest) { blocks(d, d.x[:]) } func sipRound(v0, v1, v2, v3 uint64) (uint64, uint64, uint64, uint64) { v0 += v1 v1 = bits.RotateLeft64(v1, 13) v1 ^= v0 v0 = bits.RotateLeft64(v0, 32) v2 += v3 v3 = bits.RotateLeft64(v3, 16) v3 ^= v2 v0 += v3 v3 = bits.RotateLeft64(v3, 21) v3 ^= v0 v2 += v1 v1 = bits.RotateLeft64(v1, 17) v1 ^= v2 v2 = bits.RotateLeft64(v2, 32) return v0, v1, v2, v3 } func finalize(d *digest) uint64 { d0 := *d once(&d0) v0, v1, v2, v3 := d0.v0, d0.v1, d0.v2, d0.v3 v2 ^= 0xff v0, v1, v2, v3 = sipRound(v0, v1, v2, v3) v0, v1, v2, v3 = sipRound(v0, v1, v2, v3) v0, v1, v2, v3 = sipRound(v0, v1, v2, v3) v0, v1, v2, v3 = sipRound(v0, v1, v2, v3) return v0 ^ v1 ^ v2 ^ v3 } func blocks(d *digest, p []uint8) { v0, v1, v2, v3 := d.v0, d.v1, d.v2, d.v3 for len(p) >= BlockSize { m := binary.LittleEndian.Uint64(p) v3 ^= m v0, v1, v2, v3 = sipRound(v0, v1, v2, v3) v0, v1, v2, v3 = sipRound(v0, v1, v2, v3) v0 ^= m p = p[BlockSize:] } d.v0, d.v1, d.v2, d.v3 = v0, v1, v2, v3 } // siphash.go package siphash import ( "encoding/binary" "hash" ) const ( // BlockSize is the block size of hash algorithm in bytes. BlockSize = 8 // Size is the size of hash output in bytes. Size = 8 // Size128 is the size of 128-bit hash output in bytes. Size128 = 16 ) const ( c0 = 0x736f6d6570736575 c1 = 0x646f72616e646f6d c2 = 0x6c7967656e657261 c3 = 0x7465646279746573 ) type digest struct { v0, v1, v2, v3 uint64 // state k0, k1 uint64 // keys x [8]byte // buffer for unprocessed bytes nx int // number of bytes in buffer x size int // output size in bytes (8 or 16) t uint8 // message bytes counter (mod 256) } func (d *digest) Sum(b []byte) []byte { if b != nil { _, _ = d.Write(b) } if d.size == Size { var res [8]byte r := d.Sum64() binary.LittleEndian.PutUint64(res[:], r) return res[:] } else { var res [16]byte r0, r1 := d._Sum128() binary.LittleEndian.PutUint64(res[:8], r0) binary.LittleEndian.PutUint64(res[8:], r1) return res[:] } } func newDigest(size int, key []byte) *digest { if size != Size && size != Size128 { panic("size must be 8 or 16") } d := new(digest) d.k0 = binary.LittleEndian.Uint64(key[0:]) d.k1 = binary.LittleEndian.Uint64(key[8:]) d.size = size d.Reset() return d } func New(key []byte) hash.Hash64 { return newDigest(Size, key) } func New128(key []byte) hash.Hash { return newDigest(Size128, key) } func (d *digest) Size() int { return d.size } func (d *digest) BlockSize() int { return BlockSize } func (d *digest) Write(p []byte) (nn int, err error) { nn = len(p) d.t += uint8(nn) if d.nx > 0 { n := len(p) if n > BlockSize-d.nx { n = BlockSize - d.nx } d.nx += copy(d.x[d.nx:], p) if d.nx == BlockSize { once(d) d.nx = 0 } p = p[n:] } if len(p) >= BlockSize { n := len(p) &^ (BlockSize - 1) blocks(d, p[:n]) p = p[n:] } if len(p) > 0 { d.nx = copy(d.x[:], p) } return } func (d *digest) Sum64() uint64 { for i := d.nx; i < BlockSize-1; i++ { d.x[i] = 0 } d.x[7] = d.t return finalize(d) } func (d *digest) _Sum128() (r0, r1 uint64) { for i := d.nx; i < BlockSize-1; i++ { d.x[i] = 0 } d.x[7] = d.t blocks(d, d.x[:]) v0, v1, v2, v3 := d.v0, d.v1, d.v2, d.v3 v2 ^= 0xee v0, v1, v2, v3 = sipRound(v0, v1, v2, v3) v0, v1, v2, v3 = sipRound(v0, v1, v2, v3) v0, v1, v2, v3 = sipRound(v0, v1, v2, v3) v0, v1, v2, v3 = sipRound(v0, v1, v2, v3) r0 = v0 ^ v1 ^ v2 ^ v3 v1 ^= 0xdd v0, v1, v2, v3 = sipRound(v0, v1, v2, v3) v0, v1, v2, v3 = sipRound(v0, v1, v2, v3) v0, v1, v2, v3 = sipRound(v0, v1, v2, v3) v0, v1, v2, v3 = sipRound(v0, v1, v2, v3) r1 = v0 ^ v1 ^ v2 ^ v3 return } func (d *digest) Reset() { d.v0 = d.k0 ^ c0 d.v1 = d.k1 ^ c1 d.v2 = d.k0 ^ c2 d.v3 = d.k1 ^ c3 d.t = 0 d.nx = 0 if d.size == Size128 { d.v1 ^= 0xee } }
接下来的rust版,那么就佛系来写了,虽然感觉我rust写的也是一股c味,等着我去抄一抄rust的官方实现,这里偷个懒,只给出64位hash的实现了。
use std::cmp::min; use std::convert::TryInto; struct SipHash { v0: u64, v1: u64, v2: u64, v3: u64, } impl SipHash { pub fn new(key: &[u8]) -> SipHash { // initialization let k0 = u64::from_le_bytes(key[0..8].try_into().unwrap()); let k1 = u64::from_le_bytes(key[8..16].try_into().unwrap()); SipHash { v0: k0 ^ 0x736f6d6570736575, v1: k1 ^ 0x646f72616e646f6d, v2: k0 ^ 0x6c7967656e657261, v3: k1 ^ 0x7465646279746573, } } fn round(&mut self) { self.v0 = self.v0.wrapping_add(self.v1); self.v1 = self.v1.rotate_left(13); self.v1 ^= self.v0; self.v0 = self.v0.rotate_left(32); self.v2 = self.v2.wrapping_add(self.v3); self.v3 = self.v3.rotate_left(16); self.v3 ^= self.v2; self.v0 = self.v0.wrapping_add(self.v3); self.v3 = self.v3.rotate_left(21); self.v3 ^= self.v0; self.v2 = self.v2.wrapping_add(self.v1); self.v1 = self.v1.rotate_left(17); self.v1 ^= self.v2; self.v2 = self.v2.rotate_left(32); } pub fn hash(&mut self, message: &[u8]) -> Vec<u8> { // config let c = 2; let d = 4; // b let b: u64 = ((message.len() % 256) as u64) << (8 * 7); let mut chunks = message.chunks_exact(8); while let Some(chunk) = chunks.next() { let mut chunk = u64::from_le_bytes(chunk.try_into().unwrap()); self.v3 ^= chunk; for _ in 0..c { self.round(); } self.v0 ^= chunk; } let mut remainder = chunks.remainder().to_vec(); if remainder.len() > 0 { for _ in 0..(8 - remainder.len()) { remainder.push(0x00) } let mut chunk = u64::from_le_bytes(remainder.try_into().unwrap()); chunk ^= b; self.v3 ^= chunk; for _ in 0..c { self.round(); } self.v0 ^= chunk; } // finalization self.v2 ^= 0xff; for _ in 0..d { self.round(); } (self.v0 ^ self.v1 ^ self.v2 ^ self.v3).to_be_bytes().to_vec() } }