Blog
Python 实现雪花算法:根据时间戳生成唯一 ID
import time
import uwsgi
import os
# Layout of the 64-bit ID.
# Ten bits identify the generating machine. They are split into a 5-bit
# datacenter id (the higher bits) and a 5-bit worker id inside that
# datacenter (the lower bits), i.e. 32 * 32 = 1024 distinct machines.
DATACENTER_ID_BITS = 5  # datacenter
WORKER_ID_BITS = 5  # machine inside one datacenter
# Rolling counter that tells apart IDs created within the same millisecond:
# 2 ** 12 = 4096 IDs per millisecond.
SEQUENCE_BITS = 12

# Largest value each id field can hold (all of its bits set).
MAX_WORKER_ID = (1 << WORKER_ID_BITS) - 1  # 2**5 - 1 == 0b11111
MAX_DATACENTER_ID = (1 << DATACENTER_ID_BITS) - 1

# Left shift applied to each field when an ID is assembled.
# ("WOKER" is misspelt, but the name is kept because other code uses it.)
WOKER_ID_SHIFT = SEQUENCE_BITS
DATACENTER_ID_SHIFT = WOKER_ID_SHIFT + WORKER_ID_BITS
TIMESTAMP_LEFT_SHIFT = DATACENTER_ID_SHIFT + DATACENTER_ID_BITS

# Mask that makes the sequence counter wrap around.
SEQUENCE_MASK = (1 << SEQUENCE_BITS) - 1

# Epoch (in milliseconds) that ID timestamps are measured from: the Twitter epoch.
TWEPOCH = 1288834974657
class InvalidSystemClock(Exception):
    """Raised when the system clock is found to have moved backwards."""
class IdWorker(object):
    """Snowflake-style ID generator.

    Every ID packs, from the highest bits down: the milliseconds elapsed
    since ``TWEPOCH``, the datacenter id, the worker id and a sequence
    number that distinguishes IDs created within the same millisecond.

    NOTE(review): the generator state is updated without any locking, so
    sharing one instance between threads needs external synchronisation.
    """

    def __init__(self, datacenter_id=0, worker_id=0, sequence=0):
        """Initialise the generator state.

        NOTE(review): the ``worker_id`` argument is overwritten with
        ``uwsgi.worker_id()`` before it is validated or stored, so the value
        passed by the caller has no effect. This is kept as-is because
        existing callers rely on each uWSGI worker process getting its own
        id; confirm before honouring the argument.

        :param datacenter_id: datacenter (machine region) id, in the range
            ``0..MAX_DATACENTER_ID``
        :param worker_id: machine id (currently ignored, see the note above)
        :param sequence: starting sequence number
        :raises ValueError: if the worker id or datacenter id is out of range
        """
        uwsgi_worker_id = uwsgi.worker_id()
        print("uwsgi.worker_id():", uwsgi_worker_id)
        worker_id = uwsgi_worker_id
        print(f"init_IdWorker worker_id:{worker_id}")

        # Sanity checks: both ids must fit into their bit fields.
        if worker_id > MAX_WORKER_ID or worker_id < 0:
            raise ValueError('worker_id值越界')
        if datacenter_id > MAX_DATACENTER_ID or datacenter_id < 0:
            raise ValueError('datacenter_id值越界')

        self.worker_id = worker_id
        self.datacenter_id = datacenter_id
        self.sequence = sequence
        self.last_timestamp = -1  # timestamp used for the previous ID

    def _gen_timestamp(self):
        """Return the current wall-clock time as integer milliseconds.

        :return: int timestamp
        """
        return int(time.time() * 1000)

    def get_ids(self, count):
        """Generate ``count`` IDs in a row.

        :param count: number of IDs to generate
        :return: list of IDs (decimal strings), in generation order
        :raises InvalidSystemClock: propagated from :meth:`get_id`
        """
        return [self.get_id() for _ in range(count)]

    def get_id(self):
        """Generate a new ID.

        :return: the new ID as a decimal string
        :raises InvalidSystemClock: if the clock reads earlier than the
            timestamp of the previously generated ID
        """
        timestamp = self._gen_timestamp()

        # Clock rollback: an ID built from an earlier timestamp could repeat
        # one that was already handed out, so refuse to generate.
        if timestamp < self.last_timestamp:
            message = 'clock is moving backwards. Rejecting requests until {}'.format(
                self.last_timestamp)
            print(message)
            # Carry the details in the exception too, so that handlers and
            # logs do not see an empty error.
            raise InvalidSystemClock(message)

        if timestamp == self.last_timestamp:
            self.sequence = (self.sequence + 1) & SEQUENCE_MASK
            if self.sequence == 0:
                # The sequence wrapped around: this millisecond is used up,
                # wait for the next one.
                timestamp = self._til_next_millis(self.last_timestamp)
        else:
            # First ID in a new millisecond: restart the sequence.
            self.sequence = 0

        self.last_timestamp = timestamp

        new_id = (
            ((timestamp - TWEPOCH) << TIMESTAMP_LEFT_SHIFT)
            | (self.datacenter_id << DATACENTER_ID_SHIFT)
            | (self.worker_id << WOKER_ID_SHIFT)
            | self.sequence
        )
        return str(new_id)

    def _til_next_millis(self, last_timestamp):
        """Busy-wait until the clock moves past ``last_timestamp``.

        :param last_timestamp: millisecond timestamp that must be exceeded
        :return: the first timestamp read that is greater than
            ``last_timestamp``
        """
        timestamp = self._gen_timestamp()
        while timestamp <= last_timestamp:
            timestamp = self._gen_timestamp()
        return timestamp
if __name__ == '__main__':
    # Quick manual check: print one generated ID, then the number of
    # characters in the next one.
    # NOTE: IdWorker.__init__ replaces the worker_id given here with
    # uwsgi.worker_id().
    generator = IdWorker(datacenter_id=0, worker_id=1)
    first_id = generator.get_id()
    print(first_id)
    second_id = generator.get_id()
    print(len(str(second_id)))
鉴于多进程下会出现生成的"唯一 id"重复的问题:因为 uwsgi 项目是以多进程的形式启动的,所以我采用每个 uwsgi worker 的 id 作为雪花生成器的机器 id(worker_id);在分布式的情况下,可以给集群中的每台机器编号,并把机器编号作为 datacenter_id 传入。这样在正常的生产环境中就可以保证生成的雪花 id 不会重复。另外,可以通过增加循环位(SEQUENCE_BITS)的位数来提高每毫秒可生成的雪花 id 的最大数量,理论上没有上限;但如果要求 id 仍然是 64 位整数,增加循环位的同时就必须相应减少其他字段的位数。