modelscope有提供baichuan13B系列的int8或int4量化模型吗?
以下是 Baichuan-13B-Chat 基于 bitsandbytes 的 4bit 量化加载示例代码,拿走不谢
import json
import torch
import streamlit as st
#from transformers import AutoModelForCausalLM, AutoTokenizer
#from transformers.generation.utils import GenerationConfig
from modelscope import AutoModelForCausalLM, AutoTokenizer
from transformers import BitsAndBytesConfig
from modelscope import GenerationConfig
import random
import time
# NOTE(review): "modle_name" is a typo for "model_name"; kept as-is because
# every from_pretrained() call below references this exact identifier.
modle_name = 'baichuan-inc/Baichuan-13B-Chat'
# Alternative: point at an already-downloaded local modelscope cache dir.
#modle_name = '/root/.cache/modelscope/hub/baichuan-inc/Baichuan-13B-Chat'
# Basic Streamlit page chrome for the chat UI.
st.set_page_config(page_title='Baichuan-13B-Chat')
st.title('Baichuan-13B-Chat')
@st.cache_resource
def init_model():
    """Load the 4-bit quantized Baichuan-13B-Chat model, its generation
    config, and its tokenizer.

    Cached with ``st.cache_resource`` so the (slow, memory-heavy) load
    happens only once per server process, not on every Streamlit rerun.

    Returns:
        tuple: ``(model, tokenizer)`` ready for ``model.chat(...)``.
    """
    # NF4 4-bit weight quantization via bitsandbytes; de-quantized
    # matmuls run in bfloat16 for numerical stability.
    quantization_config = BitsAndBytesConfig(
        load_in_4bit=True,
        bnb_4bit_quant_type='nf4',
        bnb_4bit_compute_dtype=torch.bfloat16,
    )
    print('model start.............')
    # NOTE(review): torch_dtype=float16 here while the 4-bit compute dtype
    # above is bfloat16 — works, but mixing the two is inconsistent; confirm
    # whether both should be bfloat16 on the target GPU.
    model = AutoModelForCausalLM.from_pretrained(
        modle_name,
        torch_dtype=torch.float16,
        quantization_config=quantization_config,
        # 'balanced' spreads layers evenly across all visible GPUs.
        device_map='balanced',
        # Baichuan ships custom modeling code in the repo.
        trust_remote_code=True,
    )
    print('model_generation_config start.............')
    # Attach the model's sampling defaults (temperature, top_p, ...) so
    # model.chat() generates with the intended settings.
    model.generation_config = GenerationConfig.from_pretrained(
        modle_name,
        trust_remote_code=True,
    )
    print('tokenizer start.............')
    tokenizer = AutoTokenizer.from_pretrained(
        modle_name,
        use_fast=False,
        trust_remote_code=True,
    )
    print('---------------Init End ---------------')
    return model, tokenizer
def clear_chat_history():
    """Drop the stored conversation so the next rerun starts fresh.

    Uses ``pop`` with a default instead of ``del`` so the button callback
    is a harmless no-op (rather than raising) if ``messages`` was never
    initialized or was already cleared.
    """
    st.session_state.pop('messages', None)
def init_chat_history():
    """Render the greeting, replay any stored conversation, and return
    the message list held in Streamlit session state (creating it empty
    on the first run)."""
    with st.chat_message('assistant', avatar='🤖'):
        st.markdown('您好,我是百川大模型,很高兴为您服务🥰')
    # First run of this session: start with an empty history.
    if 'messages' not in st.session_state:
        st.session_state.messages = []
    # Replay every stored turn so the transcript survives reruns.
    # (No-op on a fresh session — the list is empty.)
    for msg in st.session_state.messages:
        icon = '🧑💻' if msg['role'] == 'user' else '🤖'
        with st.chat_message(msg['role'], avatar=icon):
            st.markdown(msg['content'])
    return st.session_state.messages
def main():
    """Streamlit entry point: wire up the model and the chat loop.

    On each submitted prompt, echo it, stream the model's reply into a
    placeholder as tokens arrive, then persist both turns in session
    state and log the full transcript to stdout.
    """
    model, tokenizer = init_model()
    messages = init_chat_history()
    if prompt := st.chat_input('Shift + Enter 换行, Enter 发送'):
        # Echo the user's turn and record it before generating.
        with st.chat_message('user', avatar='🧑💻'):
            st.markdown(prompt)
        messages.append({'role': 'user', 'content': prompt})
        print(f'[user] {prompt}', flush=True)
        with st.chat_message('assistant', avatar='🤖'):
            placeholder = st.empty()
            # Each yielded `response` is the full reply-so-far; rewrite
            # the placeholder so the text appears to stream in.
            for response in model.chat(tokenizer, messages, stream=True):
                placeholder.markdown(response)
                # Keep Apple-silicon memory pressure down between chunks.
                if torch.backends.mps.is_available():
                    torch.mps.empty_cache()
        # `response` now holds the final reply; store and log it.
        messages.append({'role': 'assistant', 'content': response})
        print(json.dumps(messages, ensure_ascii=False), flush=True)
    st.button('清空对话', on_click=clear_chat_history)
# Run the app only when executed directly (streamlit run this_file.py).
if __name__ == '__main__':
    main()
赞0
踩0