$ huggingface-cli download microsoft/DialoGPT-small
Python code:
$ vi llm-transformers.py
# import the model and the tokenizer objects from the Transformers library
from transformers import AutoModelForCausalLM, AutoTokenizer
# load the model
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
# load the model's tokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
# tokenize the input
input_ids = tokenizer.encode("Hello!" + tokenizer.eos_token, return_tensors='pt')
# generate the text
output = model.generate(
    input_ids=input_ids,
    max_new_tokens=50
)
# decode generated tokens
print(tokenizer.decode(output[0]))
Run the Python script:
$ python3 llm-transformers.py
Output:
Hello!<|endoftext|>Hi!<|endoftext|>
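To print only the model's reply, you can slice the prompt tokens off the generated sequence before decoding and drop special tokens; a minimal sketch reusing the objects from llm-transformers.py:
# decode only the newly generated tokens, dropping the prompt
# and special tokens such as <|endoftext|>
reply_ids = output[0][input_ids.shape[-1]:]
print(tokenizer.decode(reply_ids, skip_special_tokens=True))
For the run above, this would print just Hi!.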
$ vi llm-transformers-pipeline.py
# import the model and the tokenizer objects from the Transformers library
from transformers import AutoModelForCausalLM, AutoTokenizer, pipeline
# load the model
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
# load the model's tokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
# create a pipeline object for the "text-generation" task
generator = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=50
)
# prompt pipeline with some initial text to generate more text
output = generator("Hello!" + tokenizer.eos_token)
print(output[0]["generated_text"])
Run the Python script:
$ python3 llm-transformers-pipeline.py
Output:
Hello!<|endoftext|>Hi!
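If you want only the continuation rather than the prompt echoed back, the text-generation pipeline accepts a return_full_text flag; a minimal variation of the call above:
# return only the generated continuation, not the prompt
output = generator("Hello!" + tokenizer.eos_token, return_full_text=False)
print(output[0]["generated_text"])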
$ wget https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf/resolve/main/Phi-3-mini-4k-instruct-q4.gguf
Python code:
$ vi llm-llama-cpp.py
from llama_cpp import Llama
model = Llama(model_path="./Phi-3-mini-4k-instruct-q4.gguf")
prompt = """
Question: What's 1+1?
"""
output = model(
    prompt,
    max_tokens=50,   # limits the length of the generated text
    temperature=0,   # controls the randomness of the output; lower values are more deterministic
    top_p=1,         # nucleus sampling threshold in (0, 1]; lower values restrict sampling to the most probable tokens
    echo=True        # includes the prompt in the output if True
)
print(output["choices"][0]["text"])
Run the Python script:
$ python3 llm-llama-cpp.py
Output:
Question: What's 1+1? <|assistant|> 1+1 equals 2.
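llama-cpp-python also exposes an OpenAI-style chat interface that formats the messages with the model's chat template for you; a minimal sketch reusing the same model object:
# chat-style call; messages are rendered with the model's chat template
chat = model.create_chat_completion(
    messages=[{"role": "user", "content": "What's 1+1?"}],
    max_tokens=50,
    temperature=0
)
print(chat["choices"][0]["message"]["content"])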
$ curl https://api.openai.com/v1/chat/completions \
  -H "Content-Type: application/json" \
  -H "Authorization: Bearer YOUR_API_KEY" \
  -d '{
    "model": "gpt-4o-mini",
    "store": true,
    "messages": [
      {"role": "user", "content": "What is 1+1?"}
    ]
  }'
Output:
{
  "id": "chatcmpl-BWREFCaHDdorA60Q4ufKWGZ9yY70Z",
  "object": "chat.completion",
  "model": "gpt-4o-mini-2024-07-18",
  "choices": [
    {
      "index": 0,
      "message": {
        "role": "assistant",
        "content": "1 + 1 equals 2.",
        "refusal": null,
        "annotations": []
      },
      "logprobs": null,
      "finish_reason": "stop"
    }
  ],
  "usage": {
    "prompt_tokens": 14,
    "completion_tokens": 9,
    "total_tokens": 23,
    "prompt_tokens_details": {
      "cached_tokens": 0,
      "audio_tokens": 0
    },
    "completion_tokens_details": {
      "reasoning_tokens": 0,
      "audio_tokens": 0,
      "accepted_prediction_tokens": 0,
      "rejected_prediction_tokens": 0
    }
  }
}
$ pip install openai
Check OpenAI Python SDK installation:
$ pip show openai
Name: openai
Version: 1.76.0
...
Python code:
$ vi llm-gpt.py
import openai
openai.api_key = "YOUR_API_KEY"
completion = openai.chat.completions.create(
    model="gpt-4o-mini",
    store=True,
    messages=[
        {"role": "user", "content": "What is 1+1?"}
    ]
)
print(completion.choices[0].message)
Run the Python script:
$ python3 llm-gpt.py
Output:
ChatCompletionMessage(
    content='1 + 1 equals 2.',
    refusal=None,
    role='assistant',
    annotations=[],
    audio=None,
    function_call=None,
    tool_calls=None
)
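Recent releases of the SDK recommend an explicit client object over the module-level API; a minimal equivalent sketch:
from openai import OpenAI

# if api_key is omitted, the client reads the OPENAI_API_KEY environment variable
client = OpenAI(api_key="YOUR_API_KEY")
completion = client.chat.completions.create(
    model="gpt-4o-mini",
    messages=[{"role": "user", "content": "What is 1+1?"}]
)
# print only the text of the reply
print(completion.choices[0].message.content)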
$ vi llm-save-model.py
from transformers import AutoModelForCausalLM
model = AutoModelForCausalLM.from_pretrained("microsoft/DialoGPT-small")
path = "./models/microsoft/model/dialogpt-small"
# model serialization
model.save_pretrained(path)
Run the Python script:
$ python3 llm-save-model.py
This will create a directory containing:
$ ls -1 models/microsoft/model/dialogpt-small/
config.json
generation_config.json
model.safetensors
Files:
{
  "architectures": [
    "GPT2LMHeadModel"
  ],
  ...
  "transformers_version": "4.51.3",
  "vocab_size": 50257
}
$ vi llm-save-tokenizer.py
from transformers import AutoTokenizer
tokenizer = AutoTokenizer.from_pretrained("microsoft/DialoGPT-small")
path = "./models/microsoft/tokenizer/dialogpt-small"
# tokenizer serialization
tokenizer.save_pretrained(path)
Run the Python script:
$ python3 llm-save-tokenizer.py
This will create a directory containing:
$ ls -1 models/microsoft/tokenizer/dialogpt-small/
merges.txt
special_tokens_map.json
tokenizer.json
tokenizer_config.json
vocab.json
Files:
{
  "add_bos_token": false,
  "add_prefix_space": false,
  "added_tokens_decoder": {
    "50256": {
      "content": "<|endoftext|>",
      "lstrip": false,
      "normalized": true,
      "rstrip": false,
      "single_word": false,
      "special": true
    }
  },
  "bos_token": "<|endoftext|>",
  "chat_template": "{% for message in messages %}{{ message.content }}{{ eos_token }}{% endfor %}",
  "clean_up_tokenization_spaces": true,
  "eos_token": "<|endoftext|>",
  "errors": "replace",
  "extra_special_tokens": {},
  "model_max_length": 1024,
  "pad_token": null,
  "tokenizer_class": "GPT2Tokenizer",
  "unk_token": "<|endoftext|>"
}
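The chat_template field above is a Jinja template the tokenizer can use to flatten a list of messages into a single string; a minimal sketch reusing the tokenizer from llm-save-tokenizer.py:
messages = [
    {"role": "user", "content": "Hello!"},
    {"role": "assistant", "content": "Hi!"}
]
# each message is rendered followed by the eos token, per the template above
print(tokenizer.apply_chat_template(messages, tokenize=False))
# expected: Hello!<|endoftext|>Hi!<|endoftext|>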
$ vi llm-save-model-config.py
from transformers import AutoConfig
config = AutoConfig.from_pretrained("microsoft/DialoGPT-small")
path = "./models/microsoft/config/dialogpt-small"
# configuration serialization
config.save_pretrained(path)
Run the Python script:
$ python3 llm-save-model-config.py
This will create a directory containing:
$ ls -1 models/microsoft/config/dialogpt-small/
config.json
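A saved configuration can also be used to instantiate a fresh, randomly initialized model with the same architecture, which is useful when training from scratch; a minimal sketch:
from transformers import AutoConfig, AutoModelForCausalLM

config = AutoConfig.from_pretrained("./models/microsoft/config/dialogpt-small")
# builds the architecture described by the config, with random (untrained) weights
untrained_model = AutoModelForCausalLM.from_config(config)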
$ vi llm-load-model-tokenizer-config.py
# import the model, tokenizer, and configuration objects from the Transformers library
from transformers import AutoModelForCausalLM, AutoTokenizer, AutoConfig

model_path = "./models/microsoft/model/dialogpt-small"
tokenizer_path = "./models/microsoft/tokenizer/dialogpt-small"
config_path = "./models/microsoft/config/dialogpt-small"

# load the serialized model, tokenizer, and configuration from the local paths
model = AutoModelForCausalLM.from_pretrained(model_path)
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path)
config = AutoConfig.from_pretrained(config_path)

print(model)
print(tokenizer)
print(config)
Run the Python script:
$ python3 llm-load-model-tokenizer-config.py
Output:
GPT2LMHeadModel(
  (transformer): GPT2Model(
    (wte): Embedding(50257, 768)
    (wpe): Embedding(1024, 768)
    ...
  )
  (lm_head): Linear(in_features=768, out_features=50257, bias=False)
)
GPT2TokenizerFast(
    name_or_path='./models/microsoft/tokenizer/dialogpt-small',
    vocab_size=50257,
    model_max_length=1024,
    ...
    special_tokens={'bos_token': '<|endoftext|>', 'eos_token': '<|endoftext|>', 'unk_token': '<|endoftext|>'},
    added_tokens_decoder={50256: AddedToken("<|endoftext|>", rstrip=False, lstrip=False, single_word=False, normalized=True, special=True),}
)
GPT2Config {
  "architectures": [
    "GPT2LMHeadModel"
  ],
  ...
  "transformers_version": "4.51.3",
  "vocab_size": 50257
}
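As a quick end-to-end check, the generation example from the start of the section can be repeated against the locally loaded objects; a minimal sketch reusing model and tokenizer from llm-load-model-tokenizer-config.py:
# generate with the locally loaded model and tokenizer, exactly as before
input_ids = tokenizer.encode("Hello!" + tokenizer.eos_token, return_tensors='pt')
output = model.generate(input_ids=input_ids, max_new_tokens=50)
print(tokenizer.decode(output[0]))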