Before use, you need to request access (in the Bedrock console's Model access page) for these three models: Amazon Nova Pro, Nova Lite, and Nova Micro.
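Once access is granted, you can sanity-check that the Nova model IDs are visible to your account. This is an optional sketch using the bedrock control-plane client; note that listing a model only confirms it is available in the region, not that your access request was approved:
import boto3

# Control-plane client (model management); this is separate from the
# "bedrock-runtime" client used for inference in the examples below.
bedrock = boto3.client(service_name="bedrock", region_name="us-east-1")

# List Amazon-provided foundation models and print the Nova entries.
response = bedrock.list_foundation_models(byProvider="Amazon")
for model in response["modelSummaries"]:
    if "nova" in model["modelId"]:
        print(model["modelId"])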
The request parameters look like this:
{
  "system": [
    {
      "text": string
    }
  ],
  "messages": [
    {
      "role": "user", # first turn should always be the user turn
      "content": [
        {
          "text": string
        },
        {
          "image": {
            "format": "jpeg" | "png" | "gif" | "webp",
            "source": {
              "bytes": "base64EncodedImageDataHere..." # base64-encoded binary
            }
          }
        },
        {
          "video": {
            "format": "mkv" | "mov" | "mp4" | "webm" | "three_gp" | "flv" | "mpeg" | "mpg" | "wmv",
            "source": {
              # source can be an S3 location or base64 bytes, depending on the size of the input file
              "s3Location": {
                "uri": string, # example: s3://my-bucket/object-key
                "bucketOwner": string # (Optional) example: 123456789012
              },
              "bytes": "base64EncodedVideoDataHere..." # base64-encoded binary
            }
          }
        }
      ]
    },
    {
      "role": "assistant",
      "content": [
        {
          "text": string # prefilling assistant turn
        }
      ]
    }
  ],
  "inferenceConfig": { # all optional
    "max_new_tokens": int, # greater than 0, less than or equal to 5K (default: dynamic*)
    "temperature": float, # greater than 0 and less than 1.0 (default: 0.7)
    "top_p": float, # greater than 0, less than or equal to 1.0 (default: 0.9)
    "top_k": int, # 0 or greater (default: 50)
    "stopSequences": [string]
  },
  "toolConfig": { # all optional
    "tools": [
      {
        "toolSpec": {
          "name": string, # meaningful tool name (max chars: 64)
          "description": string, # meaningful description of the tool
          "inputSchema": {
            "json": { # The JSON schema for the tool. For more information, see the JSON Schema Reference
              "type": "object",
              "properties": {
                <args>: { # arguments
                  "type": string, # argument data type
                  "description": string # meaningful description
                }
              },
              "required": [
                string # args
              ]
            }
          }
        }
      }
    ],
    "toolChoice": "any" # Amazon Nova models ONLY support tool choice of "any"
  }
}
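The schema above includes toolConfig, but none of the examples below exercise it, so here is a minimal sketch of a tool-use request following that schema. The get_weather tool and its city argument are hypothetical, purely for illustration:
import json
import boto3

client = boto3.client(service_name="bedrock-runtime", region_name="us-east-1")
LITE_MODEL_ID = "us.amazon.nova-lite-v1:0"

# Hypothetical tool definition; "get_weather" and its "city" argument
# are illustrative, not part of any real API.
tool_request = {
    "messages": [
        {"role": "user", "content": [{"text": "What is the weather in Seattle?"}]}
    ],
    "toolConfig": {
        "tools": [
            {
                "toolSpec": {
                    "name": "get_weather",
                    "description": "Get the current weather for a city",
                    "inputSchema": {
                        "json": {
                            "type": "object",
                            "properties": {
                                "city": {
                                    "type": "string",
                                    "description": "Name of the city",
                                }
                            },
                            "required": ["city"],
                        }
                    },
                }
            }
        ],
        "toolChoice": "any",  # per the schema above, Nova only supports "any"
    },
    "inferenceConfig": {"max_new_tokens": 300},
}

response = client.invoke_model(modelId=LITE_MODEL_ID, body=json.dumps(tool_request))
model_response = json.loads(response["body"].read())
# The model's tool call (tool name plus JSON input) comes back in the
# assistant message content; print the raw message to inspect it.
print(json.dumps(model_response["output"]["message"], indent=2))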
The following example uses Nova Lite for text understanding. The code below calls Nova Lite and asks it to tell a joke:
import json

import boto3

client = boto3.client(service_name='bedrock-runtime', region_name="us-east-1")

PRO_MODEL_ID = "us.amazon.nova-pro-v1:0"
LITE_MODEL_ID = "us.amazon.nova-lite-v1:0"
MICRO_MODEL_ID = "us.amazon.nova-micro-v1:0"

native_request = {
    "messages": [  # Define one or more messages using the "user" and "assistant" roles.
        {"role": "user", "content": [{"text": "tell me a joke"}]},
    ],
    "system": [
        {"text": "You should respond to all messages in Chinese"}
    ],
    "inferenceConfig": {"max_new_tokens": 300, "top_p": 0.9, "top_k": 20, "temperature": 0.7},
}

# Invoke the model and extract the response body.
response = client.invoke_model(modelId=LITE_MODEL_ID, body=json.dumps(native_request))
model_response = json.loads(response["body"].read())

# Print the text content for easy readability.
content_text = model_response["output"]["message"]["content"][0]["text"]
print("\n[Response Content Text]")
print(content_text)
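For reference, the parsed model_response for a text request is JSON shaped roughly as follows. The output.message.content path matches the parsing code above; the stopReason and usage fields are assumptions based on the standard Bedrock message schema:
{
  "output": {
    "message": {
      "role": "assistant",
      "content": [
        {"text": "..."}
      ]
    }
  },
  "stopReason": "end_turn", # assumed field
  "usage": { # assumed field
    "inputTokens": int,
    "outputTokens": int,
    "totalTokens": int
  }
}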
The following example demonstrates the text-based streaming API:
import json
from datetime import datetime

import boto3

client = boto3.client(service_name='bedrock-runtime', region_name="us-east-1")

PRO_MODEL_ID = "us.amazon.nova-pro-v1:0"
LITE_MODEL_ID = "us.amazon.nova-lite-v1:0"
MICRO_MODEL_ID = "us.amazon.nova-micro-v1:0"

request_body = {
    "messages": [{"role": "user", "content": [{"text": "A camping trip"}]}],
    "system": [
        {"text": "Act as a creative writing assistant. When the user provides you with a topic, write a short story about that topic."}
    ],
    "inferenceConfig": {"max_new_tokens": 500, "top_p": 0.9, "top_k": 20, "temperature": 0.7},
}

start_time = datetime.now()

# Invoke the model with the response stream
response = client.invoke_model_with_response_stream(
    modelId=LITE_MODEL_ID, body=json.dumps(request_body)
)

print("Awaiting first token...")

chunk_count = 0
time_to_first_token = None

# Process the response stream
stream = response.get("body")
if stream:
    for event in stream:
        chunk = event.get("chunk")
        if chunk:
            # Decode the response chunk
            chunk_json = json.loads(chunk.get("bytes").decode())
            # Pretty print JSON
            # print(json.dumps(chunk_json, indent=2, ensure_ascii=False))
            content_block_delta = chunk_json.get("contentBlockDelta")
            if content_block_delta:
                if time_to_first_token is None:
                    time_to_first_token = datetime.now() - start_time
                    print(f"Time to first token: {time_to_first_token}")
                chunk_count += 1
                current_time = datetime.now().strftime("%Y-%m-%d %H:%M:%S:%f")
                # print(f"{current_time} - ", end="")
                print(content_block_delta.get("delta").get("text"), end="")
    print(f"Total chunks: {chunk_count}")
else:
    print("No response stream received.")
Let's look at how Nova models perform on an image-understanding use case. Here we pass in a sunset image and ask the model to come up with 3 artistic titles for it.
Save the image above as sunset.png, put it in the same directory as the code, and run:
import json
import base64

import boto3

client = boto3.client(service_name='bedrock-runtime', region_name="us-east-1")

PRO_MODEL_ID = "us.amazon.nova-pro-v1:0"
LITE_MODEL_ID = "us.amazon.nova-lite-v1:0"
MICRO_MODEL_ID = "us.amazon.nova-micro-v1:0"

# Read the image file and base64-encode it for the request payload.
with open("sunset.png", "rb") as image_file:
    binary_data = image_file.read()
    base_64_encoded_data = base64.b64encode(binary_data)
    base64_string = base_64_encoded_data.decode("utf-8")

native_request = {
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "image": {
                        "format": "png",
                        "source": {"bytes": base64_string},
                    }
                },
                {"text": "Provide art titles for this image."},
            ],
        }
    ],
    "system": [
        {"text": "You are an expert artist. When the user provides you with an image, provide 3 potential art titles"}
    ],
    "inferenceConfig": {"max_new_tokens": 300, "top_p": 0.1, "top_k": 20, "temperature": 0.3},
}

# Invoke the model and extract the response body.
response = client.invoke_model(modelId=LITE_MODEL_ID, body=json.dumps(native_request))
model_response = json.loads(response["body"].read())

# Print the text content for easy readability.
content_text = model_response["output"]["message"]["content"][0]["text"]
print("\n[Response Content Text]")
print(content_text)
Result:
Now let's see how Nova performs on a video-understanding use case.
Download the video:
wget https://pingfan.s3.amazonaws.com/files/the-sea.mp4
Use Nova to analyze the video and come up with titles for it:
import json
import base64

import boto3

client = boto3.client(service_name='bedrock-runtime', region_name="us-east-1")

PRO_MODEL_ID = "us.amazon.nova-pro-v1:0"
LITE_MODEL_ID = "us.amazon.nova-lite-v1:0"
MICRO_MODEL_ID = "us.amazon.nova-micro-v1:0"

# Read the video file and base64-encode it for the request payload.
with open("the-sea.mp4", "rb") as video_file:
    binary_data = video_file.read()
    base_64_encoded_data = base64.b64encode(binary_data)
    base64_string = base_64_encoded_data.decode("utf-8")

native_request = {
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "video": {
                        "format": "mp4",
                        "source": {"bytes": base64_string},
                    }
                },
                {"text": "Provide video titles for this clip."},
            ],
        }
    ],
    "system": [
        {"text": "You are an expert media analyst. When the user provides you with a video, provide 3 potential video titles"}
    ],
    "inferenceConfig": {"max_new_tokens": 300, "top_p": 0.1, "top_k": 20, "temperature": 0.3},
}

# Invoke the model and extract the response body.
response = client.invoke_model(modelId=LITE_MODEL_ID, body=json.dumps(native_request))
model_response = json.loads(response["body"].read())

# Pretty print the response JSON.
print("[Full Response]")
print(json.dumps(model_response, indent=2))

# Print the text content for easy readability.
content_text = model_response["output"]["message"]["content"][0]["text"]
print("\n[Response Content Text]")
print(content_text)
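Base64-encoding a large video inflates the request payload, and the schema above also allows an S3 location as the source. Here is a minimal sketch of the same request using s3Location, assuming the clip has been uploaded to a bucket you own (the bucket name is a placeholder):
# Same client and model ID as above; only the video source changes.
native_request_s3 = {
    "messages": [
        {
            "role": "user",
            "content": [
                {
                    "video": {
                        "format": "mp4",
                        # Reference the object in S3 instead of inlining base64
                        # bytes; "my-bucket" is a placeholder for your bucket.
                        "source": {
                            "s3Location": {"uri": "s3://my-bucket/the-sea.mp4"}
                        },
                    }
                },
                {"text": "Provide video titles for this clip."},
            ],
        }
    ],
    "inferenceConfig": {"max_new_tokens": 300},
}

response = client.invoke_model(modelId=LITE_MODEL_ID, body=json.dumps(native_request_s3))
print(json.loads(response["body"].read())["output"]["message"]["content"][0]["text"])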