# Install TensorFlow into the image; --no-cache-dir keeps pip's download cache
# out of the layer so the image stays smaller.
RUN pip install --no-cache-dir tensorflow
# ECR image URI of the training container, in the form
# <account-id>.dkr.ecr.<region>.amazonaws.com/<repository>:<tag>.
# Replace <account-id> and <region> with your AWS account ID and region;
# leaving them empty yields an invalid host name.
training_image = '<account-id>.dkr.ecr.<region>.amazonaws.com/deep-learning-container:latest'
# Define the hyperparameters.
hyperparameters = {
    'learning_rate': 0.01,
    'batch_size': 64,
    # ... add any further hyperparameters here
}
# Pass the hyperparameters to the Estimator.
rl_estimator = RLEstimator(
    entry_point="entry_point.py",
    source_dir="source_dir",
    # ... the remaining estimator arguments were elided in the original
    # snippet; supply them here as keyword arguments.
    hyperparameters=hyperparameters,
)
# Print the CloudWatch Logs output of a SageMaker training job.
job_name = 'your-training-job-name'
client = boto3.client('logs')
log_group_name = '/aws/sagemaker/TrainingJobs'

# SageMaker writes one log stream per training instance, named
# "<job-name>/algo-<n>-<timestamp>", so the job name alone is not a valid
# stream name. Look the streams up by prefix instead; the trailing "/" keeps
# other jobs whose names merely start with job_name from matching.
# NOTE(review): a single describe_log_streams call returns at most one page of
# streams — paginate if the job uses a very large instance count.
streams = client.describe_log_streams(
    logGroupName=log_group_name,
    logStreamNamePrefix=job_name + '/',
)['logStreams']

if not streams:
    print(f'No log streams found for training job {job_name!r}')

for stream in streams:
    log_stream_name = stream['logStreamName']
    request = {
        'logGroupName': log_group_name,
        'logStreamName': log_stream_name,
        'startFromHead': True,
    }
    # get_log_events returns one page per call; keep following
    # nextForwardToken until the service hands back the token we just sent,
    # which signals the end of the stream.
    while True:
        response = client.get_log_events(**request)
        for event in response['events']:
            print(event['message'])
        next_token = response['nextForwardToken']
        if next_token == request.get('nextToken'):
            break
        request['nextToken'] = next_token
这将打印有关您的训练任务的日志消息,以帮助您诊断并解决问题。