如果Argo工作流一直处于挂起(Pending)状态且没有任何事件记录,很可能是由以下原因之一造成的:
没有足够的资源:请确保您的Kubernetes集群有足够的资源来运行工作流。
工作流中的任务遇到问题:请检查每个任务的日志并确保其中没有任何错误。
以下是一份示例工作流,其中的check-task-states模板通过Kubernetes API列出指定工作流的所有Pod,并打印每个Pod的名称、所在节点和容器状态(该脚本不会输出日志,日志需另行通过kubectl logs或argo logs查看):
# Example Argo Workflow.
#   - my-entrypoint: runs the whalesay container once.
#   - check-task-states: lists the pods labelled with a given workflow name
#     and prints each pod's name, node and container statuses.
apiVersion: argoproj.io/v1alpha1
kind: Workflow
metadata:
  generateName: my-workflow-
spec:
  entrypoint: my-entrypoint
  templates:
    - name: my-entrypoint
      container:
        image: docker/whalesay
        command: [cowsay]
        args: ["hello world"]
    # NOTE(review): as shown, the entrypoint does not invoke this template; it
    # has to be referenced from a steps/DAG template (or used as the
    # entrypoint) with the workflow-name parameter supplied.
    # NOTE(review): the pod's service account presumably needs RBAC permission
    # to list pods in its namespace - confirm for your cluster.
    - name: check-task-states
      inputs:
        parameters:
          - name: workflow-name
      script:
        image: python:alpine3.8
        command: [python]
        source: |
          # Standard library only: the stock python image ships no
          # third-party HTTP client.
          import json
          import ssl
          import sys
          import urllib.error
          import urllib.parse
          import urllib.request

          SA_DIR = "/var/run/secrets/kubernetes.io/serviceaccount"


          def read_sa_file(name):
              """Return the stripped content of a service-account file."""
              with open(f"{SA_DIR}/{name}") as handle:
                  return handle.read().strip()


          # The script receives no command-line arguments; the input
          # parameter is substituted into the source before it runs.
          workflow_name = "{{inputs.parameters.workflow-name}}"
          namespace = read_sa_file("namespace")
          headers = {"Authorization": "Bearer " + read_sa_file("token")}

          selector = "workflows.argoproj.io/workflow=" + workflow_name
          query = urllib.parse.urlencode([("labelSelector", selector)])
          url = (
              "https://kubernetes.default.svc/api/v1/namespaces/"
              + namespace
              + "/pods?"
              + query
          )

          # Verify the API server certificate against the cluster CA that
          # is mounted next to the token.
          tls = ssl.create_default_context(cafile=f"{SA_DIR}/ca.crt")
          request = urllib.request.Request(url, headers=headers)
          try:
              with urllib.request.urlopen(request, context=tls) as response:
                  resp = json.load(response)
          except urllib.error.HTTPError as err:
              print(f"Kubernetes API request failed: HTTP {err.code} {err.reason}")
              sys.exit(1)

          items = resp.get("items") or []
          if not items:
              print("Unable to find any pods corresponding to the workflow")
              sys.exit(1)

          for pod in items:
              name = pod["metadata"]["name"]
              # Pods that have not been scheduled yet carry neither a node
              # name nor container statuses, so read both defensively.
              node_name = pod.get("spec", {}).get("nodeName", "<unscheduled>")
              container_statuses = pod.get("status", {}).get("containerStatuses", [])
              print(f"Pod Name: {name}")
              print(f"Node Name: {node_name}")
              print(f"Container Statuses: {json.dumps(container_statuses)}")
outputs:
parameters