Skip to content

Commit f53e0f9

Browse files
committed
feat: make docker.image idempotent
`docker.image(...)` will now check whether the image exists and only pull when required. It uses the following logic: 1. If `force == True`, it will always pull the image. 2. If an exact digest is provided, it will pull if there is no image with the same registry + repo + name + digest. 3. If the tag is `latest` (or no tag is given, which defaults to `latest`), it will always pull. 4. If a tag is provided and it is not `latest`, it will pull if no image exists with the same registry + repo + name + tag. It offloads most of this logic to Docker itself by relying on the `DockerImage` fact, which in turn uses `docker image inspect` to figure out whether an image with that name + tag or digest exists.
1 parent 6677c61 commit f53e0f9

19 files changed

+795
-48
lines changed

src/pyinfra/operations/docker.py

Lines changed: 57 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -8,9 +8,15 @@
88

99
from pyinfra import host
1010
from pyinfra.api import operation
11-
from pyinfra.facts.docker import DockerContainer, DockerNetwork, DockerPlugin, DockerVolume
11+
from pyinfra.facts.docker import (
12+
DockerContainer,
13+
DockerImage,
14+
DockerNetwork,
15+
DockerPlugin,
16+
DockerVolume,
17+
)
1218

13-
from .util.docker import ContainerSpec, handle_docker
19+
from .util.docker import ContainerSpec, handle_docker, parse_image_reference
1420

1521

1622
@operation()
@@ -127,8 +133,8 @@ def container(
127133
)
128134

129135

130-
@operation(is_idempotent=False)
131-
def image(image, present=True):
136+
@operation()
137+
def image(image: str, present: bool = True, force: bool = False):
132138
"""
133139
Manage Docker images
134140
@@ -153,20 +159,55 @@ def image(image, present=True):
153159
present=False,
154160
)
155161
"""
156-
162+
image_info = parse_image_reference(image)
157163
if present:
158-
yield handle_docker(
159-
resource="image",
160-
command="pull",
161-
image=image,
162-
)
163-
164+
if force:
165+
# always pull the image if force is True
166+
yield handle_docker(
167+
resource="image",
168+
command="pull",
169+
image=image,
170+
)
171+
return
172+
else:
173+
existent_image = host.get_fact(DockerImage, object_id=image)
174+
if image_info.digest:
175+
# If a digest is specified, we must ensure the exact image is present
176+
if existent_image:
177+
host.noop(f"Image with digest {image_info.digest} already exists!")
178+
else:
179+
yield handle_docker(
180+
resource="image",
181+
command="pull",
182+
image=image,
183+
)
184+
elif image_info.tag == "latest" or not image_info.tag:
185+
# If the tag is 'latest' or not specified, always pull to ensure freshness
186+
yield handle_docker(
187+
resource="image",
188+
command="pull",
189+
image=image,
190+
)
191+
else:
192+
# For other tags, check if the image exists
193+
if existent_image:
194+
host.noop(f"Image with tag {image_info.tag} already exists!")
195+
else:
196+
yield handle_docker(
197+
resource="image",
198+
command="pull",
199+
image=image,
200+
)
164201
else:
165-
yield handle_docker(
166-
resource="image",
167-
command="remove",
168-
image=image,
169-
)
202+
existent_image = host.get_fact(DockerImage, object_id=image)
203+
if existent_image:
204+
yield handle_docker(
205+
resource="image",
206+
command="remove",
207+
image=image,
208+
)
209+
else:
210+
host.noop("There is no {0} image!".format(image))
170211

171212

172213
@operation()

src/pyinfra/operations/util/docker.py

Lines changed: 160 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1,16 +1,168 @@
1-
import dataclasses
2-
from typing import Any, Dict, List
1+
from dataclasses import dataclass, field
2+
from typing import Any
33

44
from pyinfra.api import OperationError
55

66

7-
@dataclasses.dataclass
7+
@dataclass
8+
class ImageReference:
9+
"""Represents a parsed Docker image reference."""
10+
11+
repository: str
12+
namespace: str | None = None
13+
tag: str | None = None
14+
digest: str | None = None
15+
registry_host: str | None = None
16+
registry_port: int | None = None
17+
18+
@property
19+
def registry(self) -> str | None:
20+
"""Get the full registry address (host:port)."""
21+
if not self.registry_host:
22+
return None
23+
if self.registry_port:
24+
return f"{self.registry_host}:{self.registry_port}"
25+
return self.registry_host
26+
27+
@property
28+
def name(self) -> str:
29+
"""Get the full image name without tag or digest."""
30+
parts = []
31+
if self.registry:
32+
parts.append(self.registry)
33+
if self.namespace:
34+
parts.append(self.namespace)
35+
parts.append(self.repository)
36+
return "/".join(parts)
37+
38+
@property
39+
def full_reference(self) -> str:
40+
"""Get the complete image reference string."""
41+
ref = self.name
42+
if self.tag:
43+
ref += f":{self.tag}"
44+
if self.digest:
45+
ref += f"@{self.digest}"
46+
return ref
47+
48+
49+
def parse_registry(registry: str) -> tuple[str, int | None]:
50+
"""
51+
Parse a registry string into host and port components.
52+
53+
Args:
54+
registry: String like "registry.io:5000" or "registry.io"
55+
56+
Returns:
57+
tuple: (host, port) where port is None if not specified
58+
59+
Raises:
60+
ValueError: If port is specified but not a valid integer
61+
"""
62+
if ":" in registry:
63+
host, port_str = registry.rsplit(":", 1)
64+
if port_str: # Only try to parse if port_str is not empty
65+
try:
66+
port = int(port_str)
67+
if port < 0 or port > 65535:
68+
raise ValueError(
69+
f"Invalid port number: {port}. Port must be between 0 and 65535"
70+
)
71+
return host, port
72+
except ValueError as e:
73+
if "invalid literal" in str(e):
74+
raise ValueError(
75+
f"Invalid port in registry '{registry}': '{port_str}' is not a valid port number"
76+
)
77+
raise # Re-raise port range error
78+
else:
79+
# Empty port (e.g., "registry.io:")
80+
raise ValueError(f"Invalid registry format '{registry}': port cannot be empty")
81+
else:
82+
return registry, None
83+
84+
85+
def parse_image_reference(image: str) -> ImageReference:
    """
    Parse a Docker image reference into components.

    Format: [HOST[:PORT]/][NAMESPACE/]REPOSITORY[:TAG][@DIGEST]

    With exactly two path components, the first one is treated as a registry
    when it contains "." or ":" or is exactly "localhost"; otherwise it is
    treated as a namespace. With three or more components, the first one is
    always treated as the registry and the middle ones form the namespace.

    If neither a tag nor a digest is given, the tag defaults to "latest".

    Args:
        image: The image reference string; surrounding whitespace is ignored

    Returns:
        ImageReference: The parsed components

    Raises:
        ValueError: If the image reference is empty, has no repository, or
            carries an invalid registry port
    """
    if not image or not image.strip():
        raise ValueError("Image reference cannot be empty")

    remainder = image.strip()
    registry_host = None
    registry_port = None
    namespace = None
    tag = None
    digest = None

    # Extract digest first (format: name@digest)
    if "@" in remainder:
        remainder, digest = remainder.rsplit("@", 1)

    # Extract tag (format: name:tag). A tag cannot contain "/": if the text
    # after the last colon does, that colon separates registry host and port.
    head, colon, candidate_tag = remainder.rpartition(":")
    if colon and "/" not in candidate_tag:
        remainder, tag = head, candidate_tag

    # Split by "/" to separate registry/namespace/repository; the last
    # component is always the repository.
    *leading, repository = remainder.split("/")

    if len(leading) == 1:
        first = leading[0]
        # Could be namespace/repository or registry/repository. "localhost"
        # counts as a registry even without a dot or port, matching how
        # Docker resolves references and how the 3+ component case below
        # already treats it.
        if "." in first or ":" in first or first == "localhost":
            registry_host, registry_port = parse_registry(first)
        else:
            namespace = first
    elif len(leading) >= 2:
        # registry/namespace/repository or registry/nested/namespace/repository
        registry_host, registry_port = parse_registry(leading[0])
        namespace = "/".join(leading[1:])

    # Validate that we found a repository
    if not repository:
        raise ValueError(f"Invalid image reference: no repository found in '{image}'")

    # Default tag to 'latest' if neither tag nor digest specified. This is Docker's default behavior.
    if tag is None and digest is None:
        tag = "latest"

    return ImageReference(
        repository=repository,
        namespace=namespace,
        tag=tag,
        digest=digest,
        registry_host=registry_host,
        registry_port=registry_port,
    )
157+
158+
159+
@dataclass
8160
class ContainerSpec:
9161
image: str = ""
10-
ports: List[str] = dataclasses.field(default_factory=list)
11-
networks: List[str] = dataclasses.field(default_factory=list)
12-
volumes: List[str] = dataclasses.field(default_factory=list)
13-
env_vars: List[str] = dataclasses.field(default_factory=list)
162+
ports: list[str] = field(default_factory=list)
163+
networks: list[str] = field(default_factory=list)
164+
volumes: list[str] = field(default_factory=list)
165+
env_vars: list[str] = field(default_factory=list)
14166
pull_always: bool = False
15167

16168
def container_create_args(self):
@@ -34,7 +186,7 @@ def container_create_args(self):
34186

35187
return args
36188

37-
def diff_from_inspect(self, inspect_dict: Dict[str, Any]) -> List[str]:
189+
def diff_from_inspect(self, inspect_dict: dict[str, Any]) -> list[str]:
38190
# TODO(@minor-fixes): Diff output of "docker inspect" against this spec
39191
# to determine if the container needs to be recreated. Currently, this
40192
# function will never recreate when attributes change, which is
Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,27 @@
1+
{
2+
"kwargs": {
3+
"image": "nginx:alpine",
4+
"present": true,
5+
"force": true
6+
},
7+
"facts": {
8+
"docker.DockerImage": {
9+
"object_id=nginx:alpine": [
10+
{
11+
"Id": "sha256:e784f4560448b14a66f55c26e1b4dad2c2877cc73d001b7cd0b18e24a700a070",
12+
"RepoTags": [
13+
"nginx:alpine"
14+
],
15+
"RepoDigests": [
16+
"nginx@sha256:abcd1234"
17+
],
18+
"Created": "2024-05-26T22:01:24.10525839Z",
19+
"Size": 41390752
20+
}
21+
]
22+
}
23+
},
24+
"commands": [
25+
"docker image pull nginx:alpine"
26+
]
27+
}
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
{
2+
"kwargs": {
3+
"image": "ghcr.io/owner/repo:v2.0",
4+
"present": true,
5+
"force": false
6+
},
7+
"facts": {
8+
"docker.DockerImage": {
9+
"object_id=ghcr.io/owner/repo:v2.0": [
10+
{
11+
"Id": "sha256:e784f4560448b14a66f55c26e1b4dad2c2877cc73d001b7cd0b18e24a700a070",
12+
"RepoTags": [
13+
"ghcr.io/owner/repo:v2.0"
14+
],
15+
"RepoDigests": [
16+
"ghcr.io/owner/repo@sha256:abcd1234"
17+
],
18+
"Created": "2024-05-26T22:01:24.10525839Z",
19+
"Size": 41390752
20+
}
21+
]
22+
}
23+
},
24+
"commands": [],
25+
"noop_description": "Image with tag v2.0 already exists!"
26+
}
Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
{
2+
"kwargs": {
3+
"image": "nginx@sha256:abcd1234567890",
4+
"present": true,
5+
"force": false
6+
},
7+
"facts": {
8+
"docker.DockerImage": {
9+
"object_id=nginx@sha256:abcd1234567890": [
10+
{
11+
"Id": "sha256:e784f4560448b14a66f55c26e1b4dad2c2877cc73d001b7cd0b18e24a700a070",
12+
"RepoTags": [],
13+
"RepoDigests": [
14+
"nginx@sha256:abcd1234567890"
15+
],
16+
"Created": "2024-05-26T22:01:24.10525839Z",
17+
"Size": 41390752
18+
}
19+
]
20+
}
21+
},
22+
"commands": [],
23+
"noop_description": "Image with digest sha256:abcd1234567890 already exists!"
24+
}
Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,15 @@
1+
{
2+
"kwargs": {
3+
"image": "nginx@sha256:abcd1234567890",
4+
"present": true,
5+
"force": false
6+
},
7+
"facts": {
8+
"docker.DockerImage": {
9+
"object_id=nginx@sha256:abcd1234567890": []
10+
}
11+
},
12+
"commands": [
13+
"docker image pull nginx@sha256:abcd1234567890"
14+
]
15+
}

0 commit comments

Comments
 (0)