模型部署资源分配部分

This commit is contained in:
dc
2025-05-30 13:45:01 +08:00
parent 3fb10b1e2f
commit 9eef82b642
4 changed files with 115 additions and 0 deletions

View File

@ -0,0 +1,74 @@
package com.bipt.intelligentapplicationorchestrationservice.deploy.deployment;
import com.bipt.intelligentapplicationorchestrationservice.deploy.entity.DeploymentResource;
import com.bipt.intelligentapplicationorchestrationservice.gpu.model.entity.GpuResource;
import com.bipt.intelligentapplicationorchestrationservice.utils.ConfigConstants;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.beans.factory.annotation.Value;
import org.springframework.stereotype.Component;
import org.springframework.web.client.ResourceAccessException;
import java.util.Comparator;
import java.util.List;
/**
 * Picks a GPU for a model deployment and builds the service URL for it.
 *
 * <p>{@code getGPUMemorySize()} is treated as the memory already in use: the remaining capacity
 * of a GPU is computed as {@code getGPUMaxMemory() - getGPUMemorySize()}.
 *
 * <p>Review notes on this revision:
 * <ul>
 *   <li>The caller's list is no longer sorted in place; ordering is done on a stream, so
 *       unmodifiable lists are accepted and the argument is left untouched.</li>
 *   <li>The former second "defragmentation" pass re-tested the same capacity condition on the
 *       same GPUs that the first pass had already rejected, so it could never find a candidate.
 *       It has been folded away; no workload migration or compaction is performed here.</li>
 * </ul>
 */
@Component
public class ResourceAllocator {

    /** Message reported when no GPU can host the requested model. */
    private static final String INSUFFICIENT_GPU_MESSAGE = "GPU资源不足";

    @Autowired
    private ConfigConstants config;

    /**
     * Returns the remaining memory of a GPU.
     *
     * @param resource the GPU to inspect
     * @return {@code getGPUMaxMemory() - getGPUMemorySize()} of the given GPU
     */
    private int getRemainingMemory(GpuResource resource) {
        return resource.getGPUMaxMemory() - resource.getGPUMemorySize();
    }

    /**
     * Selects the first GPU, in ascending order of {@code getGPUMemorySize()}, whose remaining
     * memory is at least {@code requiredMemory}, and wraps it together with its deployment URL.
     *
     * <p>The sort is stable, so GPUs with equal {@code getGPUMemorySize()} keep the relative
     * order they have in {@code resources}.
     *
     * @param resources      candidate GPUs; not modified by this method
     * @param requiredMemory memory the model needs, in the same unit as the GPU memory getters
     * @param modelId        identifier inserted into the deployment URL
     * @param isGray         {@code true} to build a "gray" URL, {@code false} for "prod"
     * @return the chosen GPU and its deployment URL
     * @throws ResourceAccessException if {@code resources} is null or empty, or no GPU has
     *                                 enough remaining memory
     */
    public DeploymentResource allocate(
            List<GpuResource> resources,
            int requiredMemory,
            String modelId,
            boolean isGray
    ) {
        // Nothing to choose from: report it the same way as "no GPU is large enough".
        if (resources == null || resources.isEmpty()) {
            throw new ResourceAccessException(INSUFFICIENT_GPU_MESSAGE);
        }
        return resources.stream()
                .sorted(Comparator.comparingInt(GpuResource::getGPUMemorySize))
                .filter(gpu -> getRemainingMemory(gpu) >= requiredMemory)
                .findFirst()
                .map(gpu -> createResource(gpu, modelId, isGray))
                .orElseThrow(() -> new ResourceAccessException(INSUFFICIENT_GPU_MESSAGE));
    }

    /**
     * Builds the {@link DeploymentResource} for a chosen GPU.
     *
     * <p>The URL is produced by {@code String.format} from {@code config.URL_TEMPLATE} with, in
     * order: the GPU's IP, {@code config.MODEL_PORT}, the model id, and "gray" or "prod".
     *
     * @param gpu     the GPU that will host the model
     * @param modelId identifier inserted into the URL
     * @param isGray  selects the "gray" or "prod" URL segment
     * @return the GPU paired with the formatted URL
     */
    private DeploymentResource createResource(GpuResource gpu, String modelId, boolean isGray) {
        String urlType = isGray ? "gray" : "prod";
        String url = String.format(
                config.URL_TEMPLATE,
                gpu.getIp(),
                config.MODEL_PORT,
                modelId,
                urlType
        );
        return new DeploymentResource(gpu, url);
    }
}

View File

@ -0,0 +1,4 @@
package com.bipt.intelligentapplicationorchestrationservice.deploy.entity;
/**
 * Empty placeholder class: it declares no fields or methods yet.
 *
 * <p>NOTE(review): presumably intended to carry the parameters of a deployment request —
 * confirm with the author and add the fields once the contract is settled.
 */
public class DeployRequest {
}

View File

@ -0,0 +1,25 @@
package com.bipt.intelligentapplicationorchestrationservice.deploy.entity;
/**
 * Immutable result envelope holding a success flag, an error description, a status code and an
 * optional payload.
 *
 * <p>Instances are normally obtained through {@link #success(Object)} or
 * {@link #fail(int, String)}. Read access is provided through the getters below; without them
 * the stored values could not be observed by callers.
 *
 * @param <T> type of the payload carried on success
 */
public class DeployResponse<T> {

    private final boolean isSuccess;
    private final String errorInfo;
    private final int status;
    private final T data;

    /**
     * Creates a response with every field given explicitly.
     *
     * @param isSuccess whether the operation succeeded
     * @param errorInfo error description
     * @param status    status code
     * @param data      payload; may be {@code null}
     */
    public DeployResponse(boolean isSuccess, String errorInfo, int status, T data) {
        this.isSuccess = isSuccess;
        this.errorInfo = errorInfo;
        this.status = status;
        this.data = data;
    }

    /**
     * Builds a successful response: success flag set, empty error text, status 200.
     *
     * @param data payload to return
     * @param <T>  payload type
     * @return the success response wrapping {@code data}
     */
    public static <T> DeployResponse<T> success(T data) {
        return new DeployResponse<>(true, "", 200, data);
    }

    /**
     * Builds a failed response: success flag cleared and no payload.
     *
     * @param status status code to report
     * @param error  error description
     * @param <T>    payload type of the (absent) data
     * @return the failure response
     */
    public static <T> DeployResponse<T> fail(int status, String error) {
        return new DeployResponse<>(false, error, status, null);
    }

    /** @return {@code true} if this response represents a success */
    public boolean isSuccess() {
        return isSuccess;
    }

    /** @return the error description; the empty string for responses built by {@link #success} */
    public String getErrorInfo() {
        return errorInfo;
    }

    /** @return the status code */
    public int getStatus() {
        return status;
    }

    /** @return the payload, or {@code null} for responses built by {@link #fail} */
    public T getData() {
        return data;
    }
}

View File

@ -0,0 +1,12 @@
package com.bipt.intelligentapplicationorchestrationservice.deploy.entity;
import com.bipt.intelligentapplicationorchestrationservice.gpu.model.entity.GpuResource;
import lombok.AllArgsConstructor;
import lombok.Getter;
/**
 * Immutable pair of a GPU and the deployment URL built for it.
 *
 * <p>The constructor and accessors are written out explicitly; they expose the same public
 * API ({@code DeploymentResource(gpu, url)}, {@code getGpu()}, {@code getUrl()}).
 */
public class DeploymentResource {

    private final GpuResource gpu;
    private final String url;

    /**
     * @param gpu the GPU backing the deployment
     * @param url the URL associated with the deployment
     */
    public DeploymentResource(GpuResource gpu, String url) {
        this.gpu = gpu;
        this.url = url;
    }

    /** @return the GPU passed at construction */
    public GpuResource getGpu() {
        return this.gpu;
    }

    /** @return the URL passed at construction */
    public String getUrl() {
        return this.url;
    }
}