WIP: dc-feature #14
@ -0,0 +1,74 @@
|
||||
package com.bipt.intelligentapplicationorchestrationservice.deploy.deployment;
|
||||
|
||||
import com.bipt.intelligentapplicationorchestrationservice.deploy.entity.DeploymentResource;
|
||||
import com.bipt.intelligentapplicationorchestrationservice.gpu.model.entity.GpuResource;
|
||||
import com.bipt.intelligentapplicationorchestrationservice.utils.ConfigConstants;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.stereotype.Component;
|
||||
import org.springframework.web.client.ResourceAccessException;
|
||||
|
||||
import java.util.Comparator;
|
||||
import java.util.List;
|
||||
|
||||
@Component
|
||||
public class ResourceAllocator {
|
||||
|
||||
@Autowired
|
||||
private ConfigConstants config;
|
||||
|
||||
//获取剩余内存
|
||||
private int getRemainingMemory(GpuResource resource){
|
||||
return resource.getGPUMaxMemory()-resource.getGPUMemorySize();
|
||||
}
|
||||
|
||||
public DeploymentResource allocate(
|
||||
List<GpuResource> resources,
|
||||
int requiredMemory,
|
||||
String modelId,
|
||||
boolean isGray
|
||||
){
|
||||
resources.sort(Comparator.comparingInt(GpuResource::getGPUMemorySize));
|
||||
|
||||
//第一轮分配
|
||||
for(GpuResource resource:resources){
|
||||
if(getRemainingMemory(resource) >= requiredMemory) {
|
||||
return createResource(resource, modelId, isGray);
|
||||
}
|
||||
}
|
||||
|
||||
//第二轮分配
|
||||
return defragmentation(resources,requiredMemory, modelId, isGray);
|
||||
}
|
||||
|
||||
|
||||
private DeploymentResource defragmentation(
|
||||
List<GpuResource> resources,
|
||||
int requiredMemory,
|
||||
String modelId,
|
||||
boolean isGray
|
||||
){
|
||||
//按内存碎片大小排序(最小碎片优先)
|
||||
resources.sort(Comparator.comparingDouble(
|
||||
r -> (double)getRemainingMemory(r) / r.getGPUMaxMemory()));
|
||||
|
||||
for(GpuResource resource:resources){
|
||||
if(getRemainingMemory(resource) >= requiredMemory){
|
||||
return createResource(resource, modelId, isGray);
|
||||
}
|
||||
}
|
||||
throw new ResourceAccessException("GPU资源不足");
|
||||
}
|
||||
|
||||
private DeploymentResource createResource(GpuResource gpu, String modelId, boolean isGray){
|
||||
String urlType = isGray ? "gray":"prod";
|
||||
String url = String.format(
|
||||
config.URL_TEMPLATE,
|
||||
gpu.getIp(),
|
||||
config.MODEL_PORT,
|
||||
modelId,
|
||||
urlType
|
||||
);
|
||||
return new DeploymentResource(gpu, url);
|
||||
}
|
||||
}
|
@ -0,0 +1,4 @@
|
||||
package com.bipt.intelligentapplicationorchestrationservice.deploy.entity;
|
||||
|
||||
/**
 * Request payload for a deployment operation.
 *
 * <p>Currently an empty placeholder: it declares no fields or methods.
 */
public class DeployRequest {
}
|
@ -0,0 +1,25 @@
|
||||
package com.bipt.intelligentapplicationorchestrationservice.deploy.entity;
|
||||
|
||||
/**
 * Immutable result envelope for deployment operations.
 *
 * <p>Carries a success flag, an error description, a numeric status code and
 * an optional payload. Use {@link #success(Object)} and
 * {@link #fail(int, String)} to build instances.
 *
 * @param <T> type of the payload carried on success
 */
public class DeployResponse<T> {

    private final boolean isSuccess;
    private final String errorInfo;
    private final int status;
    private final T data;

    /**
     * Creates a response with every field given explicitly.
     *
     * @param success   whether the operation succeeded
     * @param errorInfo error description (the factories use {@code ""} on success)
     * @param status    numeric status code
     * @param data      payload, may be {@code null}
     */
    public DeployResponse(boolean success, String errorInfo, int status, T data) {
        this.isSuccess = success;
        this.errorInfo = errorInfo;
        this.status = status;
        this.data = data;
    }

    /**
     * Builds a successful response: status 200, empty error text.
     *
     * @param data payload to return
     * @param <T>  payload type
     * @return a response flagged as successful
     */
    public static <T> DeployResponse<T> success(T data) {
        return new DeployResponse<>(true, "", 200, data);
    }

    /**
     * Builds a failed response with no payload.
     *
     * @param status numeric status code describing the failure
     * @param error  error description
     * @param <T>    payload type expected by the caller
     * @return a response flagged as failed, with {@code null} data
     */
    public static <T> DeployResponse<T> fail(int status, String error) {
        return new DeployResponse<>(false, error, status, null);
    }

    // Accessors: without these the private fields could not be read by any
    // caller, so the envelope's contents were unreachable.

    /** @return {@code true} if the operation succeeded */
    public boolean isSuccess() {
        return isSuccess;
    }

    /** @return the error description */
    public String getErrorInfo() {
        return errorInfo;
    }

    /** @return the numeric status code */
    public int getStatus() {
        return status;
    }

    /** @return the payload, or {@code null} if none was supplied */
    public T getData() {
        return data;
    }
}
|
@ -0,0 +1,12 @@
|
||||
package com.bipt.intelligentapplicationorchestrationservice.deploy.entity;
|
||||
|
||||
import com.bipt.intelligentapplicationorchestrationservice.gpu.model.entity.GpuResource;
|
||||
import lombok.AllArgsConstructor;
|
||||
import lombok.Getter;
|
||||
|
||||
@Getter
|
||||
@AllArgsConstructor
|
||||
public class DeploymentResource {
|
||||
private final GpuResource gpu;
|
||||
private final String url;
|
||||
}
|
Reference in New Issue
Block a user