Skip to content

Commit

Permalink
revert: "fix: ai和hpc在提交作业和应用前检查一下是否重名" (#1359)
Browse files: browse the repository at this point in the history
Reverts #1356
  • Loading branch information
OYX-1 authored Jul 15, 2024
1 parent e9c8bfa commit 56f5a69
Show file tree
Hide file tree
Showing 9 changed files with 6 additions and 117 deletions.
7 changes: 0 additions & 7 deletions .changeset/tasty-files-end.md

This file was deleted.

22 changes: 2 additions & 20 deletions apps/ai/src/server/trpc/route/jobs/apps.ts
Original file line number Diff line number Diff line change
Expand Up @@ -299,24 +299,6 @@ export const createAppSession = procedure
model, mountPoints = [], account, partition, coreCount, nodeCount, gpuCount, memory,
maxTime, workingDirectory, customAttributes } = input;

const userId = user.identityId;
const client = getAdapterClient(clusterId);

// 检查是否存在同名的作业
const existedJobName = await asyncClientCall(client.job, "getJobs", {
fields: ["job_id"],
filter: {
users: [userId], accounts: [],states: [],jobName:appJobName,
},
}).then((resp) => resp.jobs);

if (existedJobName.length) {
throw new TRPCError({
code: "CONFLICT",
message: `appJobName ${appJobName} is already existed`,
});
}

const apps = getClusterAppConfigs(clusterId);
const app = checkAppExist(apps, appId);

Expand Down Expand Up @@ -389,7 +371,7 @@ export const createAppSession = procedure
throw clusterNotFound(clusterId);
}


const userId = user.identityId;
return await sshConnect(host, userId, logger, async (ssh) => {
const homeDir = await getUserHomedir(ssh, userId, logger);

Expand Down Expand Up @@ -478,7 +460,7 @@ export const createAppSession = procedure

// 将entry.sh写入后将路径传给适配器后启动容器
await sftpWriteFile(sftp)(remoteEntryPath, entryScript);

const client = getAdapterClient(clusterId);
const reply = await asyncClientCall(client.job, "submitJob", {
userId,
jobName: appJobName,
Expand Down
18 changes: 1 addition & 17 deletions apps/ai/src/server/trpc/route/jobs/jobs.ts
Original file line number Diff line number Diff line change
Expand Up @@ -128,23 +128,6 @@ procedure
throw clusterNotFound(clusterId);
}

const client = getAdapterClient(clusterId);

// 检查是否存在同名的作业
const existedJobName = await asyncClientCall(client.job, "getJobs", {
fields: ["job_id"],
filter: {
users: [userId], accounts: [],states: [],jobName:trainJobName,
},
}).then((resp) => resp.jobs);

if (existedJobName.length) {
throw new TRPCError({
code: "CONFLICT",
message: `trainJobName ${trainJobName} is already existed`,
});
}

const em = await forkEntityManager();
const {
datasetVersion,
Expand Down Expand Up @@ -191,6 +174,7 @@ procedure
const entryScript = command;
await sftpWriteFile(sftp)(remoteEntryPath, entryScript);

const client = getAdapterClient(clusterId);
const reply = await asyncClientCall(client.job, "submitJob", {
userId,
jobName: trainJobName,
Expand Down
20 changes: 0 additions & 20 deletions apps/portal-server/src/clusterops/app.ts
Original file line number Diff line number Diff line change
Expand Up @@ -78,26 +78,6 @@ export const appOps = (cluster: string): AppOps => {
const { appId, userId, account, coreCount, nodeCount, gpuCount, memory, maxTime, proxyBasePath,
partition, qos, customAttributes, appJobName } = request;

// 检查是否存在同名的作业
const existedJobName = await callOnOne(
cluster,
logger,
async (client) => await asyncClientCall(client.job, "getJobs", {
fields: ["job_id"],
filter: {
users: [userId], accounts: [], states: [],jobName:appJobName,
},
}),
).then((resp) => resp.jobs);

if (existedJobName.length) {
throw new DetailedError({
code: Status.ALREADY_EXISTS,
message: `appJobName ${appJobName} is already existed`,
details: [errorInfo("ALREADY EXISTS")],
});
}

const memoryMb = memory ? Number(memory.slice(0, -2)) : undefined;


Expand Down
20 changes: 0 additions & 20 deletions apps/portal-server/src/services/job.ts
Original file line number Diff line number Diff line change
Expand Up @@ -233,26 +233,6 @@ export const jobServiceServer = plugin((server) => {
, errorOutput, memory, scriptOutput } = request;
await checkActivatedClusters({ clusterIds: cluster });

// 检查是否存在同名的作业
const existedJobName = await callOnOne(
cluster,
logger,
async (client) => await asyncClientCall(client.job, "getJobs", {
fields: ["job_id"],
filter: {
users: [userId], accounts: [], states: [],jobName,
},
}),
).then((resp) => resp.jobs);

if (existedJobName.length) {
throw {
code: Status.ALREADY_EXISTS,
message: "already exists",
details: `jobName ${jobName} is already existed`,
} as ServiceError;
}

// make sure working directory exists
const host = getClusterLoginNode(cluster);
if (!host) { throw clusterNotFound(cluster); }
Expand Down
9 changes: 1 addition & 8 deletions apps/portal-web/src/pageComponents/app/LaunchAppForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -116,7 +116,7 @@ export const LaunchAppForm: React.FC<Props> = ({ clusterId, appId, attributes, a
maxTime,
customAttributes: customFormKeyValue,
} })
.httpError(500, (e) => {
.httpError(409, (e) => {
if (e.code === "SBATCH_FAILED") {
createErrorModal(e.message);
} else {
Expand All @@ -137,13 +137,6 @@ export const LaunchAppForm: React.FC<Props> = ({ clusterId, appId, attributes, a
throw e;
}
})
.httpError(409, (e) => {
if (e.code === "ALREADY_EXISTS") {
createErrorModal(e.message);
} else {
throw e;
}
})
.then(() => {
message.success(t(p("successMessage")));
Router.push(`/apps/${clusterId}/sessions`);
Expand Down
10 changes: 0 additions & 10 deletions apps/portal-web/src/pageComponents/job/SubmitJobForm.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -132,16 +132,6 @@ export const SubmitJobForm: React.FC<Props> = ({ initial = initialValues, submit
throw e;
}
})
.httpError(409, (e) => {
if (e.code === "ALREADY_EXISTS") {
modal.error({
title: t(p("errorMessage")),
content: e.message,
});
} else {
throw e;
}
})
.then(({ jobId }) => {
message.success(t(p("successMessage")) + jobId);
Router.push("/jobs/runningJobs");
Expand Down
11 changes: 2 additions & 9 deletions apps/portal-web/src/pages/api/app/createAppSession.ts
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,12 @@ export const CreateAppSessionSchema = typeboxRouteSchema({
message: Type.String(),
}),

409: Type.Object({
code: Type.Literal("ALREADY_EXISTS"),
message: Type.String(),
}),

404: Type.Object({
code: Type.Literal("APP_NOT_FOUND"),
message: Type.String(),
}),

500: Type.Object({
409: Type.Object({
code: Type.Literal("SBATCH_FAILED"),
message: Type.String(),
}),
Expand Down Expand Up @@ -131,13 +126,11 @@ export default /* #__PURE__*/route(CreateAppSessionSchema, async (req, res) => {
if (errors[0] && errors[0].$type === "google.rpc.ErrorInfo") {
switch (errors[0].reason) {
case "SBATCH_FAILED":
return { 500: { code: "SBATCH_FAILED" as const, message: ex.details } };
return { 409: { code: "SBATCH_FAILED" as const, message: ex.details } };
case "NOT FOUND":
return { 404: { code: "APP_NOT_FOUND" as const, message: ex.details } };
case "INVALID ARGUMENT":
return { 400: { code: "INVALID_INPUT" as const, message: ex.details } };
case "ALREADY EXISTS":
return { 409: { code: "ALREADY_EXISTS" as const, message: ex.details } };
default:
return e;
}
Expand Down
6 changes: 0 additions & 6 deletions apps/portal-web/src/pages/api/job/submitJob.ts
Original file line number Diff line number Diff line change
Expand Up @@ -65,11 +65,6 @@ export const SubmitJobSchema = typeboxRouteSchema({
message: Type.String(),
}),

409: Type.Object({
code: Type.Literal("ALREADY_EXISTS"),
message: Type.String(),
}),

500: Type.Object({
code: Type.Literal("SCHEDULER_FAILED"),
message: Type.String(),
Expand Down Expand Up @@ -142,7 +137,6 @@ export default route(SubmitJobSchema, async (req, res) => {
.catch(handlegRPCError({
[status.INTERNAL]: (err) => ({ 500: { code: "SCHEDULER_FAILED", message: err.details } } as const),
[status.NOT_FOUND]: (err) => ({ 404: { code: "NOT_FOUND", message: err.details } } as const),
[status.ALREADY_EXISTS]: (err) => ({ 409: { code: "ALREADY_EXISTS", message: err.details } } as const),
},
async () => await callLog(
{ ...logInfo,
Expand Down

0 comments on commit 56f5a69

Please sign in to comment.