import * as z from "zod";

import { Clip } from "~/utils/videoClips";

import { serviceTranscriptWordSchema } from "../Transcription";

// all of these schemas are copy-pasted from the backend,
// along with any schemas they depend on.
// TODO: move them to a separate package and share between backend and frontend

// ------------------- DTO (video) --------------------- //
// From `backend/core/src/modules/video/video.types.ts`

/**
 * Status payload of a video job, keyed in snake_case.
 *
 * `status` is validated as a free-form string (it is not restricted to
 * `JobStatusSchema`). `progress` and `error_message` accept `null` and fall
 * back to `null` when the key is missing from the input.
 */
const VideoJobStatusInfoSchema = z.object({
  job_id: z.string(),
  status: z.string(),
  progress: z.number().nullable().default(null),
  error_message: z.string().nullable().default(null),
});

/**
 * Camel-cased view of `VideoJobStatusInfoSchema`: the input is validated with
 * the snake_case schema first, then re-keyed to `jobId`, `errorMessage`,
 * `progress` and `status`.
 */
const DtoRes_StartJobSchema = VideoJobStatusInfoSchema.transform(
  ({ job_id, error_message, progress, status }) => ({
    jobId: job_id,
    errorMessage: error_message,
    progress,
    status,
  })
);

// ------------------- DTO (audio) --------------------- //
// exact copy-paste from `backend/core/src/modules/audio/audio.types.ts`

// Static types inferred from the zod schemas declared further down in this file.
// `typeof` in a type position may reference a `const` declared later.

/** Shape described by `DtoReq_StartAudioDubJobSchema`. */
export type DtoReq_StartAudioDubJob = z.infer<typeof DtoReq_StartAudioDubJobSchema>;
/** Shape described by `DtoRes_StartAudioDubJobSchema`. */
export type DtoRes_StartAudioDubJob = z.infer<typeof DtoRes_StartAudioDubJobSchema>;
/** Shape described by `DtoRes_GetAudioDubJobSchema`. */
export type DtoRes_GetAudioDubJob = z.infer<typeof DtoRes_GetAudioDubJobSchema>;
/** Union of the string literals listed in `JobStatusSchema`. */
export type DubbingStatus = z.infer<typeof JobStatusSchema>;
/** Shape described by `dubbingPhraseSchema`. */
export type DubbingPhrase = z.infer<typeof dubbingPhraseSchema>;

/** Request schema for starting an audio extraction job: a single string `inputFileId`. */
export const DtoReq_StartAudioExtractionJobSchema = z.object({
  inputFileId: z.string(),
});

/**
 * Response schema for starting an audio extraction job.
 * This is the same schema object as `DtoRes_StartJobSchema`, exported under an
 * audio-specific name.
 */
export const DtoRes_StartAudioExtractionJobSchema = DtoRes_StartJobSchema;

/**
 * Request schema for starting an audio dub job. Both identifiers are required
 * strings.
 */
export const DtoReq_StartAudioDubJobSchema = z.object({
  transcriptionJobId: z.string(),
  translateId: z.string(),
});

/** Response schema for starting an audio dub job: a single string `audioDubJobId`. */
export const DtoRes_StartAudioDubJobSchema = z.object({
  audioDubJobId: z.string(),
});

/**
 * Allowed job status values. Used in this file for both `status` and
 * `audioDubJobStatus` of `AudioVisualLocalizationJobSchema`, and as the source
 * of the `DubbingStatus` type.
 */
export const JobStatusSchema = z.enum(["started", "progress", "processing", "error", "finished"]);

/**
 * Schema of a single dubbing phrase.
 *
 * `speaker`, `gcsUri` and `downloadUrl` are required keys whose value may be
 * `null`; all other fields are required and non-null.
 */
export const dubbingPhraseSchema = z.object({
  id: z.string(),
  startTime: z.number(),
  endTime: z.number(),
  text: z.string(),
  speaker: z.string().nullable(),
  gcsUri: z.string().nullable(),
  downloadUrl: z.string().nullable(),
});

/** Union of the provider identifiers listed in `SpeechToTextProviderSchema`. */
export type SpeechToTextProvider = z.infer<typeof SpeechToTextProviderSchema>;

/** Allowed speech-to-text provider identifiers. */
export const SpeechToTextProviderSchema = z.enum(["wsp", "gcp", "gcp_chirp", "assembly", "scribe"]);

/** Union of the status literals listed in `TRANSLATE_STATUS_ENUMS`. */
export type TranslateJobStatus = z.infer<typeof TRANSLATE_STATUS_ENUMS>;

/**
 * Allowed translation status values. Despite the constant-style name this is a
 * zod enum schema, not a plain array of strings.
 */
export const TRANSLATE_STATUS_ENUMS = z.enum([
  "TRANSLATION_NOT_STARTED",
  "TRANSLATING",
  "TRANSLATION_SUCCESS",
  "TRANSLATION_FAILED",
]);

/** Static type of a value accepted by `AudioVisualLocalizationJobResultSchema`. */
export type AudioVisualLocalizationJobResult = z.infer<
  typeof AudioVisualLocalizationJobResultSchema
>;

/** One `startTime`/`endTime` span inside `speechDurations`. */
const speechDurationSchema = z.object({
  startTime: z.number(),
  endTime: z.number(),
});

/**
 * Result payload of an audio-visual localization job.
 *
 * The three audio-dub fields at the bottom are required keys whose value may
 * be `null`; every other field is required and non-null.
 */
export const AudioVisualLocalizationJobResultSchema = z.object({
  duration: z.number(),
  audioBucket: z.string(),
  audioPath: z.string(),
  // voice activity detected
  speechDurations: z.array(speechDurationSchema),
  transcript: z.string(),
  keywords: z.array(z.string()),
  words: z.array(serviceTranscriptWordSchema),
  phrases: z.array(dubbingPhraseSchema),

  // audio dub
  backgroundAudioUri: z.string().nullable(),
  backgroundAudioSignedUrl: z.string().nullable(),
  vocalAudioUri: z.string().nullable(),
});

/**
 * Schema of a localization job record that carries transcription, translation
 * and audio-dub state in one object.
 *
 * The translation and audio-dub fields are optional. `errorMessage` and
 * `result` are required keys whose value may be `null`.
 */
export const AudioVisualLocalizationJobSchema = z.object({
  // transcription job
  // NOTE(review): the two timestamp fields below are commented out, so they are
  // not validated by this schema — presumably because `firestore` is not
  // available in this codebase; confirm against the backend copy.
  // createdAt: z.instanceof(firestore.Timestamp),
  // finishedAt: z.instanceof(firestore.Timestamp).nullable(),
  errorMessage: z.string().nullable(),
  status: JobStatusSchema,
  transcriptionJobId: z.string(),
  progress: z.number(),
  provider: SpeechToTextProviderSchema,

  // translation job
  translateStatus: TRANSLATE_STATUS_ENUMS.optional(),
  targetLanguageCode: z.string().optional(),
  translateId: z.string().optional(),

  // audio dub job
  audioDubJobId: z.string().optional(),
  audioDubJobStatus: JobStatusSchema.optional(),

  // outcome of the job: a string-keyed record of results, or null
  // NOTE(review): what the record keys represent is not visible here — confirm.
  result: z.record(z.string(), AudioVisualLocalizationJobResultSchema).nullable(),
});

/**
 * Response schema for fetching an audio dub job: wraps the job record under a
 * single `job` key.
 */
export const DtoRes_GetAudioDubJobSchema = z.object({
  job: AudioVisualLocalizationJobSchema,
});

// `CaptionPhrase` probably does not belong in this file, and it exposes more
// internal fields than it should. It is kept as-is for now to avoid breaking changes.

/**
 * Describes a single caption phrase.
 *
 * `speaker`, `gcsUri` and `downloadUrl` may each be omitted or set to `null`;
 * `audioClips` may be omitted.
 */
export interface CaptionPhrase {
  /** The phrase's unique identifier. */
  id: string;
  /** The timestamp at which the phrase starts being active. */
  startTime: number;
  /** The timestamp at which the phrase stops being active. */
  endTime: number;
  /** The phrase's text contents. */
  text: string;
  /** The phrase's speaker. */
  speaker?: string | null;
  /** The phrase's audio GCS URI (dubbing only). */
  gcsUri?: string | null;
  /** The phrase's audio download URL (dubbing only). */
  downloadUrl?: string | null;
  /** The phrase's audio clips (dubbing only). */
  audioClips?: Clip[];
}
