import {
  type AutoSyncedSourceTypes,
  type EmbeddingGenerators,
  type Integration,
  type IntegrationName,
} from "carbon-connect";

/**
 * Builds the list of integration configurations handed to Carbon Connect.
 *
 * The enum-like objects are received as arguments (this module imports them
 * only as types), and the function reads the members it needs from them:
 * `TICKET`/`ARTICLE`, `OPENAI`, and one integration name per entry.
 *
 * @param autoSyncedSourceTypes - Source of the `TICKET` and `ARTICLE` values
 *   used in the Zendesk `fileSyncConfig`.
 * @param embeddingGenerators - Source of the `OPENAI` value used as the
 *   embedding model of every integration.
 * @param integrationName - Source of the integration ids (`LOCAL_FILES`,
 *   `NOTION`, `ZENDESK`, `SALESFORCE`, `WEB_SCRAPER`, `GOOGLE_DRIVE`).
 * @returns A new array with six integration configurations, in the order
 *   local files, Notion, Zendesk, Salesforce, web scraper, Google Drive.
 */
export const getCarbonIntegrations = (
  autoSyncedSourceTypes: typeof AutoSyncedSourceTypes,
  embeddingGenerators: typeof EmbeddingGenerators,
  integrationName: typeof IntegrationName,
): Integration[] => {
  return [
    {
      id: integrationName.LOCAL_FILES,
      // Ideal overlap is 30% of chunk size.
      chunkSize: 1000,
      overlapSize: 300,
      embeddingModel: embeddingGenerators.OPENAI,
      // Toggle to skip embedding generation.
      skipEmbeddingGeneration: false,
      // Not needed for file uploads, hence false.
      enableAutoSync: false,
      // Toggle to true to generate sparse vectors for hybrid search.
      generateSparseVectors: false,
      // Adds the file title to each chunk for the integration.
      prependFilenameToChunks: false,
      // TODO: Need more information on best practices for this.
      // Specifies the number of items to include in a specific chunk.
      maxItemsPerChunk: undefined,
      // Not needed for file uploads, hence false.
      syncFilesOnConnection: false,
      // Not needed for file uploads, hence false.
      syncSourceItems: false,
      // When set to true, the page is considered as a boundary for the chunks.
      // False at the integration level because it is only needed for PDFs;
      // the "pdf" entry in allowedFileTypes below overrides it to true.
      setPageAsBoundary: false,
      // Toggle to enable Optical Character Recognition (OCR) for PDFs.
      // False at the integration level; the "pdf" entry in allowedFileTypes
      // below overrides it to true.
      useOcr: false,
      // Enables table parsing when useOcr is set to true.
      // False at the integration level; the "pdf" entry in allowedFileTypes
      // below overrides it to true.
      parsePdfTablesWithOcr: false,
      // Maximum file size allowed for upload in bytes.
      maxFileSize: 20000000,
      // Maximum number of files allowed for upload at once.
      maxFilesCount: 10,
      // Specifies whether speaker diarization will be enabled for the audio transcription services.
      // This allows us to format chunks so that the text is organized by utterances and each utterance will be labeled with the speaker.
      includeSpeakerLabels: false,
      // When this flag is set to true, documents will be chunked without generating embeddings.
      generateChunksOnly: false,
      // Specifies which columns are displayed in the file list view.
      filesTabColumns: ["name", "status", "created_at"],
      // Per-extension settings for uploaded files.
      // NOTE(review): extensions are written in mixed case ("txt" vs "EML");
      // confirm whether Carbon Connect matches them case-insensitively.
      // NOTE(review): the 1500-size entries use an overlap of 20, which does
      // not follow the 30% guideline above - confirm this is intended.
      allowedFileTypes: [
        {
          extension: "txt",
          chunkSize: 1000,
          overlapSize: 300,
          skipEmbeddingGeneration: false,
          generateSparseVectors: false,
        },
        {
          // PDFs are the only type with page boundaries and OCR enabled.
          extension: "pdf",
          chunkSize: 1000,
          overlapSize: 300,
          skipEmbeddingGeneration: false,
          setPageAsBoundary: true,
          useOcr: true,
          generateSparseVectors: false,
          parsePdfTablesWithOcr: true,
        },
        {
          extension: "md",
          chunkSize: 1000,
          overlapSize: 300,
          skipEmbeddingGeneration: false,
          generateSparseVectors: false,
        },
        {
          extension: "msg",
          chunkSize: 1000,
          overlapSize: 300,
          skipEmbeddingGeneration: false,
          generateSparseVectors: false,
        },
        {
          extension: "EML",
          chunkSize: 1500,
          overlapSize: 20,
          skipEmbeddingGeneration: false,
          generateSparseVectors: false,
        },
        {
          extension: "HTML",
          chunkSize: 1500,
          overlapSize: 20,
          skipEmbeddingGeneration: false,
          generateSparseVectors: false,
        },
        {
          extension: "JSON",
          chunkSize: 1500,
          overlapSize: 20,
          skipEmbeddingGeneration: false,
          generateSparseVectors: false,
        },
        {
          extension: "PPTX",
          chunkSize: 1500,
          overlapSize: 20,
          skipEmbeddingGeneration: false,
          generateSparseVectors: false,
        },
        {
          extension: "TSV",
          chunkSize: 1500,
          overlapSize: 20,
          skipEmbeddingGeneration: false,
          generateSparseVectors: false,
        },
        {
          extension: "RTF",
          chunkSize: 1500,
          overlapSize: 20,
          skipEmbeddingGeneration: false,
          generateSparseVectors: false,
        },
        {
          extension: "DOCX",
          chunkSize: 1500,
          overlapSize: 20,
          skipEmbeddingGeneration: false,
          generateSparseVectors: false,
        },
        {
          extension: "CSV",
          chunkSize: 1500,
          overlapSize: 20,
          skipEmbeddingGeneration: false,
          generateSparseVectors: false,
        },
        {
          extension: "XLSM",
          chunkSize: 1500,
          overlapSize: 20,
          skipEmbeddingGeneration: false,
          generateSparseVectors: false,
        },
        {
          extension: "XLSX",
          chunkSize: 1500,
          overlapSize: 20,
          skipEmbeddingGeneration: false,
          generateSparseVectors: false,
        },
      ],
    },
    {
      id: integrationName.NOTION,
      embeddingModel: embeddingGenerators.OPENAI,
      // Ideal overlap is 30% of chunk size.
      chunkSize: 1500,
      overlapSize: 450,
      // Toggle to skip embedding generation.
      skipEmbeddingGeneration: false,
      // TODO: Need more information for this.
      // Set to false, as there is no information in the documentation about this.
      enableAutoSync: false,
      // Toggle to true to generate sparse vectors for hybrid search.
      generateSparseVectors: false,
      // Adds the file title to each chunk for the integration.
      prependFilenameToChunks: false,
      // TODO: Need more information on best practices for this.
      // Specifies the number of items to include in a specific chunk.
      maxItemsPerChunk: undefined,
      // TODO: Need more information on best practices for this.
      // Auto-sync all files from a user's connected account.
      syncFilesOnConnection: false,
      // TODO: Need more information on best practices for this.
      // Controls whether items from the file directory are synced by default.
      syncSourceItems: true,
      // Shows the synced files tab in Carbon Connect for this specific integration.
      showFilesTab: true,
      // TODO: Need more information on best practices for this.
      // Controls whether Carbon Connect defaults to Carbon's file picker instead of the source's file picker.
      // (Carbon Connect 3.0 only)
      useCarbonFilePicker: false,
      // By setting it to true, only new or updated files since the last sync will be re-synced.
      incrementalSync: true,
      // Specifies which columns are displayed in the file list view.
      filesTabColumns: ["name", "status", "external_url", "created_at"],
    },
    {
      id: integrationName.ZENDESK,
      embeddingModel: embeddingGenerators.OPENAI,
      // Ideal overlap is 30% of chunk size.
      chunkSize: 1500,
      overlapSize: 450,
      // Toggle to skip embedding generation.
      skipEmbeddingGeneration: false,
      // TODO: Need more information for this.
      // Set to false, as there is no information in the documentation about this.
      enableAutoSync: false,
      // Toggle to true to generate sparse vectors for hybrid search.
      generateSparseVectors: false,
      // Adds the file title to each chunk for the integration.
      prependFilenameToChunks: false,
      // TODO: Need more information on best practices for this.
      // Specifies the number of items to include in a specific chunk.
      maxItemsPerChunk: undefined,
      // TODO: Need more information on best practices for this.
      // Auto-sync all files from a user's connected account.
      syncFilesOnConnection: false,
      // TODO: Need more information on best practices for this.
      // Controls whether items from the file directory are synced by default.
      syncSourceItems: true,
      // Shows the synced files tab in Carbon Connect for this specific integration.
      showFilesTab: true,
      // TODO: Need more information on best practices for this.
      // Controls whether Carbon Connect defaults to Carbon's file picker instead of the source's file picker.
      // (Carbon Connect 3.0 only)
      useCarbonFilePicker: false,
      // By setting it to true, only new or updated files since the last sync will be re-synced.
      incrementalSync: true,
      // Specifies which columns are displayed in the file list view.
      filesTabColumns: ["name", "status", "external_url", "created_at"],
      // Includes data source and file specific configurations.
      // For Zendesk these are tickets and articles.
      fileSyncConfig: {
        // An array specifying the types of items to sync.
        auto_synced_source_types: [
          autoSyncedSourceTypes.TICKET,
          autoSyncedSourceTypes.ARTICLE,
        ],
        // A boolean indicating whether to sync attachments from ticket comments.
        sync_attachments: true,
      },
    },
    {
      id: integrationName.SALESFORCE,
      embeddingModel: embeddingGenerators.OPENAI,
      // Ideal overlap is 30% of chunk size.
      chunkSize: 1500,
      overlapSize: 450,
      // Toggle to skip embedding generation.
      skipEmbeddingGeneration: false,
      // TODO: Need more information for this.
      // Set to false, as there is no information in the documentation about this.
      enableAutoSync: false,
      // Toggle to true to generate sparse vectors for hybrid search.
      generateSparseVectors: false,
      // Adds the file title to each chunk for the integration.
      prependFilenameToChunks: false,
      // TODO: Need more information on best practices for this.
      // Specifies the number of items to include in a specific chunk.
      maxItemsPerChunk: undefined,
      // TODO: Need more information on best practices for this.
      // Auto-sync all files from a user's connected account.
      syncFilesOnConnection: false,
      // TODO: Need more information on best practices for this.
      // Controls whether items from the file directory are synced by default.
      syncSourceItems: true,
      // Shows the synced files tab in Carbon Connect for this specific integration.
      showFilesTab: true,
      // TODO: Need more information on best practices for this.
      // Controls whether Carbon Connect defaults to Carbon's file picker instead of the source's file picker.
      // (Carbon Connect 3.0 only)
      useCarbonFilePicker: false,
      // By setting it to true, only new or updated files since the last sync will be re-synced.
      incrementalSync: true,
      // Specifies which columns are displayed in the file list view.
      filesTabColumns: ["name", "status", "external_url", "created_at"],
    },
    {
      id: integrationName.WEB_SCRAPER,
      embeddingModel: embeddingGenerators.OPENAI,
      // Ideal overlap is 30% of chunk size.
      chunkSize: 1500,
      overlapSize: 450,
      // Toggle to skip embedding generation.
      skipEmbeddingGeneration: false,
      // TODO: Need more information on how scheduled syncs work.
      // Toggle to enable scheduled syncs.
      enableAutoSync: false,
      // Toggle to true to generate sparse vectors for hybrid search.
      generateSparseVectors: false,
      // Adds the file title to each chunk for the integration.
      prependFilenameToChunks: false,
      // TODO: Need more information on best practices for this.
      // Specifies the number of items to include in a specific chunk.
      maxItemsPerChunk: undefined,
      // TODO: Need more information on best practices for this.
      // Auto-sync all files from a user's connected account.
      syncFilesOnConnection: false,
      // TODO: Need more information on best practices for this.
      // Controls whether items from the file directory are synced by default.
      syncSourceItems: true,
      // Shows the synced files tab in Carbon Connect for this specific integration.
      showFilesTab: true,
      // TODO: Need more information on best practices for this.
      // Controls whether Carbon Connect defaults to Carbon's file picker instead of the source's file picker.
      // (Carbon Connect 3.0 only)
      useCarbonFilePicker: false,
      // By setting it to true, only new or updated files since the last sync will be re-synced.
      incrementalSync: true,
      // Specifies which columns are displayed in the file list view.
      filesTabColumns: ["name", "status", "external_url", "created_at"],
      // This option enables the sitemap tab to be displayed.
      sitemapEnabled: true,
      // Depth of recursion for scraping.
      // Use 1 to disable recursion and 0 to scrape recursively until reaching the maxPagesToScrape limit.
      recursionDepth: 10,
      // Maximum number of pages to scrape.
      maxPagesToScrape: 1000,
      // Define HTML tags to exclude when converting HTML to plaintext.
      htmlTagsToSkip: [],
      // Define CSS classes to exclude when converting HTML to plaintext.
      cssClassesToSkip: [],
      // Define CSS selectors to exclude when converting HTML to plaintext.
      cssSelectorsToSkip: [],
      // When this flag is set to true, documents will be chunked without generating embeddings.
      generateChunksOnly: false,
    },
    {
      id: integrationName.GOOGLE_DRIVE,
      // Set explicitly so this connector uses the same embedding model as
      // every other integration in this list.
      embeddingModel: embeddingGenerators.OPENAI,
      // Ideal overlap is 30% of chunk size.
      chunkSize: 1500,
      overlapSize: 450,
      // Toggle to skip embedding generation.
      skipEmbeddingGeneration: false,
      // TODO: Need more information for this.
      // Set to false, as there is no information in the documentation about this.
      enableAutoSync: false,
      // Toggle to true to generate sparse vectors for hybrid search.
      generateSparseVectors: false,
      // Adds the file title to each chunk for the integration.
      prependFilenameToChunks: false,
      // TODO: Need more information on best practices for this.
      // Specifies the number of items to include in a specific chunk.
      maxItemsPerChunk: undefined,
      // TODO: Need more information on best practices for this.
      // Auto-sync all files from a user's connected account.
      syncFilesOnConnection: false,
      // TODO: Need more information on best practices for this.
      // Controls whether items from the file directory are synced by default.
      syncSourceItems: true,
      // Shows the synced files tab in Carbon Connect for this specific integration.
      showFilesTab: true,
      // TODO: Need more information on best practices for this.
      // Controls whether Carbon Connect defaults to Carbon's file picker instead of the source's file picker.
      // (Carbon Connect 3.0 only)
      useCarbonFilePicker: false,
      // By setting it to true, only new or updated files since the last sync will be re-synced.
      incrementalSync: true,
      // Specifies which columns are displayed in the file list view.
      filesTabColumns: ["name", "status", "external_url", "created_at"],
      // TODO: The options below are not covered in the documentation for this
      // integration, so they need to be checked with the team.
      // When set to true, the page is considered as a boundary for the chunks.
      setPageAsBoundary: true,
      // Toggle to enable Optical Character Recognition (OCR) for PDFs.
      useOcr: true,
      // Enables table parsing when useOcr is set to true.
      // Enabled here at the integration level, unlike local file uploads
      // where it is only turned on for the "pdf" file type.
      parsePdfTablesWithOcr: true,
    },
  ];
};
