"use strict";

Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.unmappedFieldsRoute = exports.schemaFieldsSimulationRoute = exports.internalSchemaRoutes = void 0;
var _std = require("@kbn/std");
var _streamsSchema = require("@kbn/streams-schema");
var _zod = require("@kbn/zod");
var _root_stream_definition = require("../../../../lib/streams/root_stream_definition");
var _generate_index_template = require("../../../../lib/streams/index_templates/generate_index_template");
var _constants = require("../../../../../common/constants");
var _security_error = require("../../../../lib/streams/errors/security_error");
var _stream_crud = require("../../../../lib/streams/stream_crud");
var _create_server_route = require("../../../create_server_route");
/*
 * Copyright Elasticsearch B.V. and/or licensed to Elasticsearch B.V. under one
 * or more contributor license agreements. Licensed under the Elastic License
 * 2.0; you may not use this file except in compliance with the Elastic License
 * 2.0.
 */

// Number of documents (newest first by @timestamp) sampled when collecting
// the field names present in a stream for the unmapped-fields route.
const UNMAPPED_SAMPLE_SIZE = 500;
// Value passed as the `timeout` of the searches issued by the field
// simulation route.
const FIELD_SIMULATION_TIMEOUT = '1s';
/**
 * Internal route that lists the field names found in a stream's most recent
 * documents which are not mapped by the stream itself nor by its ancestors.
 *
 * Responds with `{ unmappedFields }`, a sorted array of flattened field names.
 */
const unmappedFieldsRoute = exports.unmappedFieldsRoute = (0, _create_server_route.createServerRoute)({
  endpoint: 'GET /internal/streams/{name}/schema/unmapped_fields',
  options: {
    access: 'internal'
  },
  security: {
    authz: {
      requiredPrivileges: [_constants.STREAMS_API_PRIVILEGES.read]
    }
  },
  params: _zod.z.object({
    path: _zod.z.object({
      name: _zod.z.string()
    })
  }),
  handler: async ({
    params,
    request,
    getScopedClients
  }) => {
    const {
      scopedClusterClient,
      streamsClient
    } = await getScopedClients({
      request
    });

    // Kick off the three lookups together; none depends on another's result.
    const definitionLookup = streamsClient.getStream(params.path.name);
    const ancestorsLookup = streamsClient.getAncestors(params.path.name);
    const sampleLookup = scopedClusterClient.asCurrentUser.search({
      index: params.path.name,
      sort: [{
        '@timestamp': {
          order: 'desc'
        }
      }],
      size: UNMAPPED_SAMPLE_SIZE
    });
    const [streamDefinition, ancestors, sample] = await Promise.all([definitionLookup, ancestorsLookup, sampleLookup]);

    // Every flattened field name seen in the sampled documents.
    const seenFields = new Set();
    for (const hit of sample.hits.hits) {
      for (const fieldName of Object.keys((0, _std.getFlattenedObject)(hit._source))) {
        seenFields.add(fieldName);
      }
    }

    // Field names mapped by the stream's own definition or inherited from ancestors.
    const knownFields = new Set();
    const registerKnown = fieldMap => {
      for (const fieldName of Object.keys(fieldMap)) {
        knownFields.add(fieldName);
      }
    };
    if (_streamsSchema.Streams.ClassicStream.Definition.is(streamDefinition)) {
      registerKnown(streamDefinition.ingest.classic.field_overrides || {});
    }
    if (_streamsSchema.Streams.WiredStream.Definition.is(streamDefinition)) {
      registerKnown(streamDefinition.ingest.wired.fields);
    }
    for (const ancestor of ancestors) {
      registerKnown(ancestor.ingest.wired.fields);
    }

    const unmappedFields = [];
    for (const fieldName of seenFields) {
      if (!knownFields.has(fieldName)) {
        unmappedFields.push(fieldName);
      }
    }
    unmappedFields.sort();
    return {
      unmappedFields
    };
  }
});
// Number of sample documents fetched for, and run through, a field simulation.
const FIELD_SIMULATION_SAMPLE_SIZE = 20;

/**
 * Internal route that checks whether a set of proposed field definitions can
 * be applied to a stream.
 *
 * Steps performed by the handler:
 *  1. Verify the caller can read the stream (throws SecurityError otherwise).
 *  2. Sample up to FIELD_SIMULATION_SAMPLE_SIZE documents that contain all of
 *     the proposed (non-system) fields.
 *  3. Run those documents through an ingest simulation with the proposed
 *     mappings applied.
 *  4. On success, re-read the same documents with the proposed definitions as
 *     runtime mappings to return their "fields" representation.
 *
 * Response shape: `{ status, simulationError, documentsWithRuntimeFieldsApplied }`
 * where `status` is 'unknown' (no sample documents), 'failure' (the first
 * simulation error is returned as a JSON string) or 'success'.
 */
const schemaFieldsSimulationRoute = exports.schemaFieldsSimulationRoute = (0, _create_server_route.createServerRoute)({
  endpoint: 'POST /internal/streams/{name}/schema/fields_simulation',
  options: {
    access: 'internal'
  },
  security: {
    authz: {
      requiredPrivileges: [_constants.STREAMS_API_PRIVILEGES.read]
    }
  },
  params: _zod.z.object({
    path: _zod.z.object({
      name: _zod.z.string()
    }),
    body: _zod.z.object({
      field_definitions: _zod.z.array(_zod.z.intersection(_streamsSchema.fieldDefinitionConfigSchema, _zod.z.object({
        name: _zod.z.string()
      })))
    })
  }),
  handler: async ({
    params,
    request,
    getScopedClients
  }) => {
    const streamName = params.path.name;
    const {
      scopedClusterClient
    } = await getScopedClients({
      request
    });
    const {
      read
    } = await (0, _stream_crud.checkAccess)({
      name: streamName,
      scopedClusterClient
    });
    if (!read) {
      throw new _security_error.SecurityError(`Cannot read stream ${streamName}, insufficient privileges`);
    }

    // Filter out potential system fields since we can't simulate them anyway.
    const userFieldDefinitions = params.body.field_definitions.filter(field => field.type !== 'system');
    const propertiesForSample = Object.fromEntries(userFieldDefinitions.map(field => [field.name, {
      type: 'keyword'
    }]));
    const documentSamplesSearchBody = {
      // Add keyword runtime mappings so we can pair with exists, this is to attempt to "miss" less documents for the simulation.
      runtime_mappings: propertiesForSample,
      query: {
        bool: {
          filter: Object.keys(propertiesForSample).map(field => ({
            exists: {
              field
            }
          }))
        }
      },
      size: FIELD_SIMULATION_SAMPLE_SIZE,
      track_total_hits: false,
      terminate_after: FIELD_SIMULATION_SAMPLE_SIZE,
      timeout: FIELD_SIMULATION_TIMEOUT
    };
    const sampleResults = await scopedClusterClient.asCurrentUser.search({
      index: streamName,
      ...documentSamplesSearchBody
    });
    const sampleHits = sampleResults.hits.hits;
    if (sampleHits.length === 0) {
      return {
        status: 'unknown',
        simulationError: null,
        documentsWithRuntimeFieldsApplied: null
      };
    }
    const propertiesForSimulation = Object.fromEntries(userFieldDefinitions.map(({
      name,
      ...field
    }) => [name, field]));

    // Set lookup keeps the per-document field filtering linear.
    const simulatedFieldNames = new Set(Object.keys(propertiesForSimulation));
    // For wired streams direct writes to child streams are not allowed, we must use the "logs" index.
    const simulationIndex = streamName.startsWith(`${_root_stream_definition.LOGS_ROOT_STREAM_NAME}.`) ? _root_stream_definition.LOGS_ROOT_STREAM_NAME : streamName;
    const sampleResultsAsSimulationDocs = sampleHits.map(hit => ({
      _index: simulationIndex,
      _id: hit._id,
      // Only the fields under simulation (plus @timestamp) are sent to the simulation.
      _source: Object.fromEntries(Object.entries((0, _std.getFlattenedObject)(hit._source)).filter(([key]) => simulatedFieldNames.has(key) || key === '@timestamp'))
    }));
    const simulation = await simulateIngest(sampleResultsAsSimulationDocs, streamName, propertiesForSimulation, scopedClusterClient);

    // Use the first error as a representative error.
    const documentWithError = simulation.docs.find(doc => doc.doc.error !== undefined);
    if (documentWithError !== undefined) {
      return {
        status: 'failure',
        simulationError: JSON.stringify(documentWithError.doc.error),
        documentsWithRuntimeFieldsApplied: null
      };
    }

    // Convert the field definitions to a format that can be used in runtime mappings (match_only_text -> keyword)
    const propertiesCompatibleWithRuntimeMappings = Object.fromEntries(userFieldDefinitions.map(field => [field.name, {
      type: field.type === 'match_only_text' ? 'keyword' : field.type,
      ...(field.format ? {
        format: field.format
      } : {})
    }]));
    const runtimeFieldsSearchBody = {
      runtime_mappings: propertiesCompatibleWithRuntimeMappings,
      size: FIELD_SIMULATION_SAMPLE_SIZE,
      fields: params.body.field_definitions.map(field => field.name),
      _source: false,
      track_total_hits: false,
      terminate_after: FIELD_SIMULATION_SAMPLE_SIZE,
      timeout: FIELD_SIMULATION_TIMEOUT
    };

    // This gives us a "fields" representation rather than _source from the simulation
    const runtimeFieldsResult = await scopedClusterClient.asCurrentUser.search({
      index: streamName,
      query: {
        ids: {
          values: sampleHits.map(hit => hit._id)
        }
      },
      ...runtimeFieldsSearchBody
    });

    // The runtime-fields search is a separate query, so its hits are not
    // guaranteed to come back in the same order (or number) as the sample.
    // Pair each hit with its simulation result by document id instead of by
    // position. This assumes the simulation returns its docs in the order
    // they were submitted.
    const simulatedDocById = new Map(sampleHits.map((hit, position) => [hit._id, simulation.docs[position]]));
    const ignoredFieldsFor = (hit, position) => {
      const simulated = simulatedDocById.has(hit._id) ? simulatedDocById.get(hit._id) : simulation.docs[position];
      return simulated && simulated.doc && simulated.doc.ignored_fields || [];
    };
    return {
      status: 'success',
      simulationError: null,
      documentsWithRuntimeFieldsApplied: runtimeFieldsResult.hits.hits.map((hit, position) => {
        const ignoredFields = ignoredFieldsFor(hit, position);
        if (!hit.fields) {
          return {
            ignored_fields: ignoredFields
          };
        }
        return {
          // Each entry of `fields` is an array; only its first value is reported.
          values: Object.fromEntries(Object.entries(hit.fields).map(([field, fieldValues]) => [field, fieldValues[0]])),
          ignored_fields: ignoredFields
        };
      })
    };
  }
});
// Combined export of the internal schema routes: the own enumerable properties
// of both route objects are copied into a single object (on a key collision the
// later spread, schemaFieldsSimulationRoute, would win).
const internalSchemaRoutes = exports.internalSchemaRoutes = {
  ...unmappedFieldsRoute,
  ...schemaFieldsSimulationRoute
};
// Name of the substituted pipeline the simulation request is pointed at.
const DUMMY_PIPELINE_NAME = '__dummy_pipeline__';

/**
 * Runs the given documents through an ingest simulation in which the data
 * stream's index template is replaced by a copy that additionally contains
 * the mappings under test.
 *
 * @param sampleResultsAsSimulationDocs documents sent as `docs` of the simulation request
 * @param dataStreamName name of the data stream whose index template is patched
 * @param propertiesForSimulation mapping properties merged over the template's own properties
 * @param scopedClusterClient client whose `asCurrentUser` is used for every request
 * @returns the response of the simulation request, as returned by the transport
 */
async function simulateIngest(sampleResultsAsSimulationDocs, dataStreamName, propertiesForSimulation, scopedClusterClient) {
  const esClient = scopedClusterClient.asCurrentUser;

  // fetch the index template to get the base mappings
  const dataStreamResponse = await esClient.indices.getDataStream({
    name: dataStreamName
  });
  const templateName = dataStreamResponse.data_streams[0].template;
  const templateResponse = await esClient.indices.getIndexTemplate({
    name: templateName
  });
  const indexTemplate = templateResponse.index_templates[0].index_template;

  // The template body and its mappings may be absent; tolerate both.
  const baseTemplate = indexTemplate.template;
  const baseMappings = baseTemplate == null ? undefined : baseTemplate.mappings;
  const baseProperties = baseMappings == null ? undefined : baseMappings.properties;
  const exceedsMaxPriority = indexTemplate.priority && indexTemplate.priority > _generate_index_template.MAX_PRIORITY;

  // We need to build a patched index template instead of using mapping_addition
  // because of https://github.com/elastic/elasticsearch/issues/131608
  const patchedIndexTemplate = {
    ...indexTemplate,
    // max priority passed as a string so we don't lose precision
    priority: exceedsMaxPriority ? `${_generate_index_template.MAX_PRIORITY}` : indexTemplate.priority,
    composed_of: [...(indexTemplate.composed_of || []), '__DUMMY_COMPONENT_TEMPLATE__'],
    template: {
      ...baseTemplate,
      mappings: {
        ...baseMappings,
        properties: {
          ...baseProperties,
          ...propertiesForSimulation
        }
      }
    }
  };

  const isWiredChildStream = dataStreamName.startsWith(`${_root_stream_definition.LOGS_ROOT_STREAM_NAME}.`);

  // The sampleResults are already gathered directly from the child stream index. But, we can't
  // simulate an _index other than logs for wired streams, this reroutes the documents back to the child stream.
  const dummyPipelineProcessors = [];
  if (isWiredChildStream) {
    dummyPipelineProcessors.push({
      reroute: {
        destination: dataStreamName
      }
    });
  }
  const pipelineSubstitutions = {
    [DUMMY_PIPELINE_NAME]: {
      processors: dummyPipelineProcessors
    }
  };
  if (isWiredChildStream) {
    // After the reroute, emptying the stream's own processing pipeline prevents double-processing.
    pipelineSubstitutions[`${dataStreamName}@stream.processing`] = {
      processors: []
    };
  }

  const simulationBody = {
    docs: sampleResultsAsSimulationDocs,
    index_template_substitutions: {
      [templateName]: patchedIndexTemplate
    },
    component_template_substitutions: {
      __DUMMY_COMPONENT_TEMPLATE__: {
        template: {
          mappings: {
            properties: propertiesForSimulation
          }
        }
      }
    },
    pipeline_substitutions: pipelineSubstitutions
  };

  // TODO: We should be using scopedClusterClient.asCurrentUser.simulate.ingest() but the ES JS lib currently has a bug. The types also aren't available yet, so we use any.
  const response = await esClient.transport.request({
    method: 'POST',
    path: `_ingest/_simulate?pipeline=${DUMMY_PIPELINE_NAME}`,
    body: simulationBody
  });
  return response;
}