import React from 'react';
import PromptPopup from 'components/pages/insights/aiPromptsTab/promptPopup';

/**
 * Identifiers of the available AI prompts.
 *
 * Every key maps to a string identical to the key itself, so the map is
 * derived from a single list of names (insertion order is preserved).
 */
export const aiPromptsTypes = Object.fromEntries(
  [
    'spendAndImpactCorrelations',
    'spendAndDarkFunnelCorrelations',
    'mostImpactfulPersona',
    'diminishingCurve',
    'winningCampaigns',
  ].map((promptType) => [promptType, promptType]),
);

const encryptedPromptPopupSubtitle = (
  <>
    Copy the prompt, export your encrypted data, paste both into ChatGPT’s Code Interpreter, and watch the magic happen.
    <br />
    Once ChatGPT returns an encrypted CSV, download it, and input it below for decryption.
    <br />
    Note: a pro ChatGPT subscription is required. The following prompt works when using the Code Interpreter model.
  </>
);

const notEncryptedPromptPopupSubtitle = (
  <>
    Export your data, copy the prompt, paste both into ChatGPT’s Code Interpreter, and watch the magic happen.
    <br />
    Note: a pro ChatGPT subscription is required. The following prompt works when using the Code Interpreter model.
  </>
);

/**
 * Configuration for each AI prompt, keyed by the values of `aiPromptsTypes`.
 *
 * Every entry has the same shape:
 * - `icon`: icon identifier string (equal to the entry's key in every entry).
 * - `type`: an `aiPromptsTypes` value. Not always the entry's own key — see
 *   the review notes on the individual entries.
 * - `title` / `subTitle`: descriptive strings for the prompt.
 * - `popupSubTitle`: JSX subtitle; the encrypted variant is used exactly by
 *   the entries with `isEncryptedMode: true`.
 * - `popupComponent`: component reference (`PromptPopup` for every entry).
 * - `promptDescriptionToCopy`: the prompt text itself. The leading whitespace
 *   inside the template literals is part of the string.
 * - `isEncryptedMode`: boolean flag.
 * - `featureFlag`: feature flag name string.
 */
export const aiPromptsProperties = {
  [aiPromptsTypes.spendAndImpactCorrelations]: {
    icon: 'spendAndImpactCorrelations',
    type: aiPromptsTypes.spendAndImpactCorrelations,
    title: 'Find correlations between spend and impact',
    subTitle: 'Automatically scan your spend and performance across channels, uncover unexpected relationships, and find out what’s behind your dark funnel. Export a ready-for-analysis cost/performance CSV and analyze it in a click.',
    popupSubTitle: notEncryptedPromptPopupSubtitle,
    popupComponent: PromptPopup,
    promptDescriptionToCopy: `Objective:
      Conduct a Cost/Conversion Correlation Analysis. The aim is to identify relationships between the spend on one marketing channel and the performance of another.

      Dataset Description:
      You'll be provided with weekly spend and performance data for different marketing channels. The dataset columns are WEEK_OF_YEAR (representing the week number of the year), YEAR, and DATA. Within DATA: the key is a Marketing channel name, and the value is an object that has properties “spend” (the amount spent) and “funnel1" (the number of leads).

      Prerequisites - make sure to carefully review all prerequisites before moving forward:
      1. For channels with no spend/performance data on a given week, treat the empty cell as zero, if present. Remember each week you should have, per channel, spend and performance data.
      
      2. “Unmapped” and “direct” channels can only be influenced by spend from other channels. Do not look for impacts from spend on these channels.
      
      3. Important to note: Direct and unmapped aren’t actual channels. Direct is a combination of users who directly typed the URL (so they did not come from a marketing channel) and those who landed without a UTM, so the source isn't known. This means a large chunk of data is not accounted for. The optimal finding would be a correlation between spend on channel x and impact on performance on channel Direct. Also note that Direct refers to the channel called direct, not to channels that contain “direct”.
      
      4. Don't find correlations between the same channel. Use such correlations only for benchmarking. For example, showing Capterra spend correlates with Capterra performance is redundant.
      
      5. Performance and spend can be floating point numbers.
      
      6. DO NOT merge channels together - each channel is unique, even if the name bares some similarity
      
      7. If a channel contains the word “tools”, “hubspot”, or “salesforce” - remove it from the dataset completely.

      Analysis Instructions:
      1. Parse the dataset to a structured format.
      
      2. Conduct correlation analyses between the spend of each channel and the performance of another. Log correlation strength and p-value.
      
      3. Pinpoint the top 5-10 overall correlations.
      
      4. Draft a report with insights, confidence in findings, and recommendations.

      Desired Output:
      1. A table with the top 5-10 overall correlations, as well as a 1 sentence interpretation of the finding.
      
      2. A report summarizing insights and recommendations. If you can, make sure to include at least 5 insights and recommendations.`,
    isEncryptedMode: false,
    featureFlag: 'configStatsCorrelationsBetweenSpendAndImpact',
  },
  [aiPromptsTypes.mostImpactfulPersona]: {
    icon: 'mostImpactfulPersona',
    type: aiPromptsTypes.mostImpactfulPersona,
    title: 'Find your most impactful persona',
    subTitle: 'Identify the characteristics of your highest converting contacts. Export the traits of leads associated to your top deals, and let the prompt reveal the patterns behind your best opportunities.',
    popupSubTitle: encryptedPromptPopupSubtitle,
    popupComponent: PromptPopup,
    promptDescriptionToCopy: `Objective:
      Conduct a cluster analysis and identify the traits (property names) that are most strongly related to higher deal amounts.The clusters are based on combinations of pairs of custom fields and custom field values, and should be clustered based on the deal amount.

      Dataset Description:
      This csv contains data about deals and their associated contacts. Each row has 4 column:
      associated_contact - the contact’s email
      custom_fields - the contact’s properties. This is an object with the following structure: The key is a GUID that represents a property name, and the value the property value that has been encrypted for privacy.
      deal_amount - the amount of dollars of the deal
      Deal_id - the deal’s unique identifier, to separate deals

      Analysis Instructions:
      It’s important to get all the way to step 4 without freezing so try to be minimal with your output over steps 1-4 and reach steps 4 as soon as possible.
      1. Data Collection and sorting: Understand the dataset. Keep in mind the clusters’ features that we care about are *pairs of custom fields and custom field values*, and should be clustered based on the deal amount.
      ** So, start by concatenating the custom field and custom field values into unified strings that you can look at easily. We’ll call these pairs properties moving forward.  The properties, while hidden from you, are valuable to us, so the combination of property name + encrypted value should be shown together, as a pair, to provide context, and these properties are what we’re trying to uncover later. Ensure that the analysis consistently uses and reports on the full properties, which include both the custom field names and their respective encrypted values. Do not generalize or omit any portion of these properties at any stage of the analysis.
      
      ** Important note - an empty value on a property is not valuable, it means the deal’s contact has no value in that field, so it shouldn’t be treated as a property at all, and should be removed from the analysis. DO NOT show in the list any features that contain Empty, as these are not valuable.
      ** Additional properties to omit from your analysis: Any property that contains: “Email”, “Company”, “Name”, “URL”
      
      2. Run a cluster analysis to classify contacts based on their traits and the *deal amount*, to cluster the data based on higher and lower deal amounts per cluster.
      
      3. Identify the cluster that is most closely related to high deal amounts, and the cluster with the lowest deal amounts.
      
      4. Create a comparison of the two. Find the top 10 full properties - (ensure you analyze the full properties, for example, do not analyze “lead type”, analyze “Lead type:xanhbsgbgbagha” - the full key-value pairs) from both clusters . Ensure that the analysis consistently uses and reports on the full properties, which include both the custom field names and their respective encrypted values. Do not generalize or omit any portion of these properties at any stage of the analysis.
      
      5. Write a summary of your findings - which are the top properties of each cluster, and which properties are unique to the top performing cluster?
      
      6. Once the summary is written,  **export a csv** based on step 5’s findings:
      
      The CSV should be formatted as:
      Unique property | Average Deal size | Cluster number`,
    isEncryptedMode: true,
    featureFlag: 'configStatsMostImpactfulPersona',
  },
  [aiPromptsTypes.diminishingCurve]: {
    icon: 'diminishingCurve',
    // NOTE(review): `type` is spendAndImpactCorrelations, not diminishingCurve.
    // Presumably intentional because this prompt describes the same dataset
    // shape as the spend/impact prompt — confirm against the consumer of `type`.
    type: aiPromptsTypes.spendAndImpactCorrelations,
    title: 'Generate diminishing return curves',
    subTitle: 'Find your investment sweet spot. Analyze how each additional dollar impacts efficiency. Determine when you\'ll encounter your next scaling challenge, so you can plan accordingly.',
    popupSubTitle: notEncryptedPromptPopupSubtitle,
    popupComponent: PromptPopup,
    promptDescriptionToCopy: `Objective:
      Conduct a marketing response curve Analysis. The aim is to identify the relationship between spend and number of Qualified Leads, to identify ideal investment ranges and avoid diminishing returns.

      Dataset Description:
      You'll be provided with weekly spend and performance data for different marketing channels. The dataset columns are week_of_year (representing the week number of the year), year, and DATA. Within DATA: the key is a Marketing channel name, and the value is an object that has properties “spend” (the amount spent) and “funnel1" (the number of leads).

      Analysis Instructions:
      1. Data Collection and Cleansing: Make sure the data is cleaned and in a format that is easy to work with. This means removing any outliers, filling missing values, and perhaps normalizing the data if required. Use a relatively aggressive outlier removal process to improve the model’s fit and make sure we’re not distracted by anomalies such as weeks with events and one-off performance outliers.

      ** IMPORTANT:
      Your goal is to identify the spend that is correlated to leads and then explain the relationship with a curve - so, first start by understanding with spend/performance data clutters your data. Specifically, you need to understand which channels spend/performance may clutter the correlation with total performance - examples of such cases are channels with no spend or no performance at all, or with very high, one/two-time spend. We need multiple, reccuring spend and performance data points to uncover a correlation.
      
      1. Unify the data - since your goal is to build a curve that described how much leads will be generated in any spend point, you need to unify the performance data across channels, possibly also the spend data. Because eventually, we look for a prediction of the total number of leads, based on a total spend per week.
      
      2. Visualization: Start by plotting the data on a scatter plot with marketing spend on the x-axis and leads on the y-axis. This will give a visual sense of the relationship between the two. showcase a visualization highlighting the diminishing return range, and the maximum yield point.
      
      3. Model Selection: The typical relationship between marketing spend and its result (like leads) can often be represented by a logarithmic function. These functions show initial rapid growth followed by diminishing returns. You can use regression techniques to fit the data to these models and determine which model describes the data best.
      
      4. Model Evaluation: Use metrics like R-squared, RMSE (Root Mean Square Error), and visual inspection of the fit to evaluate the chosen model's performance. If the fit is good, then you can confidently use the curve to make predictions about how changes in marketing spend might affect leads.
      
      5. Clean curve - after selecting the model, create a curve showing the expected return of every dollar on a curve starting at 0 an going up 50% above the highest number in the spend dataset. Use that curve for visualizations moving forward. Make sure to bound the Predictions to avoid predicting a negative number of leads: After obtaining the predictions from our model, we can set a lower limit, ensuring that any predicted value below zero is set to zero.
      
      6. Draft a report summarizing insights and recommendations. If you can, make sure to include at least 5 insights and recommendations. Ideally, provide visual explanations and indications.`,
    isEncryptedMode: false,
    featureFlag: 'configStatsDiminishing',
  },
  [aiPromptsTypes.winningCampaigns]: {
    icon: 'winningCampaigns',
    type: aiPromptsTypes.winningCampaigns,
    title: 'Identify potential winning campaigns',
    subTitle: 'Compare recent campaigns from the past two weeks with your all-time best performers. Discover potential next winners. The export compares the initial two weeks of your top campaigns to your newly launched campaigns.',
    popupSubTitle: encryptedPromptPopupSubtitle,
    popupComponent: PromptPopup,
    // NOTE(review): the prompt text is empty for this entry — TODO confirm
    // whether that is intentional or the prompt is still to be written.
    promptDescriptionToCopy: '',
    isEncryptedMode: true,
    featureFlag: 'configStatsWinningCampaigns',
  },
  [aiPromptsTypes.spendAndDarkFunnelCorrelations]: {
    icon: 'spendAndDarkFunnelCorrelations',
    // NOTE(review): `type` is spendAndImpactCorrelations, not
    // spendAndDarkFunnelCorrelations. Presumably intentional because this
    // prompt describes the same dataset shape — confirm against the consumer.
    type: aiPromptsTypes.spendAndImpactCorrelations,
    title: 'Uncover correlations between any channel’s spend and your dark funnel',
    subTitle: 'Find channels that have an indirect influence over Direct. See which channels play a part in driving results from unattributable sources.',
    popupSubTitle: notEncryptedPromptPopupSubtitle,
    popupComponent: PromptPopup,
    promptDescriptionToCopy: `Objective:
      Uncover the influence of different channel spend on direct performance. Conduct a Cost/Conversion Correlation Analysis. The aim is to identify relationships between the spend on any marketing channel and the performance of direct.

      Dataset Description:
      You'll be provided with weekly spend and performance data for different marketing channels. The dataset columns are WEEK_OF_YEAR (representing the week number of the year), YEAR, and DATA. Within DATA: the key is a Marketing channel name, and the value is an object that has properties “spend” (the amount spent) and “funnel1" (the number of leads).

      Prerequisites:
      1. For channels with no spend/performance data on a given week, treat the empty cell as zero, if present. Remember each week you should have, per channel, spend and performance data. Note - if a channel has 0 spend across all weeks, it’s correlation is meaningless and it should be excluded from the analysis.
      
      2. Context on  Direct and unmapped: Direct (named “direct” in the dataset) and unmapped aren’t actual channels. Direct is a combination of users who directly typed the URL (so they did not come from a marketing channel) and those who landed without a UTM, so the source isn't known. This means a large chunk of data is not accounted for. The optimal finding would be a correlation between spend on channel x and impact on performance on channel Direct. Also note that Direct refers to the channel called direct, not to channels that contain “direct”.
      
      3. Don't find correlations between the same channel. Use such correlations only for benchmarking. For example, showing direct spend correlates with direct performance is redundant.
      
      4. Performance and spend can be floating point numbers.
      
      5. DO NOT merge channels together - each channel is unique, even if the name bares some similarity

      Analysis Instructions:
      ** keep text that is not outputs of tables and conclusions to a bare minimum, we are interested in the output, not the process**
      1. Parse the dataset to a structured format.
      
      2. Conduct correlation analyses between the spend of each channel permutation and the performance of direct. Log correlation strength and p-value.

      ** Channel permutation definition:
      There are three types of permutations you should look into:
      A. Single channel spend to direct (LinkedIn spend on direct performance)
      
      B. Multi-channel spend and direct (e.g. LinkedIn + Events spend on direct performance, or Google Ads + LinkedIn + SEO spend on Direct performance) - max 2 channels combined, to avoid crashing.
      
      C. All channel spend and direct: e.g., combined weekly spend of all channels correlation with direct performance.

      In this step, build all possible correlation combinations, along with p values.
      1. Detail the top 5-10 correlations for the performance of the "direct" channel.
      
      2. Draft a report with insights, confidence in findings, and recommendations.

      Desired Output:
      1. A table for the top 5-10 correlations concerning the "direct" channel's performance, as well as a 1 sentence interpretation of the finding, so that the user can uncover what impacts direct.
      
      2. A report summarizing insights and recommendations. If you can, make sure to include at least 5 insights and recommendations.`,
    isEncryptedMode: false,
    featureFlag: 'configStatsCorrelationsWithDarkFunnel',
  },
};
