% eeg2cnd_LalorNatSpeech.m
% Script to convert the LalorLab Natural Speech dataset into
% the Continuous-events Neural Data format (CND)
%
% See the CND documentation for details on the format; I suggest looking
% into the example datasets first.
% The data format, in brief:
%
% a single file is saved for data that is shared across all subjects. This
% file contains a variable 'stim' including, for example, stimulus features
% such as the speech envelope.
%
% a single file per subject contains the subject-specific data, e.g.,
% EEG and EMG signals. If the experiment was grouped into trials or runs,
% the data variable will be grouped in the same way by using cell arrays.
% The trial order is the same as in the 'stim' variable (so the same order
% across participants), and it will include a variable indicating the
% original presentation order for each subject.
%
%
% Example for the case of an audio-book listening experiment
%
% Key structures and variables:
%
% *** dataStim.mat: Information and data that is common across subjects
% stim.data{1,:}: event data. In the present dataset, this is the speech envelope
% stim.fs: Envelope signal sampling frequency 
% stim.data{2,:}: This could be other features of speech e.g., prosodic pitch
%
% *** dataSub.mat: Subject-specific data. Here, this is the EEG signal
% eeg.fs: EEG sampling frequency
% eeg.data: cell array with the EEG signal for each run
% eeg.chanlocs: channel location information
% eeg.extChan{1}.data: EEG external mastoid electrodes (e.g., used for
%                      re-referencing the EEG signal)
% emg.data: This could be another stream for emg data
% acc.data: This could be a stream with accelerometric data
% eyetracking.data
% 
% 
% CNSP-Workshop 2021
% https://cnsp-workshop.github.io/website/index.html
% Author: Giovanni M. Di Liberto, Aaron Nidiffer
% Copyright 2020-2021 - Giovanni Di Liberto
%                       Nathaniel Zuk
%                       Michael Crosse
%                       Aaron Nidiffer
%                       (see license file for details)
% Last update: 1 August 2021
%
clear; clc;

% Parameters that are fixed for this dataset
subs = 1:19;                % Subjects to include
nRuns = 20;                 % Number of runs to include per subject

% Folder layout, relative to the current working directory.
% Paths are assembled with fullfile/filesep rather than a hard-coded '\' so
% that the script also runs on Linux and macOS. Each folder name keeps its
% trailing separator, so it can still be concatenated directly with a file
% name.
folderRoot = pwd;
folderStim = [fullfile(folderRoot,'Stimuli') filesep]; % This could be an external stimulus (speech
                            % perception experiment) or other events, such
                            % as a continuous action (speech production)
                            % NOTE(review): folderStim is not referenced in
                            % the visible part of this script - confirm
                            % whether it is still needed.
folderEEG = [fullfile(folderRoot,'EEG') filesep];       % EEG data folder
folderCND = [fullfile(folderRoot,'dataCND') filesep];   % Data in CND format
stimFeatures = {'Speech envelope','Spectrogram','Phonetic Features'};
                                    % Stimulus features in the same order
                                    % as in the cell array 'stim'

% Channel locations, later stored in eeg.chanlocs. Only the variable that
% the script uses is requested, so the file cannot overwrite the parameters
% defined above.
load(fullfile(folderRoot,'chanlocs128.mat'),'chanlocs')

% Create the output folder on first use
if ~exist(folderCND,'dir'), mkdir(folderCND); end

%% Converting EEG to the CND format
% CND: Continuous-events Neural Data format
%
% For every subject, the per-run files
%   <folderEEG>/Subject<sub>/Subject<sub>_Run<run>.mat
% are read and gathered into a single 'eeg' structure, which is then saved
% as <folderCND>/dataSub<sub>.mat.
disp('Preparing subject-specific data file')
for sub = subs
    disp(sprintf("\b."))    % prints a backspace followed by a dot for each subject
    
    clear eeg
    eeg.dataType = "EEG";
    eeg.stimIdxs = 1:nRuns; % Stimulus idxs corresponding to each element
                            % (run) in the eeg cell array
    subName = ['Subject' num2str(sub)];
    for iRun = 1:nRuns      % 'iRun' rather than 'run', which would shadow the built-in run()
        % Load EEG data run. fullfile keeps the path valid on every
        % platform. Loading into a struct means that a file lacking one of
        % the variables raises an error on access, instead of silently
        % reusing the value left in the workspace by the previous run.
        runFile = fullfile(folderEEG, subName, [subName '_Run' num2str(iRun) '.mat']);
        runData = load(runFile,'eegData','fs','mastoids');
         
        % Checking if sampling frequency is consistent with the first run
        if iRun == 1
            eeg.fs = runData.fs;
        elseif eeg.fs ~= runData.fs
            % Abort with a real error so that a calling script or batch job
            % can detect the failure; no file is written for this subject.
            error('eeg2cnd:inconsistentFs', ...
                  ['Error 1: The sampling frequency for run %d is inconsistent with run 1\n' ...
                   '         Please make sure that all runs have the same sampling frequency before running this script.'], ...
                  iRun)
        end
        eeg.data{iRun} = runData.eegData;             % Main EEG data
        eeg.extChan{1}.data{iRun} = runData.mastoids; % External channels (mastoids)
    end
    eeg.extChan{1}.description = 'Mastoids';
    eeg.chanlocs = chanlocs;
    save(fullfile(folderCND, ['dataSub' num2str(sub) '.mat']),'eeg')
end

%% Preprocessing Stimulus and conversion to CND
% Reads 'stimAll' from <folderRoot>/stimAll.mat, wraps it in the 'stim'
% structure together with its sampling frequency and feature names, and
% saves the result as <folderCND>/dataStim.mat.
disp('Preparing stimulus data file')
clear stim 
% Stimulus features data (same for all participants). If trials
% were shuffled, then they would have to be sorted back in the
% subject-specific structures (e.g., 'eeg'). The presentation
% order will be preserved in an additional variable.

% fullfile is used for both paths so that the separator is correct on every
% platform and the path is built the same way as elsewhere in the script.
load(fullfile(folderRoot,'stimAll.mat'),'stimAll')
stim.data = stimAll;
stim.fs = 64; % it was hard coded
              % NOTE(review): not read from the stimulus file - confirm that
              % it matches the sampling frequency of the data in stimAll.
stim.name = stimFeatures;

save(fullfile(folderCND,'dataStim.mat'),'stim')

disp('Done!')

