-
Notifications
You must be signed in to change notification settings - Fork 3
Convert hexadecimal file to decimal & feature extraction #32
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
591b70a
42bff7a
f25e371
0883070
33a8714
e95ffd8
9e1185f
52c7bf7
dea9e43
d002411
b4bff8a
6659308
4028b5e
5e5d7b5
09ef601
cd1ea6d
d98dfe4
9081929
0146d5a
ffb6252
558d30d
8254de2
99acbe7
1afa170
f392984
778b6bc
ffdbfd7
7346338
804f157
94c8284
4e1b2c1
307cd95
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Large diffs are not rendered by default.
Large diffs are not rendered by default.
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -1,5 +1,5 @@ | ||
| { | ||
| "board": "CYTHON", | ||
| "board": "CYTON", | ||
| "subject": { | ||
| "age": 28, | ||
| "sex": "F" | ||
|
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,31 @@ | ||
| from enum import Enum | ||
|
|
||
|
|
||
class Sex(Enum):
    """Sex of a subject, with the numeric codes used as feature values.

    The codes are based on the subject description file (see its header):
    https://physionet.org/content/sleep-edfx/1.0.0/SC-subjects.xls
    """

    F = 1
    M = 2
|
|
||
|
|
||
| ALLOWED_FILE_EXTENSIONS = ('.txt', '.csv') | ||
|
|
||
| EEG_CHANNELS = [ | ||
| 'EEG Fpz-Cz', | ||
| 'EEG Pz-Oz' | ||
| ] | ||
|
|
||
| EPOCH_DURATION = 30 | ||
| FILE_MINIMUM_DURATION = EPOCH_DURATION | ||
|
|
||
| DATASET_SAMPLE_RATE = 100 | ||
| OPENBCI_CYTON_SAMPLE_RATE = 250 | ||
| OPENBCI_GANGLION_SAMPLE_RATE = 200 | ||
|
|
||
| AGE_FEATURE_BINS = [ | ||
|
conorato marked this conversation as resolved.
|
||
| [12, 49], | ||
| [50, 59], | ||
| [60, 84], | ||
| [85, 125] | ||
| ] | ||
| ACCEPTED_AGE_RANGE = [AGE_FEATURE_BINS[0][0], AGE_FEATURE_BINS[-1][-1]] | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,13 @@ | ||
class ClassificationError(Exception):
    """Base class for the application errors that can occur.

    Subclasses override the ``message`` class attribute to describe their
    failure. When the exception is instantiated without arguments, that
    class-level message becomes the exception text, so ``str(error)`` is
    never empty. Explicit arguments are forwarded to ``Exception`` untouched.
    """
    message = "An error occured while calculating sleep stages."

    def __init__(self, *args):
        # Fall back on the class-level message only when the caller gave no
        # argument; `self.message` resolves to the subclass override, if any.
        super().__init__(*(args or (self.message,)))
|
|
||
|
|
||
class TimestampsError(ClassificationError):
    """Raised when the timestamps are incoherent or don't fit the provided file"""
    message = (
        "Received file, stream start time, bedtime or wakeup time "
        "are incoherent"
    )
|
conorato marked this conversation as resolved.
|
||
|
|
||
|
|
||
class FileSizeError(ClassificationError):
    """Raised when the received file is either too big or too small"""
    message = (
        "Received file is either "
        "too big or too small"
    )
|
conorato marked this conversation as resolved.
|
||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,29 @@ | ||
| import numpy as np | ||
|
|
||
| from classification.features.extraction import ( | ||
| get_eeg_features, | ||
| get_non_eeg_features, | ||
| ) | ||
|
|
||
|
|
||
def get_features(signal, info):
    """Returns the raw features of a recording
    Input
    -------
    signal: instance of mne.io.RawArray
        Should contain 2 channels (1: FPZ-CZ, 2: PZ-OZ)
    info: dict
        Should contain the following keys:
        - sex: instance of Sex enum
        - age: indicates the subject's age
        - in_bed_seconds: timespan, in seconds, between the start of
            the recording and the moment the subject went to bed
        - out_of_bed_seconds: timespan, in seconds, between the start of
            the recording and the moment the subject got out of bed

    Returns
    -------
    Features X in an array of shape (nb_epochs, nb_features), with the
    categorical (non EEG) columns placed before the EEG columns.
    """
    eeg_features = get_eeg_features(
        signal,
        info['in_bed_seconds'],
        info['out_of_bed_seconds'],
    )

    # The categorical features are repeated once per extracted epoch.
    nb_epochs = eeg_features.shape[0]
    categorical_features = get_non_eeg_features(
        info['age'],
        info['sex'],
        nb_epochs,
    )

    return np.concatenate((categorical_features, eeg_features), axis=1)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,32 @@ | ||
| from classification.config.constants import ( | ||
| DATASET_SAMPLE_RATE, | ||
| EPOCH_DURATION, | ||
| ) | ||
|
|
||
| NYQUIST_FREQ = DATASET_SAMPLE_RATE / 2 | ||
|
|
||
| DELTA = "delta" | ||
| THETA = "theta" | ||
| ALPHA = "alpha" | ||
| SIGMA = "sigma" | ||
| BETA = "beta" | ||
|
|
||
| FREQ_BANDS_RANGE = { | ||
| DELTA: [0.5, 4.5], | ||
| THETA: [4.5, 8.5], | ||
| ALPHA: [8.5, 11.5], | ||
| SIGMA: [11.5, 15.5], | ||
| BETA: [15.5, 30] | ||
| } | ||
|
|
||
| FREQ_BANDS_ORDERS = { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Enum this?
Contributor
Author
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. hmm oui bonne idée. Par contre, j'ai deux dictionnaires ( |
||
| DELTA: 5, | ||
| THETA: 8, | ||
| ALPHA: 9, | ||
| SIGMA: 9, | ||
| BETA: 14 | ||
| } | ||
|
|
||
| DATASET_HIGH_PASS_FREQ = 0.5 | ||
| HIGH_PASS_FILTER_ORDER = 6 | ||
| HIGH_PASS_MAX_RIPPLE_DB = 0.2 | ||
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,63 @@ | ||
| """Feature extraction tools based off a two channel EEG recording""" | ||
| import numpy as np | ||
|
|
||
| from classification.config.constants import ( | ||
| EEG_CHANNELS, | ||
| AGE_FEATURE_BINS, | ||
| ) | ||
| from classification.features.pipeline import get_feature_union | ||
| from classification.features.preprocessing import preprocess | ||
|
|
||
|
|
||
def get_eeg_features(raw_data, in_bed_seconds, out_of_bed_seconds):
    """Returns the continuous feature matrix
    Input
    -------
    raw_data: MNE.Raw object with signals with or without annotations
    in_bed_seconds: timespan, in seconds, between the start of the
        recording and the moment the subject went to bed
    out_of_bed_seconds: timespan, in seconds, between the start of the
        recording and the moment the subject got out of bed

    Returns
    -------
    Array of size (nb_epochs, nb_continuous_features), where the features
    of each channel of EEG_CHANNELS are stacked side by side.
    """
    feature_union = get_feature_union()
    per_channel_features = []

    for channel in EEG_CHANNELS:
        channel_epochs = preprocess(
            raw_data, channel, in_bed_seconds, out_of_bed_seconds)
        channel_features = feature_union.transform(channel_epochs)
        per_channel_features.append(channel_features)

        nb_features = channel_features.shape[1]
        nb_epochs = channel_features.shape[0]
        print(
            f"Done extracting {nb_features} features "
            f"on {nb_epochs} epochs for {channel}\n"
        )

    return np.hstack(per_channel_features)
|
|
||
|
|
||
def get_non_eeg_features(age, sex, nb_epochs):
    """Returns the categorical feature matrix
    Input
    -------
    age: Age of the subject
    sex: Sex of the subject (object exposing the feature code as `.value`)
    nb_epochs: corresponds to the nb of epochs which will be analyzed.

    Returns
    -------
    Array of size (nb_epochs, nb_categorical_features), which contains
    (duplicated) value for all epochs because it concerns the same subject.
    The columns are, in order, the sex code and the age category index.

    Raises
    -------
    ValueError: if the age does not fall into any of the AGE_FEATURE_BINS.
    """
    # A default is given to next() so that an unsupported age does not leak
    # a bare StopIteration, which is hard to diagnose from the caller's side.
    age_category = next(
        (
            category_index
            for category_index, (min_age, max_age) in enumerate(AGE_FEATURE_BINS)
            if min_age <= age <= max_age
        ),
        None,
    )
    if age_category is None:
        raise ValueError(
            f"Age {age} does not fall into any of the supported "
            f"age bins {AGE_FEATURE_BINS}"
        )

    # np.tile keeps a well defined (0, nb_categorical_features) shape when
    # there is no epoch, where reshape(0, -1) would raise.
    return np.tile([sex.value, age_category], (nb_epochs, 1))
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,19 @@ | ||
| from sklearn.pipeline import FeatureUnion | ||
|
|
||
| from classification.features.pipeline.time_domain import ( | ||
| get_time_domain_pipeline, | ||
| ) | ||
| from classification.features.pipeline.frequency_domain import ( | ||
| get_frequency_domain_pipeline, | ||
| ) | ||
| from classification.features.pipeline.time_subband import ( | ||
| get_subband_feature_union, | ||
| ) | ||
|
|
||
|
|
||
def get_feature_union():
    """Returns the FeatureUnion which combines the time domain, frequency
    domain and subband time domain feature extractors.
    """
    feature_extractors = [
        ('time_domain', get_time_domain_pipeline()),
        ('frequency_domain', get_frequency_domain_pipeline()),
        ('subband_time_domain', get_subband_feature_union()),
    ]

    return FeatureUnion(feature_extractors, n_jobs=1)
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,113 @@ | ||
| import numpy as np | ||
| from sklearn.pipeline import FeatureUnion, Pipeline | ||
| from sklearn.preprocessing import FunctionTransformer | ||
|
|
||
| from classification.features.pipeline.utils import ( | ||
| get_psds_from_epochs, | ||
| ) | ||
| from classification.features.constants import ( | ||
| FREQ_BANDS_RANGE, | ||
| ) | ||
|
|
||
|
|
||
def _get_mean_psds(psds_with_freqs, are_relative=False):
    """EEG power band feature extraction.
    Input
    -------
    psds_with_freqs: tuple which contains
        - (nb_epochs, nb_chan=1, nb_freqs) psds amplitudes
        - (nb_freqs,) corresponding frequency values

    are_relative: boolean which indicates if the mean band powers
        for each subband are relative to the total power or not.

    Returns
    -------
    X : numpy array of shape [n_samples, nb_subband=5]
        Transformed data. The received psds array is left untouched.
    """
    psds, freqs = psds_with_freqs[0], psds_with_freqs[1]

    if are_relative:
        # Divide out of place: the same psds array is handed to the other
        # transformers of the frequency domain FeatureUnion, so an in-place
        # `/=` would silently normalize their input too (and would fail on
        # an integer array).
        psds = psds / np.sum(psds, axis=-1, keepdims=True)

    nb_epochs = len(psds)
    band_means = [
        # Each band is the half-open interval [fmin, fmax).
        psds[:, :, (freqs >= fmin) & (freqs < fmax)]
        .mean(axis=-1)
        .reshape(nb_epochs, -1)
        for fmin, fmax in FREQ_BANDS_RANGE.values()
    ]

    return np.concatenate(band_means, axis=1)
|
|
||
|
|
||
| def _get_sefd_on_all_epochs(psds_with_freqs): | ||
| """SEFd on all epochs | ||
| """ | ||
| SUBBAND_FREQ_SEFD = [8., 16.] | ||
|
|
||
| psds = psds_with_freqs[0].squeeze() | ||
| freqs = psds_with_freqs[1] | ||
|
|
||
| psds = psds[:, (freqs >= SUBBAND_FREQ_SEFD[0]) | ||
| & (freqs < SUBBAND_FREQ_SEFD[1])] | ||
| freqs = freqs[(freqs >= SUBBAND_FREQ_SEFD[0]) | ||
| & (freqs < SUBBAND_FREQ_SEFD[1])] | ||
|
|
||
| def get_sefd(psd, freqs): | ||
| """Spectral edge frequency difference | ||
| Input | ||
| ------- | ||
| psd: array of the power spectrum density for one epoch | ||
| freqs: array of the frequencies | ||
|
|
||
| Returns | ||
| ------- | ||
| Difference between the frequencies under which | ||
| cumulates 95 and 50 percent of the power | ||
| """ | ||
| assert len(psd) == len( | ||
| freqs), "All PSD value must have a corresponding frequency value" | ||
|
|
||
| CUMUL_POWER_RATIO = [0.50, 0.95] | ||
|
|
||
| total_power = np.sum(psd) | ||
| cumul_power = 0 | ||
|
|
||
| lower_freq = None | ||
| upper_freq = None | ||
|
|
||
| for amp, freq in zip(psd, freqs): | ||
| cumul_power += amp | ||
| if cumul_power >= CUMUL_POWER_RATIO[1] * total_power: | ||
| upper_freq = freq | ||
| break | ||
| elif lower_freq is None and cumul_power >= CUMUL_POWER_RATIO[0] * total_power: | ||
| lower_freq = freq | ||
|
|
||
| return upper_freq - lower_freq | ||
|
|
||
| return [[get_sefd(one_epoch_psd, freqs)] for one_epoch_psd in psds] | ||
|
|
||
|
|
||
def get_frequency_domain_pipeline():
    """Returns the pipeline which extracts the frequency domain features.

    The first step converts the epochs to their power spectral densities.
    A FeatureUnion then stacks the absolute mean band powers, the relative
    mean band powers and the spectral edge frequency difference (SEFd).
    """
    get_psds_from_epochs_transformer = FunctionTransformer(
        get_psds_from_epochs, validate=False)
    absolute_mean_psds_transformer = FunctionTransformer(
        _get_mean_psds, validate=False)
    # The keyword argument goes through `kw_args` rather than a lambda:
    # lambdas cannot be pickled, which would prevent persisting the pipeline.
    relative_mean_psds_transformer = FunctionTransformer(
        _get_mean_psds, kw_args={'are_relative': True}, validate=False)
    sefd_transformer = FunctionTransformer(
        _get_sefd_on_all_epochs, validate=False)

    return Pipeline([
        ('get_psds_from_epochs', get_psds_from_epochs_transformer),
        ('frequency_domain_features', FeatureUnion([
            ('absolute_mean_power_band', absolute_mean_psds_transformer),
            ('relative_mean_power_band', relative_mean_psds_transformer),
            ('sefd', sefd_transformer)
        ], n_jobs=1))
    ])
Uh oh!
There was an error while loading. Please reload this page.