Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
32 commits
Select commit Hold shift + click to select a range
591b70a
feature extraction outline
conorato Oct 6, 2020
42bff7a
added outline for feature pipeline extraction
conorato Oct 6, 2020
f25e371
renamed
conorato Oct 7, 2020
0883070
added frequency features
conorato Oct 7, 2020
33a8714
Merge branch 'master' of github.com:PolyCortex/polydodo into backend/…
conorato Oct 8, 2020
e95ffd8
renamed package backend to classification
conorato Oct 8, 2020
9e1185f
added time subband
conorato Oct 15, 2020
52c7bf7
Merge branch 'master' of github.com:PolyCortex/polydodo into backend/…
conorato Oct 15, 2020
dea9e43
convert sd file to decimal
conorato Oct 17, 2020
d002411
linked sd card file uploaded to feature extraction
conorato Oct 18, 2020
b4bff8a
fix docstring on create epochs & sex value
conorato Oct 18, 2020
6659308
Merge branch 'master' of github.com:PolyCortex/polydodo into backend/…
conorato Oct 18, 2020
4028b5e
added bad request when raising classification exception
conorato Oct 18, 2020
5e5d7b5
fixed constants & requirements
conorato Oct 19, 2020
09ef601
moved form data to int in parent file
conorato Oct 19, 2020
cd1ea6d
updated min age
conorato Oct 19, 2020
d98dfe4
Merge branch 'master' of github.com:PolyCortex/polydodo into backend/…
conorato Oct 19, 2020
9081929
call from FE to BE works
conorato Oct 19, 2020
0146d5a
changed file extension checks
conorato Oct 20, 2020
ffb6252
Apply suggestions from code review
conorato Oct 22, 2020
558d30d
added filtering exploration for OpenBCI Cyton data & analyzed resampl…
conorato Oct 22, 2020
8254de2
Merge branch 'backend/classification' of github.com:PolyCortex/polydo…
conorato Oct 22, 2020
99acbe7
renamed utils to preprocessing
conorato Oct 22, 2020
1afa170
added HP filter & refactored to preprocessing module
conorato Oct 22, 2020
f392984
Fix conflicts
abelfodil Oct 22, 2020
778b6bc
switched to fixe length epochs
conorato Oct 22, 2020
ffdbfd7
Merge branch 'backend/classification' of github.com:PolyCortex/polydo…
conorato Oct 22, 2020
7346338
added underscores
conorato Oct 22, 2020
804f157
converted fct name
conorato Oct 22, 2020
94c8284
deleted min between out of bed seconds and file max, because validati…
conorato Oct 22, 2020
4e1b2c1
renamed file content var
conorato Oct 22, 2020
307cd95
gets srate from raw object (preprocessing independant of acquisition …
conorato Oct 22, 2020
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions .editorconfig
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,9 @@ indent_size = 2
insert_final_newline = true
trim_trailing_whitespace = true

[*.py]
indent_size = 4

[*.md]
max_line_length = off
trim_trailing_whitespace = false
546 changes: 456 additions & 90 deletions ai/feature_extraction.ipynb

Large diffs are not rendered by default.

127 changes: 68 additions & 59 deletions ai/prediction_openbci.ipynb

Large diffs are not rendered by default.

42 changes: 38 additions & 4 deletions backend/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,22 +3,37 @@
from waitress import serve
from http import HTTPStatus

from classification.file_loading import get_raw_array
from classification.predict import predict
from classification.exceptions import ClassificationError
from classification.config.constants import Sex, ALLOWED_FILE_EXTENSIONS

app = Flask(__name__)


def allowed_file(filename):
    """Tells whether an uploaded file name carries an accepted extension
    Input
    -------
    filename: name of the uploaded file, as a string

    Returns
    -------
    True when the lowercased file name ends with one of the suffixes
    listed in ALLOWED_FILE_EXTENSIONS, False otherwise
    """
    # Single implementation relying on the shared constant: the former
    # function-local extension set duplicated it and left a second,
    # unreachable return statement behind.
    return filename.lower().endswith(ALLOWED_FILE_EXTENSIONS)


@app.route("/")
def status():
    """Answers requests made on the root URL with an empty body"""
    return ""
Comment thread
conorato marked this conversation as resolved.


@app.route('/analyze_sleep', methods=['POST'])
@app.route('/analyze-sleep', methods=['POST'])
Comment thread
abelfodil marked this conversation as resolved.
def analyze_sleep():
"""
Request payload example
{
"file": File(...),
"device": "CYTON",
"sex": "F",
"age": "23",
"stream_start": 1602895800000,
"bedtime": 1602898320000,
"wakeup": 1602931800000
}
"""
if 'file' not in request.files:
return 'Missing file', HTTPStatus.BAD_REQUEST
file = request.files['file']
Expand All @@ -29,9 +44,28 @@ def analyze_sleep():
if not allowed_file(file.filename):
return 'File format not allowed', HTTPStatus.BAD_REQUEST

Comment thread
conorato marked this conversation as resolved.
file_content = file.read()
form_data = request.form.to_dict()

try:
age = int(form_data['age'])
sex = Sex[form_data['sex']]
stream_start = int(form_data['stream_start'])
bedtime = int(form_data['bedtime'])
wakeup = int(form_data['wakeup'])
except (KeyError, ValueError):
return 'Missing or invalid request parameters', HTTPStatus.BAD_REQUEST

try:
raw_array = get_raw_array(file)
predict(raw_array, info={
'sex': sex,
'age': age,
'in_bed_seconds': bedtime - stream_start,
'out_of_bed_seconds': wakeup - stream_start
})
except ClassificationError as e:
return e.message, HTTPStatus.BAD_REQUEST

with open("assets/mock_response.json", "r") as mock_response_file:
return mock_response_file.read()

Expand Down
2 changes: 1 addition & 1 deletion backend/assets/mock_response.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{
"board": "CYTHON",
"board": "CYTON",
"subject": {
"age": 28,
"sex": "F"
Expand Down
31 changes: 31 additions & 0 deletions backend/classification/config/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
from enum import Enum


class Sex(Enum):
    """Sex of the recorded subject"""
    # The numeric codes are based on the subject description file
    # (see its header):
    # https://physionet.org/content/sleep-edfx/1.0.0/SC-subjects.xls
    F = 1
    M = 2


# Suffixes accepted for uploaded files. Kept as a tuple because it is
# handed as-is to str.endswith when validating a file name.
ALLOWED_FILE_EXTENSIONS = ('.txt', '.csv')

# Names of the EEG channels on which features are extracted, one after
# the other.
EEG_CHANNELS = [
    'EEG Fpz-Cz',
    'EEG Pz-Oz'
]

# NOTE(review): presumably expressed in seconds -- confirm.
EPOCH_DURATION = 30
# Defined as exactly one epoch.
FILE_MINIMUM_DURATION = EPOCH_DURATION

# NOTE(review): sample rates are presumably expressed in Hz -- confirm.
DATASET_SAMPLE_RATE = 100
OPENBCI_CYTON_SAMPLE_RATE = 250
OPENBCI_GANGLION_SAMPLE_RATE = 200

AGE_FEATURE_BINS = [
Comment thread
conorato marked this conversation as resolved.
[12, 49],
[50, 59],
[60, 84],
[85, 125]
]
ACCEPTED_AGE_RANGE = [AGE_FEATURE_BINS[0][0], AGE_FEATURE_BINS[-1][-1]]
13 changes: 13 additions & 0 deletions backend/classification/exceptions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
class ClassificationError(Exception):
    """Base class of the errors that can occur during the classification

    The user-facing text is exposed through the `message` attribute.
    Subclasses only have to override the class-level `message`; a
    specific text can also be given when raising the error.
    """
    message = "An error occured while calculating sleep stages."

    def __init__(self, message=None, *args):
        # An explicit text overrides the class-level default for this
        # instance only.
        if message is not None:
            self.message = message
        # Forward the text to Exception so that str(error), logs and
        # tracebacks carry it instead of being empty.
        super().__init__(self.message, *args)


class TimestampsError(ClassificationError):
    """Raised when the timestamps are incoherent or do not fit the provided file"""
    message = "Received file, stream start time, bedtime or wakeup time are incoherent"
Comment thread
conorato marked this conversation as resolved.


class FileSizeError(ClassificationError):
    """Raised when the received file is either too big or too small"""
    message = "Received file is either too big or too small"
Comment thread
conorato marked this conversation as resolved.
29 changes: 29 additions & 0 deletions backend/classification/features/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
import numpy as np

from classification.features.extraction import (
get_eeg_features,
get_non_eeg_features,
)


def get_features(signal, info):
    """Builds the complete feature matrix of one recording
    Input:
    - signal: instance of mne.io.RawArray
        Should contain 2 channels (1: FPZ-CZ, 2: PZ-OZ)
    - info: dict
        Should contain the following keys:
          - sex: instance of Sex enum
          - age: indicates the subject's age
          - in_bed_seconds: timespan, in seconds, between the start of
            the recording and the moment the subject went to bed
          - out_of_bed_seconds: timespan, in seconds, between the start
            of the recording and the moment the subject got out of bed
    Returns
    -------
    - features X in a vector of (nb_epochs, nb_features), where the
      categorical columns come before the EEG columns
    """
    eeg_features = get_eeg_features(
        signal,
        info['in_bed_seconds'],
        info['out_of_bed_seconds'],
    )
    # The subject-level features are repeated once per extracted epoch.
    nb_epochs = eeg_features.shape[0]
    categorical_features = get_non_eeg_features(
        info['age'],
        info['sex'],
        nb_epochs,
    )

    return np.concatenate((categorical_features, eeg_features), axis=1)
32 changes: 32 additions & 0 deletions backend/classification/features/constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
from classification.config.constants import (
DATASET_SAMPLE_RATE,
EPOCH_DURATION,
)

# Half of the dataset's sample rate.
NYQUIST_FREQ = DATASET_SAMPLE_RATE / 2

# Band names, declared once so that the dictionaries keyed by band share
# exactly the same keys.
DELTA = "delta"
THETA = "theta"
ALPHA = "alpha"
SIGMA = "sigma"
BETA = "beta"

# [lower, upper] frequency limits of each band.
# NOTE(review): presumably expressed in Hz -- confirm.
FREQ_BANDS_RANGE = {
    DELTA: [0.5, 4.5],
    THETA: [4.5, 8.5],
    ALPHA: [8.5, 11.5],
    SIGMA: [11.5, 15.5],
    BETA: [15.5, 30]
}

FREQ_BANDS_ORDERS = {
Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Enum this?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

hmm oui bonne idée. Par contre, j'ai deux dictionnaires (FREQ_BANDS_RANGE et FREQ_BANDS_ORDERS) qui partagent les mêmes clés. Pour le moment, j'ai déclaré des constantes DELTA, ..., BETA que je reprends pour définir les clés des deux dictionnaires. En déclarant un Enum pour l'un des deux, je perds la notion qu'ils ont les mêmes clés.

DELTA: 5,
THETA: 8,
ALPHA: 9,
SIGMA: 9,
BETA: 14
}

# High-pass filter settings.
# NOTE(review): presumably a cutoff frequency in Hz, a filter order and
# a maximum ripple in dB -- confirm against the preprocessing module.
DATASET_HIGH_PASS_FREQ = 0.5
HIGH_PASS_FILTER_ORDER = 6
HIGH_PASS_MAX_RIPPLE_DB = 0.2
63 changes: 63 additions & 0 deletions backend/classification/features/extraction.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,63 @@
"""Feature extraction tools based off a two channel EEG recording"""
import numpy as np

from classification.config.constants import (
EEG_CHANNELS,
AGE_FEATURE_BINS,
)
from classification.features.pipeline import get_feature_union
from classification.features.preprocessing import preprocess


def get_eeg_features(raw_data, in_bed_seconds, out_of_bed_seconds):
    """Returns the continuous feature matrix
    Input
    -------
    raw_data: MNE.Raw object with signals with or without annotations
    in_bed_seconds: timespan, in seconds, between the start of the
        recording and the moment the subject went to bed
    out_of_bed_seconds: timespan, in seconds, between the start of the
        recording and the moment the subject got out of bed

    Returns
    -------
    Array of size (nb_epochs, nb_continuous_features), in which the
    features of each channel of EEG_CHANNELS are laid side by side
    """
    # The same feature union is reused for every channel.
    feature_union = get_feature_union()
    per_channel_features = []

    for channel in EEG_CHANNELS:
        channel_epochs = preprocess(
            raw_data, channel, in_bed_seconds, out_of_bed_seconds)
        channel_features = feature_union.transform(channel_epochs)
        per_channel_features.append(channel_features)

        print(
            f"Done extracting {channel_features.shape[1]} features "
            f"on {channel_features.shape[0]} epochs for {channel}\n"
        )

    return np.hstack(tuple(per_channel_features))


def get_non_eeg_features(age, sex, nb_epochs, age_bins=None):
    """Returns the categorical feature matrix
    Input
    -------
    age: Age of the subject
    sex: Sex of the subject; its `value` attribute is used as a feature
    nb_epochs: corresponds to the nb of epochs which will be analyzed.
    age_bins: optional sequence of [lower, upper] inclusive age limits;
        defaults to AGE_FEATURE_BINS

    Returns
    -------
    Array of size (nb_epochs, nb_categorical_features), which contains
    (duplicated) value for all epochs because it concerns the same subject.
    The columns are, in order, the sex value and the index of the age bin.

    Raises
    -------
    ValueError: when the age does not fall within any of the bins
    """
    if age_bins is None:
        age_bins = AGE_FEATURE_BINS

    age_category = next(
        (
            category_index
            for category_index, (lower, upper) in enumerate(age_bins)
            if lower <= age <= upper
        ),
        None,
    )
    if age_category is None:
        # A bare StopIteration carries no message and can be silently
        # swallowed when it crosses an enclosing generator.
        raise ValueError(f"Age {age!r} does not fall within any age bin")

    # np.tile also yields a well-formed (0, 2) matrix when there is no
    # epoch, whereas reshape(0, -1) cannot infer the missing dimension.
    return np.tile([sex.value, age_category], (nb_epochs, 1))
19 changes: 19 additions & 0 deletions backend/classification/features/pipeline/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
from sklearn.pipeline import FeatureUnion

from classification.features.pipeline.time_domain import (
get_time_domain_pipeline,
)
from classification.features.pipeline.frequency_domain import (
get_frequency_domain_pipeline,
)
from classification.features.pipeline.time_subband import (
get_subband_feature_union,
)


def get_feature_union():
    """Assembles the time domain, frequency domain and subband time
    domain feature extractors into a single sequential FeatureUnion
    """
    named_extractors = [
        ('time_domain', get_time_domain_pipeline()),
        ('frequency_domain', get_frequency_domain_pipeline()),
        ('subband_time_domain', get_subband_feature_union()),
    ]
    return FeatureUnion(transformer_list=named_extractors, n_jobs=1)
113 changes: 113 additions & 0 deletions backend/classification/features/pipeline/frequency_domain.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,113 @@
import numpy as np
from sklearn.pipeline import FeatureUnion, Pipeline
from sklearn.preprocessing import FunctionTransformer

from classification.features.pipeline.utils import (
get_psds_from_epochs,
)
from classification.features.constants import (
FREQ_BANDS_RANGE,
)


def _get_mean_psds(psds_with_freqs, are_relative=False, freq_bands=None):
    """EEG power band feature extraction.
    Input
    -------
    psds_with_freqs: tuple which contains
        - (nb_epochs, nb_chan=1, nb_freqs) psds amplitudes
        - (nb_freqs,) corresponding frequency values

    are_relative: boolean which indicates if the mean band powers
        for each subband are relative to the total power or not.

    freq_bands: optional dict mapping a band name to its [fmin, fmax]
        limits (fmin included, fmax excluded); defaults to
        FREQ_BANDS_RANGE.

    Returns
    -------
    X : numpy array of shape [n_samples, nb_subband]
        Transformed data, with one column per band, in the iteration
        order of the bands. The received psds array is left untouched.
    """
    psds = psds_with_freqs[0]
    freqs = psds_with_freqs[1]
    if freq_bands is None:
        freq_bands = FREQ_BANDS_RANGE

    if are_relative:
        # Out-of-place division: the same psds array is handed to the
        # other transformers of the feature union, so normalising it
        # in place would silently alter what they receive.
        psds = psds / np.sum(psds, axis=-1, keepdims=True)

    band_means = [
        psds[:, :, (freqs >= fmin) & (freqs < fmax)]
        .mean(axis=-1)
        .reshape(len(psds), -1)
        for fmin, fmax in freq_bands.values()
    ]

    return np.concatenate(band_means, axis=1)


def _get_sefd_on_all_epochs(psds_with_freqs):
    """Spectral edge frequency difference (SEFd) on all epochs
    Input
    -------
    psds_with_freqs: tuple which contains
        - (nb_epochs, nb_chan=1, nb_freqs) psds amplitudes
        - (nb_freqs,) corresponding frequency values

    Returns
    -------
    List of nb_epochs single-element lists, each one holding the SEFd
    of an epoch, computed on the frequencies within SUBBAND_FREQ_SEFD
    (lower limit included, upper limit excluded)
    """
    SUBBAND_FREQ_SEFD = [8., 16.]

    freqs = psds_with_freqs[1]
    psds = np.asarray(psds_with_freqs[0])
    # Only the singleton channel axis is dropped. A bare squeeze() also
    # dropped the epoch axis of a single-epoch recording, which made the
    # 2D indexing below fail.
    psds = psds.reshape(psds.shape[0], psds.shape[-1])

    in_subband = (
        (freqs >= SUBBAND_FREQ_SEFD[0]) & (freqs < SUBBAND_FREQ_SEFD[1])
    )
    psds = psds[:, in_subband]
    freqs = freqs[in_subband]

    def get_sefd(psd, freqs):
        """Spectral edge frequency difference
        Input
        -------
        psd: array of the power spectrum density for one epoch
        freqs: array of the frequencies

        Returns
        -------
        Difference between the frequencies under which
        cumulates 95 and 50 percent of the power
        """
        if len(psd) != len(freqs):
            # Explicit raise: assert statements vanish under python -O.
            raise ValueError(
                "All PSD value must have a corresponding frequency value")

        CUMUL_POWER_RATIO = [0.50, 0.95]

        total_power = np.sum(psd)
        cumul_power = 0

        lower_freq = None
        upper_freq = None

        for amp, freq in zip(psd, freqs):
            cumul_power += amp
            # The lower edge is tested first, so that it is still set
            # when both ratios are reached on the very same frequency.
            if lower_freq is None and cumul_power >= CUMUL_POWER_RATIO[0] * total_power:
                lower_freq = freq
            if cumul_power >= CUMUL_POWER_RATIO[1] * total_power:
                upper_freq = freq
                break

        if lower_freq is None or upper_freq is None:
            raise ValueError(
                "Cannot compute the SEFd of an empty or non-finite power spectrum")

        return upper_freq - lower_freq

    return [[get_sefd(one_epoch_psd, freqs)] for one_epoch_psd in psds]


def get_frequency_domain_pipeline():
    """Builds the frequency domain feature extraction pipeline

    The first step computes the psds from the epochs. The second step
    feeds these psds to three feature extractors, run sequentially: the
    absolute mean band powers, the relative mean band powers and the
    SEFd.
    """
    frequency_domain_features = FeatureUnion(
        transformer_list=[
            (
                'absolute_mean_power_band',
                FunctionTransformer(_get_mean_psds, validate=False),
            ),
            (
                'relative_mean_power_band',
                FunctionTransformer(
                    _get_mean_psds,
                    validate=False,
                    kw_args={'are_relative': True},
                ),
            ),
            (
                'sefd',
                FunctionTransformer(_get_sefd_on_all_epochs, validate=False),
            ),
        ],
        n_jobs=1,
    )

    return Pipeline(steps=[
        (
            'get_psds_from_epochs',
            FunctionTransformer(get_psds_from_epochs, validate=False),
        ),
        ('frequency_domain_features', frequency_domain_features),
    ])
Loading