Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
<p align="center">
<a href="https://go-cart.io"><img src ="img/gocart_logo.svg" width="65%"></a>
</p>
The website can be accessed at <a href="https://go-cart.io">go-cart.io</a>.

This program uses the fast flow-based method developed by Michael T. Gastner, Vivien Seguy, and Pratyush More. For more information, you may refer to the following [paper](https://www.pnas.org/content/115/10/E2156):

Expand Down
14 changes: 14 additions & 0 deletions include/string_to_decimal_converter.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,20 @@
#include <string>
#include <vector>

/**
* @brief A utility class for converting string representations of numbers to decimal format.
*
* This class handles various number formats including:
* - Regular decimal numbers (e.g. "123.456", "123,456")
* - Scientific notation (e.g. "1.23e-4", "1.23E4")
* - Special value "NA"
*
* For scientific notation:
* - Both 'e' and 'E' are supported as exponent markers
* - The mantissa can use either '.' or ',' as decimal separator
* - The exponent must be an integer; it may start with '-' (a leading '+' is not accepted)
* - The mantissa must be a valid decimal number
*/
class StringToDecimalConverter
{
private:
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
pip
wheel
conan==2.16.1
cmake==3.30.0
cmake==3.30.0
90 changes: 76 additions & 14 deletions src/misc/string_to_decimal_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,7 @@ const std::string StringToDecimalConverter::NA_ = "NA";

/**
 * @brief Tells whether a character may appear in a numeric string.
 *
 * Accepted characters are the decimal digits, the point and comma
 * separators, the minus sign, and the exponent markers 'e' / 'E' used by
 * scientific notation.
 *
 * @param ch Character to classify.
 * @return true if ch is one of the accepted characters, false otherwise.
 */
bool StringToDecimalConverter::is_valid_char(char ch)
{
  // std::isdigit has undefined behaviour for negative arguments other than
  // EOF, and plain char may be signed, so convert to unsigned char first.
  const bool is_digit = std::isdigit(static_cast<unsigned char>(ch)) != 0;
  const bool is_exponent_marker = (ch == 'e' || ch == 'E');

  return is_digit || ch == point_ || ch == comma_ || ch == minus_ ||
         is_exponent_marker;
}

std::string StringToDecimalConverter::remove_char(std::string str, char ch)
Expand Down Expand Up @@ -87,22 +87,66 @@ bool StringToDecimalConverter::is_str_valid_characters(const std::string &str)
return true;
}

// Only 0 to 9, '.', '-', and ',' are allowed
// Only 0 to 9, '.', '-', ',', 'e', and 'E' are allowed
for (const auto &c : str) {
if (!is_valid_char(c)) {
return false;
}
}

// '-' can only be used once
if (count_char(str, minus_) > 1) {
return false;
// '-' can only be used once in the mantissa and once in the exponent
size_t first_e = str.find_first_of("eE");
if (first_e != std::string::npos) {
// Check mantissa part
std::string mantissa = str.substr(0, first_e);
if (count_char(mantissa, minus_) > 1) {
return false;
}
if (count_char(mantissa, minus_) == 1 && mantissa[0] != minus_) {
return false;
}
// Check exponent part
std::string exponent = str.substr(first_e + 1);
if (count_char(exponent, minus_) > 1) {
return false;
}
if (count_char(exponent, minus_) == 1 && exponent[0] != minus_) {
return false;
}
} else {
// No scientific notation, check as before
if (count_char(str, minus_) > 1) {
return false;
}
if (count_char(str, minus_) == 1 && str[0] != minus_) {
return false;
}
}

// '-' can only be used at the beginning
if (count_char(str, minus_) == 1 and str[0] != minus_) {
// Check for valid scientific notation format
size_t e_count = count_char(str, 'e') + count_char(str, 'E');
if (e_count > 1) {
return false;
}
if (e_count == 1) {
size_t e_pos = str.find_first_of("eE");
// Must have digits before and after 'e'/'E'
if (e_pos == 0 || e_pos == str.length() - 1) {
return false;
}
// Must have at least one digit after 'e'/'E'
bool has_digit_after = false;
for (size_t i = e_pos + 1; i < str.length(); i++) {
if (std::isdigit(str[i])) {
has_digit_after = true;
break;
}
}
if (!has_digit_after) {
return false;
}
}

return true;
}

Expand All @@ -111,19 +155,37 @@ bool StringToDecimalConverter::is_str_correct_format(const std::string &str)
assert(is_str_valid_characters(str));
assert(is_str_NA(str) == false);

// if the number of commas and points both are more than 1, then this format
// does not belong to any known convention
// Handle scientific notation separately
size_t e_pos = str.find_first_of("eE");
if (e_pos != std::string::npos) {
// Check mantissa part
std::string mantissa = str.substr(0, e_pos);
if (has_multiple_commas_and_points(mantissa)) {
return false;
}
if (has_invalid_comma_point_sequence(mantissa)) {
return false;
}
if (has_separator_at_the_end(mantissa)) {
return false;
}
// Exponent part should only contain digits and optional minus sign
std::string exponent = str.substr(e_pos + 1);
for (char c : exponent) {
if (!std::isdigit(c) && c != minus_) {
return false;
}
}
return true;
}

// Original format validation for non-scientific notation
if (has_multiple_commas_and_points(str)) {
return false;
}

// Check for commas before and after a point, or points before and after a
// comma
if (has_invalid_comma_point_sequence(str)) {
return false;
}

// Check for separators at the end of the string
if (has_separator_at_the_end(str)) {
return false;
}
Expand Down
83 changes: 83 additions & 0 deletions tests/unit/test_string_to_decimal_converter.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -78,4 +78,87 @@ BOOST_AUTO_TEST_CASE(TestCorrectFormat_PointAtEnd)
BOOST_CHECK(!StringToDecimalConverter::is_str_correct_format("123456789."));
}

// Checks is_str_valid_characters against scientific-notation inputs:
// well-formed strings must be accepted, malformed ones rejected.
BOOST_AUTO_TEST_CASE(TestValidCharacters_ScientificNotation)
{
// Basic scientific notation, with lower-case and upper-case exponent marker
BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("1.23e4"));
BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("1.23E4"));

// Negative exponents
BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("1.23e-4"));
BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("1.23E-4"));

// Mantissa with both ',' and '.' present (comma-grouped and point-grouped
// variants)
BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("1,234.56e4"));
BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("1.234,56E4"));

// Negative mantissa, with and without a negative exponent
BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("-1.23e4"));
BOOST_CHECK(StringToDecimalConverter::is_str_valid_characters("-1.23E-4"));

// Invalid scientific notation
BOOST_CHECK(!StringToDecimalConverter::is_str_valid_characters(
"1.23e")); // Missing exponent
BOOST_CHECK(!StringToDecimalConverter::is_str_valid_characters(
"e4")); // Missing mantissa
BOOST_CHECK(!StringToDecimalConverter::is_str_valid_characters(
"1.23ee4")); // Multiple e's
// NOTE(review): the visible part of is_str_valid_characters checks minus
// placement, the number of exponent markers and the presence of a digit
// after the marker, but does not appear to inspect '.' or ',' inside the
// exponent -- confirm that this expectation actually holds.
BOOST_CHECK(!StringToDecimalConverter::is_str_valid_characters(
"1.23e4.5")); // Non-integer exponent
BOOST_CHECK(!StringToDecimalConverter::is_str_valid_characters(
"1.23e-")); // Incomplete negative exponent
}

// Checks is_str_correct_format against scientific-notation inputs.
//
// NOTE(review): is_str_correct_format asserts is_str_valid_characters(str)
// on entry. TestValidCharacters_ScientificNotation expects
// is_str_valid_characters to reject "1.23e", "e4", "1.23e4.5" and strings
// with more than one exponent marker, so passing such strings here may trip
// that assertion in builds where assertions are enabled instead of returning
// false -- confirm the intended contract.
BOOST_AUTO_TEST_CASE(TestCorrectFormat_ScientificNotation)
{
// Valid formats
BOOST_CHECK(StringToDecimalConverter::is_str_correct_format("1.23e4"));
BOOST_CHECK(StringToDecimalConverter::is_str_correct_format("1.23E4"));
BOOST_CHECK(StringToDecimalConverter::is_str_correct_format("1,234.56e4"));
BOOST_CHECK(StringToDecimalConverter::is_str_correct_format("1.234,56E4"));
BOOST_CHECK(StringToDecimalConverter::is_str_correct_format("-1.23e4"));
BOOST_CHECK(StringToDecimalConverter::is_str_correct_format("-1.23E-4"));

// Invalid formats
BOOST_CHECK(!StringToDecimalConverter::is_str_correct_format(
"1.23e4.5")); // Non-integer exponent
BOOST_CHECK(!StringToDecimalConverter::is_str_correct_format(
"1.23e4,")); // Comma at end
BOOST_CHECK(!StringToDecimalConverter::is_str_correct_format(
"1.23e4.")); // Point at end
BOOST_CHECK(!StringToDecimalConverter::is_str_correct_format(
"1.23,456.789e4")); // Multiple separators
BOOST_CHECK(!StringToDecimalConverter::is_str_correct_format(
"1.23e4e5")); // Multiple e's
BOOST_CHECK(!StringToDecimalConverter::is_str_correct_format(
"1.23e")); // Missing exponent
BOOST_CHECK(!StringToDecimalConverter::is_str_correct_format(
"e4")); // Missing mantissa
}

// Checks the string returned by parse_str for scientific-notation inputs.
// In every expectation below the grouping separator is removed, the decimal
// separator is written as '.', and the exponent part (including the case of
// the 'e'/'E' marker) is kept as given.
BOOST_AUTO_TEST_CASE(TestParseStr_ScientificNotation)
{
// Test parsing with point as decimal separator (second argument true)
BOOST_CHECK_EQUAL(
StringToDecimalConverter::parse_str("1.23e4", true),
"1.23e4");
BOOST_CHECK_EQUAL(
StringToDecimalConverter::parse_str("1,234.56e4", true),
"1234.56e4");
BOOST_CHECK_EQUAL(
StringToDecimalConverter::parse_str("-1.23e-4", true),
"-1.23e-4");

// Test parsing with comma as decimal separator (second argument false)
BOOST_CHECK_EQUAL(
StringToDecimalConverter::parse_str("1,23e4", false),
"1.23e4");
BOOST_CHECK_EQUAL(
StringToDecimalConverter::parse_str("1.234,56E4", false),
"1234.56E4");
BOOST_CHECK_EQUAL(
StringToDecimalConverter::parse_str("-1,23e-4", false),
"-1.23e-4");
}

BOOST_AUTO_TEST_SUITE_END()
Loading