
Commit 48e24af

Merge pull request #9 from kvedala/machine_learning/adaline

Machine learning/adaline

2 parents c9e4e8c + 1c05a58

4 files changed: +291 -0 lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions

@@ -59,6 +59,7 @@ add_subdirectory(search)
 add_subdirectory(strings)
 add_subdirectory(sorting)
 add_subdirectory(probability)
+add_subdirectory(machine_learning)
 add_subdirectory(computer_oriented_statistical_methods)

 if(USE_OPENMP)

DIRECTORY.md

Lines changed: 3 additions & 0 deletions

@@ -101,6 +101,9 @@
 * [Linear Probing Hash Table](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/hashing/linear_probing_hash_table.cpp)
 * [Quadratic Probing Hash Table](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/hashing/quadratic_probing_hash_table.cpp)

+## Machine Learning
+* [Adaline Learning](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/machine_learning/adaline_learning.cpp)
+
 ## Math
 * [Binary Exponent](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/math/binary_exponent.cpp)
 * [Double Factorial](https://github.com/TheAlgorithms/C-Plus-Plus/blob/master/math/double_factorial.cpp)

machine_learning/CMakeLists.txt

Lines changed: 18 additions & 0 deletions

@@ -0,0 +1,18 @@
# If necessary, use the RELATIVE flag, otherwise each source file may be listed
# with full pathname. RELATIVE may make it easier to extract an executable name
# automatically.
file( GLOB APP_SOURCES RELATIVE ${CMAKE_CURRENT_SOURCE_DIR} *.cpp )
# file( GLOB APP_SOURCES ${CMAKE_SOURCE_DIR}/*.c )
# AUX_SOURCE_DIRECTORY(${CMAKE_CURRENT_SOURCE_DIR} APP_SOURCES)
foreach( testsourcefile ${APP_SOURCES} )
    # Use a simple string replace to cut off ".cpp" and get the executable name.
    string( REPLACE ".cpp" "" testname ${testsourcefile} )
    add_executable( ${testname} ${testsourcefile} )

    set_target_properties(${testname} PROPERTIES LINKER_LANGUAGE CXX)
    if(OpenMP_CXX_FOUND)
        target_link_libraries(${testname} OpenMP::OpenMP_CXX)
    endif()
    install(TARGETS ${testname} DESTINATION "bin/machine_learning")

endforeach( testsourcefile ${APP_SOURCES} )
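
Since adaline_learning.cpp is the only source file in this directory at this commit, the glob-and-loop above expands to the equivalent of the following explicit rules (an illustrative sketch, not part of the commit):

add_executable( adaline_learning adaline_learning.cpp )
set_target_properties( adaline_learning PROPERTIES LINKER_LANGUAGE CXX )
if(OpenMP_CXX_FOUND)
    target_link_libraries( adaline_learning OpenMP::OpenMP_CXX )
endif()
install( TARGETS adaline_learning DESTINATION "bin/machine_learning" )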
machine_learning/adaline_learning.cpp

Lines changed: 269 additions & 0 deletions

@@ -0,0 +1,269 @@
/**
 * \file
 * \brief [Adaptive Linear Neuron
 * (ADALINE)](https://en.wikipedia.org/wiki/ADALINE) implementation
 *
 * <img
 * src="https://upload.wikimedia.org/wikipedia/commons/b/be/Adaline_flow_chart.gif"
 * width="200px">
 * [source](https://commons.wikimedia.org/wiki/File:Adaline_flow_chart.gif)
 *
 * ADALINE is one of the first and simplest single-layer artificial neural
 * networks. The algorithm essentially implements a linear function
 * \f[ f\left(x_0,x_1,x_2,\ldots\right) = \sum_j x_j w_j + \theta \f]
 * where \f$x_j\f$ are the input features of a sample, \f$w_j\f$ are the
 * coefficients of the linear function, and \f$\theta\f$ is a constant. If
 * the \f$w_j\f$ are known, then for any given set of features the output
 * \f$y\f$ can be computed. Computing the \f$w_j\f$ is a supervised learning
 * problem wherein a set of feature vectors and their corresponding outputs
 * are given, and the weights are computed using the stochastic gradient
 * descent method.
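 *
 * The linear output is passed through a quantizer, which maps
 * \f$f(x)\geq 0\f$ to \f$+1\f$ and \f$f(x)<0\f$ to \f$-1\f$ (see predict()
 * below). For each training sample, the weights are then nudged against the
 * prediction error, as implemented in fit():
 * \f[ w_j \leftarrow w_j + \eta\left(y - \hat{y}\right)x_j \f]
 * where \f$\hat{y}\f$ is the current quantized prediction and \f$\eta\f$ is
 * the learning rate.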
 */

#include <cassert>
#include <climits>
#include <cmath>
#include <cstdlib>
#include <ctime>
#include <iostream>
#include <vector>

#define MAX_ITER 500  // INT_MAX ///< Maximum number of iterations to learn

class adaline {
 public:
    /**
     * Default constructor
     * \param[in] num_features number of features present
     * \param[in] eta learning rate (optional, default=0.01)
     * \param[in] accuracy convergence accuracy (optional,
     * default=\f$1\times10^{-5}\f$)
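     *
     * Example construction (mirroring test1() below):
     * \code
     * adaline ada(2, 0.01);  // 2 features, learning rate 0.01
     * \endcode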
     */
    adaline(int num_features, const double eta = 0.01f,
            const double accuracy = 1e-5)
        : eta(eta), accuracy(accuracy) {
        if (eta <= 0) {
            std::cerr << "learning rate should be positive and nonzero"
                      << std::endl;
            std::exit(EXIT_FAILURE);
        }

        weights = std::vector<double>(
            num_features +
            1);  // additional weight is for the constant bias term

        // initialize with random weights in the range [-50, 49]
        for (size_t i = 0; i < weights.size(); i++)
            weights[i] = (static_cast<double>(std::rand() % 100) - 50);
    }

    /**
     * Operator to print the weights of the model
     */
    friend std::ostream &operator<<(std::ostream &out, const adaline &ada) {
        out << "<";
        for (size_t i = 0; i < ada.weights.size(); i++) {
            out << ada.weights[i];
            if (i < ada.weights.size() - 1)
                out << ", ";
        }
        out << ">";
        return out;
    }

    /**
     * Predict the output of the model for a given set of features
     * \param[in] x input vector
     * \returns model prediction output
     */
    int predict(const std::vector<double> &x) {
        if (!check_size_match(x))
            return 0;

        double y = weights.back();  // assign bias value

        for (size_t i = 0; i < x.size(); i++) y += x[i] * weights[i];

        return y >= 0 ? 1 : -1;  // quantizer: apply ADALINE threshold function
    }

    /**
     * Update the weights of the model using supervised learning for one
     * feature vector
     * \param[in] x feature vector
     * \param[in] y known output value
     * \returns correction factor
     */
    double fit(const std::vector<double> &x, const int &y) {
        if (!check_size_match(x))
            return 0;

        /* output of the model with current weights */
        int p = predict(x);
        int prediction_error = y - p;  // error in estimation
        double correction_factor = eta * prediction_error;

        /* update each weight; the last weight is the bias term */
        for (size_t i = 0; i < x.size(); i++) {
            weights[i] += correction_factor * x[i];
        }
        weights[x.size()] += correction_factor;  // update bias

        return correction_factor;
    }

    /**
     * Update the weights of the model using supervised learning for an
     * array of feature vectors
     * \param[in] X array of feature vectors
     * \param[in] y known output value for each feature vector
     */
    template <int N>
    void fit(std::vector<double> const (&X)[N], const int *y) {
        double avg_pred_error = 1.f;

        int iter;
        for (iter = 0; (iter < MAX_ITER) && (avg_pred_error > accuracy);
             iter++) {
            avg_pred_error = 0.f;

            // perform fit for each sample and accumulate the absolute
            // correction as the convergence measure
            for (int i = 0; i < N; i++) {
                double err = fit(X[i], y[i]);
                avg_pred_error += std::abs(err);
            }
            avg_pred_error /= N;

            // print training progress on every iteration; uncomment the
            // guard below to print only every 100th iteration
            // if (iter % 100 == 0)
            std::cout << "\tIter " << iter << ": Training weights: " << *this
                      << "\tAvg error: " << avg_pred_error << std::endl;
        }

        if (iter < MAX_ITER)
            std::cout << "Converged after " << iter << " iterations."
                      << std::endl;
        else
            std::cout << "Did not converge after " << iter << " iterations."
                      << std::endl;
    }

 private:
    /**
     * Convenience function to check if the input feature vector size matches
     * the model's weight vector size
     * \param[in] x feature vector to check
     * \returns `true` if the sizes match
     * \returns `false` if the sizes do not match
     */
    bool check_size_match(const std::vector<double> &x) {
        if (x.size() != (weights.size() - 1)) {
            std::cerr << __func__ << ": "
                      << "Number of features in x does not match the feature "
                         "dimension in model!"
                      << std::endl;
            return false;
        }
        return true;
    }

    const double eta;             ///< learning rate of the algorithm
    const double accuracy;        ///< model fit convergence accuracy
    std::vector<double> weights;  ///< weights of the neural network
};

/**
 * test function to predict points in a 2D coordinate system above the line
 * \f$x=y\f$ as +1 and others as -1.
 * Note that each point is defined by 2 values or 2 features.
 * \param[in] eta learning rate (optional, default=0.01)
 */
void test1(double eta = 0.01) {
    adaline ada(2, eta);  // 2 features

    const int N = 10;  // number of sample points

    std::vector<double> X[N] = {{0, 1},  {1, -2},  {2, 3},  {3, -1},
                                {4, 1},  {6, -5},  {-7, -3}, {-8, 5},
                                {-9, 2}, {-10, -15}};
    int y[] = {1, -1, 1, -1, -1, -1, 1, 1, 1, -1};  // corresponding y-values

    std::cout << "------- Test 1 -------" << std::endl;
    std::cout << "Model before fit: " << ada << std::endl;

    ada.fit(X, y);
    std::cout << "Model after fit: " << ada << std::endl;

    int predict = ada.predict({5, -3});
    std::cout << "Predict for x=(5,-3): " << predict;
    assert(predict == -1);
    std::cout << " ...passed" << std::endl;

    predict = ada.predict({5, 8});
    std::cout << "Predict for x=(5,8): " << predict;
    assert(predict == 1);
    std::cout << " ...passed" << std::endl;
}

/**
 * test function to predict points in a 2D coordinate system above the line
 * \f$x+y=-1\f$ as +1 and others as -1.
 * Note that each point is defined by 2 values or 2 features.
 * The function will create random sample points for training and test
 * purposes.
 * \param[in] eta learning rate (optional, default=0.01)
 */
void test2(double eta = 0.01) {
    adaline ada(2, eta);  // 2 features

    const int N = 50;  // number of sample points

    std::vector<double> X[N];
    int Y[N];  // corresponding y-values

    int range = 500;          // sample points range
    int range2 = range >> 1;  // half the range, used to center samples at 0
    for (int i = 0; i < N; i++) {
        double x0 = ((std::rand() % range) - range2) / 100.f;
        double x1 = ((std::rand() % range) - range2) / 100.f;
        X[i] = {x0, x1};
        Y[i] = (x0 + x1) > -1 ? 1 : -1;
    }

    std::cout << "------- Test 2 -------" << std::endl;
    std::cout << "Model before fit: " << ada << std::endl;

    ada.fit(X, Y);
    std::cout << "Model after fit: " << ada << std::endl;

    int N_test_cases = 5;
    for (int i = 0; i < N_test_cases; i++) {
        double x0 = ((std::rand() % range) - range2) / 100.f;
        double x1 = ((std::rand() % range) - range2) / 100.f;

        int predict = ada.predict({x0, x1});

        std::cout << "Predict for x=(" << x0 << "," << x1 << "): " << predict;

        int expected_val = (x0 + x1) > -1 ? 1 : -1;
        assert(predict == expected_val);
        std::cout << " ...passed" << std::endl;
    }
}

/** Main function */
int main(int argc, char **argv) {
    std::srand(std::time(nullptr));  // initialize random number generator

    double eta = 0.2;  // default value of eta
    if (argc == 2)     // read eta value from commandline argument if present
        eta = std::strtof(argv[1], nullptr);

    test1(eta);

    std::cout << "Press ENTER to continue..." << std::endl;
    std::cin.get();

    test2(eta);

    return 0;
}
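
For reference, a minimal sketch of how the adaline class could be driven from other code once this file is compiled in; the data and the helper name learn_and_classify are hypothetical, mirroring test1 above:

// Hypothetical driver mirroring test1: train on points labeled +1 above
// the line x1 = x0 (and -1 below it), then classify an unseen point.
void learn_and_classify() {
    adaline model(2, 0.01);  // 2 features, learning rate 0.01
    std::vector<double> X[4] = {{0, 1}, {1, -2}, {-7, -3}, {3, -1}};
    int y[4] = {1, -1, 1, -1};  // +1 if x1 > x0, else -1
    model.fit(X, y);            // train until converged or MAX_ITER reached
    int label = model.predict({5, 8});  // expected +1 for this point
    std::cout << "Predicted label: " << label << std::endl;
}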
